├── README.md ├── world.py ├── net.py ├── sensor.py ├── simulator.py └── moving_object.py /README.md: -------------------------------------------------------------------------------- 1 | # Deep Reinforcement Learning for Fixed-Wing Flight Control 2 | 3 | This is a Deep Q-Network (DQN) reinforcement learning agent which navigates a 4 | fixed wing aircraft in a simulator to a target waypoint while avoiding 5 | stationary and moving obstacles. 6 | 7 | 8 | screen shot 2016-11-30 at 02 24 59 9 | 10 | # Setup 11 | 12 | To run this, one needs to set up MIT's director visualization tool as it will 13 | be used to display the simulator. Instructions on how to build this can be 14 | found [here](https://github.com/RobotLocomotion/director). 15 | 16 | Following that, you must set up an alias for the `directorPython` executable 17 | that was built. This can be simply done by running: 18 | 19 | ```bash 20 | alias director=/path/to/director/build/install/bin/directorPython 21 | ``` 22 | 23 | You can add this to your shell profile to avoid running this every time. 24 | 25 | Finally, you need to install Tensorflow for Python 2.7. This can be done by 26 | following the steps 27 | [here](https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html). 28 | 29 | # Running 30 | 31 | Once everything is setup, you can simply run: 32 | 33 | ```bash 34 | director simulator.py 35 | ``` 36 | 37 | or 38 | 39 | ```bash 40 | director simulator.py --help 41 | ``` 42 | 43 | for advanced options. 44 | 45 | If everything works out, a window should appear with a plane model that flies 46 | around slowly learning how to reach the green circle and avoiding the white 47 | circle as in the screenshot above. The white circles denote obstacles, whereas 48 | the green circle denotes a target waypoint. The rays protruding from the plane 49 | represent the distances measured by the plane's sensor. 50 | 51 | The learning process will take several hundreds of episodes, but rerunning the 52 | simulation will proceed from where it left off by reusing the `model.ckpt` 53 | file. 54 | 55 | # Acknowledgements 56 | 57 | This project was inspired by McGill Robotics Obstacle Avoidance at AUVSI SUAS. 58 | -------------------------------------------------------------------------------- /world.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*= 2 | 3 | import numpy as np 4 | from moving_object import Obstacle 5 | from director.debugVis import DebugData 6 | 7 | 8 | class World(object): 9 | 10 | """Base world.""" 11 | 12 | def __init__(self, width, height): 13 | """Construct an empty world. 14 | 15 | Args: 16 | width: Width of the field. 17 | height: Height of the field. 18 | """ 19 | self._data = DebugData() 20 | 21 | self._width = width 22 | self._height = height 23 | self._add_boundaries() 24 | 25 | def _add_boundaries(self): 26 | """Adds boundaries to the world.""" 27 | self._x_max, self._x_min = self._width / 2, -self._width / 2 28 | self._y_max, self._y_min = self._height / 2, -self._height / 2 29 | 30 | corners = [ 31 | (self._x_max, self._y_max, 0), # Top-right corner. 32 | (self._x_max, self._y_min, 0), # Bottom-right corner. 33 | (self._x_min, self._y_min, 0), # Bottom-left corner. 34 | (self._x_min, self._y_max, 0) # Top-left corner. 35 | ] 36 | 37 | # Loopback to begining. 38 | corners.append(corners[0]) 39 | 40 | for start, end in zip(corners, corners[1:]): 41 | self._data.addLine(start, end, radius=0.2) 42 | 43 | def generate_obstacles(self, density=0.05, moving_obstacle_ratio=0.20, 44 | seed=None): 45 | """Generates randomly scattered obstacles to the world. 46 | 47 | Args: 48 | density: Obstacle to world area ratio, default: 0.1. 49 | moving_obstacle_ratio: Ratio of moving to stationary obstacles, 50 | default: 0.2. 51 | seed: Random seed, default: None. 52 | 53 | Yields: 54 | Obstacle. 55 | """ 56 | if seed is not None: 57 | np.random.seed(seed) 58 | 59 | field_area = self._width * self._height 60 | obstacle_area = int(field_area * density) 61 | 62 | bounds = self._x_min, self._x_max, self._y_min, self._y_max 63 | while obstacle_area > 0: 64 | radius = np.random.uniform(1.0, 3.0) 65 | center_x_range = (self._x_min + radius, self._x_max - radius) 66 | center_y_range = (self._y_min + radius, self._y_max - radius) 67 | center_x = np.random.uniform(*center_x_range) 68 | center_y = np.random.uniform(*center_y_range) 69 | theta = np.random.uniform(0., 360.) 70 | obstacle_area -= np.pi * radius ** 2 71 | 72 | # Only some obstacles should be moving. 73 | if np.random.random_sample() >= moving_obstacle_ratio: 74 | velocity = 0.0 75 | else: 76 | velocity = np.random.uniform(-30.0, 30.0) 77 | 78 | obstacle = Obstacle(velocity, radius, bounds) 79 | obstacle.x = center_x 80 | obstacle.y = center_y 81 | obstacle.theta = np.radians(theta) 82 | yield obstacle 83 | 84 | def to_polydata(self): 85 | """Converts world to visualizable poly data.""" 86 | return self._data.getPolyData() 87 | -------------------------------------------------------------------------------- /net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import tensorflow as tf 5 | 6 | 7 | class NeuralNetwork(object): 8 | 9 | def __init__(self, input_size, output_size, hidden_layer_sizes, 10 | learning_rate, dtype=tf.float32): 11 | self._x = tf.placeholder(dtype, [None, input_size]) 12 | self._weights = [] 13 | self._biases = [] 14 | self._layers = [] 15 | 16 | prev_layer = self._x 17 | for layer_size in hidden_layer_sizes: 18 | # Add hidden layer with RELU activation. 19 | prev_layer = self._add_layer(prev_layer, input_size, layer_size, 20 | tf.nn.relu) 21 | input_size = layer_size 22 | 23 | # Add output layer with linear activation. 24 | self._y = self._add_layer(prev_layer, input_size, output_size, 25 | lambda x: x) 26 | 27 | # Set up trainer. 28 | self._y_truth = tf.placeholder(dtype, [None, output_size]) 29 | loss = tf.reduce_mean(tf.square(self._y_truth - self._y)) / 2 30 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 31 | self._trainer = optimizer.minimize(loss) 32 | 33 | # Set up session. 34 | self._session = tf.Session() 35 | self._session.run(tf.initialize_all_variables()) 36 | 37 | self._saver = tf.train.Saver() 38 | 39 | def _add_layer(self, prev_layer, input_size, output_size, activation): 40 | # Build layer. 41 | W = tf.Variable(tf.truncated_normal([input_size, output_size])) 42 | b = tf.Variable(tf.zeros([output_size])) 43 | out = activation(tf.matmul(prev_layer, W) + b) 44 | 45 | # Maintain references. 46 | self._weights.append(W) 47 | self._biases.append(b) 48 | self._layers.append(out) 49 | 50 | return out 51 | 52 | @property 53 | def weights(self): 54 | return self._session.run(self._weights) 55 | 56 | @property 57 | def biases(self): 58 | return self._session.run(self._biases) 59 | 60 | def evaluate(self, x): 61 | return self.evaluate_many([x])[0] 62 | 63 | def evaluate_many(self, xs): 64 | return self._session.run(self._y, feed_dict={self._x: xs}) 65 | 66 | def train(self, x, y): 67 | self.train_many([x], [y]) 68 | 69 | def train_many(self, xs, ys): 70 | self._session.run(self._trainer, 71 | feed_dict={self._x: xs, self._y_truth: ys}) 72 | 73 | def save(self, save_path="model.ckpt"): 74 | save_path = self._saver.save(self._session, save_path) 75 | print("model saved to file: {}".format(save_path)) 76 | 77 | def load(self, load_path="model.ckpt"): 78 | if os.path.exists(load_path): 79 | self._session = tf.Session() 80 | self._saver.restore(self._session, load_path) 81 | print("model restored from: {}".format(load_path)) 82 | 83 | 84 | class Controller(object): 85 | 86 | def __init__(self, learning_rate=0.01): 87 | self._nn = NeuralNetwork(19, 3, [11, 7], learning_rate) 88 | 89 | def evaluate(self, x): 90 | return self._nn.evaluate(x) 91 | 92 | def train(self, x, actual): 93 | self._nn.train(x, actual) 94 | 95 | def load(self): 96 | self._nn.load() 97 | 98 | def save(self): 99 | self._nn.save() 100 | -------------------------------------------------------------------------------- /sensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*= 2 | 3 | import math 4 | import numpy as np 5 | import director.vtkAll as vtk 6 | from director.debugVis import DebugData 7 | 8 | 9 | class RaySensor(object): 10 | 11 | """Ray sensor.""" 12 | 13 | def __init__(self, num_rays=16, radius=40, min_angle=-45, max_angle=45): 14 | """Constructs a RaySensor. 15 | 16 | Args: 17 | num_rays: Number of rays. 18 | radius: Max distance of the rays. 19 | min_angle: Minimum angle of the rays in degrees. 20 | max_angle: Maximum angle of the rays in degrees. 21 | """ 22 | self._num_rays = num_rays 23 | self._radius = radius 24 | self._min_angle = math.radians(min_angle) 25 | self._max_angle = math.radians(max_angle) 26 | 27 | self._locator = None 28 | self._state = [0., 0., 0.] # x, y, theta 29 | 30 | self._hit = np.zeros(num_rays) 31 | self._distances = np.zeros(num_rays) 32 | self._intersections = [[0, 0, 0] for i in range(num_rays)] 33 | 34 | self._update_rays(self._state[2]) 35 | 36 | @property 37 | def distances(self): 38 | """Array of distances measured by each ray.""" 39 | normalized_distances = [ 40 | self._distances[i] / self._radius if self._hit[i] else 1.0 41 | for i in range(self._num_rays) 42 | ] 43 | return normalized_distances 44 | 45 | def has_collided(self, max_distance=0.05): 46 | """Returns whether a collision has occured or not. 47 | 48 | Args: 49 | max_distance: Threshold for collision distance. 50 | """ 51 | for hit, distance in zip(self._hit, self._distances): 52 | if hit and distance <= max_distance: 53 | return True 54 | 55 | return False 56 | 57 | def set_locator(self, locator): 58 | """Sets the vtk cell locator. 59 | 60 | Args: 61 | locator: Cell locator. 62 | """ 63 | self._locator = locator 64 | 65 | def update(self, x, y, theta): 66 | """Updates the sensor's readings. 67 | 68 | Args: 69 | x: X coordinate. 70 | y: Y coordinate. 71 | theta: Yaw. 72 | """ 73 | self._update_rays(theta) 74 | origin = np.array([x, y, 0]) 75 | self._state = [x, y, theta] 76 | 77 | if self._locator is None: 78 | return 79 | 80 | for i in range(self._num_rays): 81 | hit, dist, inter = self._cast_ray(origin, origin + self._rays[i]) 82 | self._hit[i] = hit 83 | self._distances[i] = dist 84 | self._intersections[i] = inter 85 | 86 | def _update_rays(self, theta): 87 | """Updates the rays' readings. 88 | 89 | Args: 90 | theta: Yaw. 91 | """ 92 | r = self._radius 93 | angle_step = (self._max_angle - self._min_angle) / (self._num_rays - 1) 94 | self._rays = [ 95 | np.array([ 96 | r * math.cos(theta + self._min_angle + i * angle_step), 97 | r * math.sin(theta + self._min_angle + i * angle_step), 98 | 0 99 | ]) 100 | for i in range(self._num_rays) 101 | ] 102 | 103 | def _cast_ray(self, start, end): 104 | """Casts a ray and determines intersections and distances. 105 | 106 | Args: 107 | start: Origin of the ray. 108 | end: End point of the ray. 109 | 110 | Returns: 111 | Tuple of (whether it intersected, distance, intersection). 112 | """ 113 | tolerance = 0.0 # intersection tolerance 114 | pt = [0.0, 0.0, 0.0] # coordinate of intersection 115 | distance = vtk.mutable(0.0) # distance of intersection 116 | pcoords = [0.0, 0.0, 0.0] # location within intersected cell 117 | subID = vtk.mutable(0) # subID of intersected cell 118 | 119 | hit = self._locator.IntersectWithLine(start, end, tolerance, 120 | distance, pt, pcoords, subID) 121 | 122 | return hit, distance, pt 123 | 124 | def to_polydata(self): 125 | """Converts the sensor to polydata.""" 126 | d = DebugData() 127 | origin = np.array([self._state[0], self._state[1], 0]) 128 | for hit, intersection, ray in zip(self._hit, 129 | self._intersections, 130 | self._rays): 131 | if hit: 132 | color = [1., 0.45882353, 0.51372549] 133 | endpoint = intersection 134 | else: 135 | color = [0., 0.6, 0.58823529] 136 | endpoint = origin + ray 137 | 138 | d.addLine(origin, endpoint, color=color, radius=0.05) 139 | 140 | return d.getPolyData() 141 | -------------------------------------------------------------------------------- /simulator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import argparse 4 | import numpy as np 5 | from world import World 6 | from PythonQt import QtGui 7 | from net import Controller 8 | from sensor import RaySensor 9 | from director import applogic 10 | from moving_object import Robot 11 | from director import vtkAll as vtk 12 | from director import objectmodel as om 13 | from director.debugVis import DebugData 14 | from director import visualization as vis 15 | from director.consoleapp import ConsoleApp 16 | from director.timercallback import TimerCallback 17 | 18 | 19 | class Simulator(object): 20 | 21 | """Simulator.""" 22 | 23 | def __init__(self, world): 24 | """Constructs the simulator. 25 | 26 | Args: 27 | world: World. 28 | """ 29 | self._robots = [] 30 | self._obstacles = [] 31 | self._world = world 32 | self._app = ConsoleApp() 33 | self._view = self._app.createView(useGrid=False) 34 | 35 | # performance tracker 36 | self._num_targets = 0 37 | self._num_crashes = 0 38 | self._run_ticks = 0 39 | 40 | self._initialize() 41 | 42 | def _initialize(self): 43 | """Initializes the world.""" 44 | # Add world to view. 45 | om.removeFromObjectModel(om.findObjectByName("world")) 46 | vis.showPolyData(self._world.to_polydata(), "world") 47 | 48 | def _add_polydata(self, polydata, frame_name, color): 49 | """Adds polydata to the simulation. 50 | 51 | Args: 52 | polydata: Polydata. 53 | frame_name: Frame name. 54 | color: Color of object. 55 | 56 | Returns: 57 | Frame. 58 | """ 59 | om.removeFromObjectModel(om.findObjectByName(frame_name)) 60 | frame = vis.showPolyData(polydata, frame_name, color=color) 61 | 62 | vis.addChildFrame(frame) 63 | return frame 64 | 65 | def add_target(self, target): 66 | data = DebugData() 67 | center = [target[0], target[1], 1] 68 | axis = [0, 0, 1] # Upright cylinder. 69 | data.addCylinder(center, axis, 2, 3) 70 | om.removeFromObjectModel(om.findObjectByName("target")) 71 | self._add_polydata(data.getPolyData(), "target", [0, 0.8, 0]) 72 | 73 | def add_robot(self, robot): 74 | """Adds a robot to the simulation. 75 | 76 | Args: 77 | robot: Robot. 78 | """ 79 | color = [0.4, 0.85098039, 0.9372549] 80 | frame_name = "robot{}".format(len(self._robots)) 81 | frame = self._add_polydata(robot.to_polydata(), frame_name, color) 82 | self._robots.append((robot, frame)) 83 | self._update_moving_object(robot, frame) 84 | 85 | def add_obstacle(self, obstacle): 86 | """Adds an obstacle to the simulation. 87 | 88 | Args: 89 | obstacle: Obstacle. 90 | """ 91 | color = [1.0, 1.0, 1.0] 92 | frame_name = "obstacle{}".format(len(self._obstacles)) 93 | frame = self._add_polydata(obstacle.to_polydata(), frame_name, color) 94 | self._obstacles.append((obstacle, frame)) 95 | self._update_moving_object(obstacle, frame) 96 | 97 | def _update_moving_object(self, moving_object, frame): 98 | """Updates moving object's state. 99 | 100 | Args: 101 | moving_object: Moving object. 102 | frame: Corresponding frame. 103 | """ 104 | t = vtk.vtkTransform() 105 | t.Translate(moving_object.x, moving_object.y, 0.0) 106 | t.RotateZ(np.degrees(moving_object.theta)) 107 | frame.getChildFrame().copyFrame(t) 108 | 109 | def _update_sensor(self, sensor, frame_name): 110 | """Updates sensor's rays. 111 | 112 | Args: 113 | sensor: Sensor. 114 | frame_name: Frame name. 115 | """ 116 | vis.updatePolyData(sensor.to_polydata(), frame_name, 117 | colorByName="RGB255") 118 | 119 | def update_locator(self): 120 | """Updates cell locator.""" 121 | d = DebugData() 122 | 123 | d.addPolyData(self._world.to_polydata()) 124 | for obstacle, frame in self._obstacles: 125 | d.addPolyData(obstacle.to_positioned_polydata()) 126 | 127 | self.locator = vtk.vtkCellLocator() 128 | self.locator.SetDataSet(d.getPolyData()) 129 | self.locator.BuildLocator() 130 | 131 | def run(self, display): 132 | """Launches and displays the simulator. 133 | 134 | Args: 135 | display: Displays the simulator or not. 136 | """ 137 | if display: 138 | widget = QtGui.QWidget() 139 | layout = QtGui.QVBoxLayout(widget) 140 | layout.addWidget(self._view) 141 | widget.showMaximized() 142 | 143 | # Set camera. 144 | applogic.resetCamera(viewDirection=[0.2, 0, -1]) 145 | 146 | # Set timer. 147 | self._tick_count = 0 148 | self._timer = TimerCallback(targetFps=120) 149 | self._timer.callback = self.tick 150 | self._timer.start() 151 | 152 | self._app.start() 153 | 154 | def tick(self): 155 | """Update simulation clock.""" 156 | self._tick_count += 1 157 | self._run_ticks += 1 158 | if self._tick_count >= 500: 159 | print("timeout") 160 | for robot, frame in self._robots: 161 | self.reset(robot, frame) 162 | 163 | need_update = False 164 | for obstacle, frame in self._obstacles: 165 | if obstacle.velocity != 0.: 166 | obstacle.move() 167 | self._update_moving_object(obstacle, frame) 168 | need_update = True 169 | 170 | if need_update: 171 | self.update_locator() 172 | 173 | for i, (robot, frame) in enumerate(self._robots): 174 | self._update_moving_object(robot, frame) 175 | for sensor in robot.sensors: 176 | sensor.set_locator(self.locator) 177 | robot.move() 178 | for sensor in robot.sensors: 179 | frame_name = "rays{}".format(i) 180 | self._update_sensor(sensor, frame_name) 181 | if sensor.has_collided(): 182 | self._num_crashes += 1 183 | print("collided", min(d for d in sensor._distances if d > 0)) 184 | print("targets hit", self._num_targets) 185 | print("ticks lived", self._run_ticks) 186 | print("deaths", self._num_crashes) 187 | self._run_ticks = 0 188 | self._num_targets = 0 189 | new_target = self.generate_position() 190 | for robot, frame in self._robots: 191 | robot.set_target(new_target) 192 | self.add_target(new_target) 193 | self.reset(robot, frame) 194 | 195 | if robot.at_target(): 196 | self._num_targets += 1 197 | self._tick_count = 0 198 | new_target = self.generate_position() 199 | for robot, frame in self._robots: 200 | robot.set_target(new_target) 201 | self.add_target(new_target) 202 | 203 | def generate_position(self): 204 | return tuple(np.random.uniform(-75, 75, 2)) 205 | 206 | def set_safe_position(self, robot): 207 | while True: 208 | robot.x, robot.y = self.generate_position() 209 | robot.theta = np.random.uniform(0, 2 * np.pi) 210 | if min(robot.sensors[0].distances) >= 0.30: 211 | return 212 | 213 | def reset(self, robot, frame_name): 214 | self._tick_count = 0 215 | self.set_safe_position(robot) 216 | self._update_moving_object(robot, frame_name) 217 | robot._ctrl.save() 218 | 219 | 220 | def get_args(): 221 | """Gets parsed command-line arguments. 222 | 223 | Returns: 224 | Parsed command-line arguments. 225 | """ 226 | parser = argparse.ArgumentParser(description="avoids obstacles") 227 | parser.add_argument("--obstacle-density", default=0.01, type=float, 228 | help="area density of obstacles") 229 | parser.add_argument("--moving-obstacle-ratio", default=0.0, type=float, 230 | help="percentage of moving obstacles") 231 | parser.add_argument("--exploration", default=0.5, type=float, 232 | help="exploration rate") 233 | parser.add_argument("--learning-rate", default=0.01, type=float, 234 | help="learning rate") 235 | parser.add_argument("--no-display", action="store_false", default=True, 236 | help="whether to display the simulator or not", 237 | dest="display") 238 | 239 | return parser.parse_args() 240 | 241 | 242 | if __name__ == "__main__": 243 | args = get_args() 244 | 245 | world = World(200, 200) 246 | sim = Simulator(world) 247 | for obstacle in world.generate_obstacles(args.obstacle_density, 248 | args.moving_obstacle_ratio): 249 | sim.add_obstacle(obstacle) 250 | 251 | sim.update_locator() 252 | 253 | target = sim.generate_position() 254 | sim.add_target(target) 255 | 256 | controller = Controller(args.learning_rate) 257 | controller.load() 258 | 259 | robot = Robot(exploration=args.exploration) 260 | robot.set_target(target) 261 | robot.attach_sensor(RaySensor()) 262 | robot.set_controller(controller) 263 | sim.set_safe_position(robot) 264 | sim.add_robot(robot) 265 | 266 | sim.run(args.display) 267 | controller.save() 268 | -------------------------------------------------------------------------------- /moving_object.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from net import Controller 5 | from director import vtkAll as vtk 6 | from director.debugVis import DebugData 7 | from director import ioUtils, filterUtils 8 | 9 | 10 | class MovingObject(object): 11 | 12 | """Moving object.""" 13 | 14 | def __init__(self, velocity, polydata): 15 | """Constructs a MovingObject. 16 | 17 | Args: 18 | velocity: Velocity. 19 | polydata: Polydata. 20 | """ 21 | self._state = np.array([0., 0., 0.]) 22 | self._velocity = float(velocity) 23 | self._raw_polydata = polydata 24 | self._polydata = polydata 25 | self._sensors = [] 26 | 27 | @property 28 | def x(self): 29 | """X coordinate.""" 30 | return self._state[0] 31 | 32 | @x.setter 33 | def x(self, value): 34 | """X coordinate.""" 35 | next_state = self._state.copy() 36 | next_state[0] = float(value) 37 | self._update_state(next_state) 38 | 39 | @property 40 | def y(self): 41 | """Y coordinate.""" 42 | return self._state[1] 43 | 44 | @y.setter 45 | def y(self, value): 46 | """Y coordinate.""" 47 | next_state = self._state.copy() 48 | next_state[1] = float(value) 49 | self._update_state(next_state) 50 | 51 | @property 52 | def theta(self): 53 | """Yaw in radians.""" 54 | return self._state[2] 55 | 56 | @theta.setter 57 | def theta(self, value): 58 | """Yaw in radians.""" 59 | next_state = self._state.copy() 60 | next_state[2] = float(value) % (2 * np.pi) 61 | self._update_state(next_state) 62 | 63 | @property 64 | def velocity(self): 65 | """Velocity.""" 66 | return self._velocity 67 | 68 | @velocity.setter 69 | def velocity(self, value): 70 | """Velocity.""" 71 | self._velocity = float(value) 72 | 73 | @property 74 | def sensors(self): 75 | """List of attached sensors.""" 76 | return self._sensors 77 | 78 | def attach_sensor(self, sensor): 79 | """Attaches a sensor. 80 | 81 | Args: 82 | sensor: Sensor. 83 | """ 84 | self._sensors.append(sensor) 85 | 86 | def _dynamics(self, state, t, controller=None): 87 | """Dynamics of the object. 88 | 89 | Args: 90 | state: Initial condition. 91 | t: Time. 92 | 93 | Returns: 94 | Derivative of state at t. 95 | """ 96 | dqdt = np.zeros_like(state) 97 | dqdt[0] = self._velocity * np.cos(state[2]) 98 | dqdt[1] = self._velocity * np.sin(state[2]) 99 | dqdt[2] = self._control(state, t) 100 | return dqdt * t 101 | 102 | def _control(self, state, t): 103 | """Returns the yaw given state. 104 | 105 | Args: 106 | state: State. 107 | t: Time. 108 | 109 | Returns: 110 | Yaw. 111 | """ 112 | raise NotImplementedError 113 | 114 | def _simulate(self, dt): 115 | """Simulates the object moving. 116 | 117 | Args: 118 | dt: Time length of step. 119 | 120 | Returns: 121 | New state. 122 | """ 123 | return self._state + self._dynamics(self._state, dt) 124 | 125 | def move(self, dt=1.0/30.0): 126 | """Moves the object by a given time step. 127 | 128 | Args: 129 | dt: Length of time step. 130 | """ 131 | state = self._simulate(dt) 132 | self._update_state(state) 133 | 134 | def _update_state(self, next_state): 135 | """Updates the moving object's state. 136 | 137 | Args: 138 | next_state: New state. 139 | """ 140 | t = vtk.vtkTransform() 141 | t.Translate([next_state[0], next_state[1], 0.]) 142 | t.RotateZ(np.degrees(next_state[2])) 143 | self._polydata = filterUtils.transformPolyData(self._raw_polydata, t) 144 | self._state = next_state 145 | list(map(lambda s: s.update(*self._state), self._sensors)) 146 | 147 | def to_positioned_polydata(self): 148 | """Converts object to visualizable poly data. 149 | 150 | Note: Transformations have been already applied to this. 151 | """ 152 | return self._polydata 153 | 154 | def to_polydata(self): 155 | """Converts object to visualizable poly data. 156 | 157 | Note: This is centered at (0, 0, 0) and is not rotated. 158 | """ 159 | return self._raw_polydata 160 | 161 | 162 | class Robot(MovingObject): 163 | 164 | """Robot.""" 165 | 166 | def __init__(self, velocity=25.0, scale=0.15, exploration=0.5, 167 | model="A10.obj"): 168 | """Constructs a Robot. 169 | 170 | Args: 171 | velocity: Velocity of the robot in the forward direction. 172 | scale: Scale of the model. 173 | exploration: Exploration rate. 174 | model: Object model to use. 175 | """ 176 | self._target = (0, 0) 177 | self._exploration = exploration 178 | t = vtk.vtkTransform() 179 | t.Scale(scale, scale, scale) 180 | polydata = ioUtils.readPolyData(model) 181 | polydata = filterUtils.transformPolyData(polydata, t) 182 | super(Robot, self).__init__(velocity, polydata) 183 | self._ctrl = Controller() 184 | 185 | def move(self, dt=1.0/30.0): 186 | """Moves the object by a given time step. 187 | 188 | Args: 189 | dt: Length of time step. 190 | """ 191 | gamma = 0.9 192 | prev_xy = self._state[0], self._state[1] 193 | prev_state = self._get_state() 194 | prev_utilities = self._ctrl.evaluate(prev_state) 195 | super(Robot, self).move(dt) 196 | next_state = self._get_state() 197 | next_utilities = self._ctrl.evaluate(next_state) 198 | print("action: {}, utility: {}".format( 199 | self._selected_i, prev_utilities[self._selected_i])) 200 | 201 | terminal = self._sensors[0].has_collided() 202 | curr_reward = self._get_reward(prev_xy) 203 | total_reward =\ 204 | curr_reward if terminal else \ 205 | curr_reward + gamma * next_utilities[self._selected_i] 206 | rewards = [total_reward if i == self._selected_i else prev_utilities[i] 207 | for i in range(len(next_utilities))] 208 | self._ctrl.train(prev_state, rewards) 209 | 210 | def set_target(self, target): 211 | self._target = target 212 | 213 | def set_controller(self, ctrl): 214 | self._ctrl = ctrl 215 | 216 | def at_target(self, threshold=3): 217 | """Return whether the robot has reached its target. 218 | 219 | Args: 220 | threshold: Target distance threshold. 221 | 222 | Returns: 223 | True if target is reached. 224 | """ 225 | return (abs(self._state[0] - self._target[0]) <= threshold and 226 | abs(self._state[1] - self._target[1]) <= threshold) 227 | 228 | def _get_reward(self, prev_state): 229 | prev_dx = self._target[0] - prev_state[0] 230 | prev_dy = self._target[1] - prev_state[1] 231 | prev_distance = np.sqrt(prev_dx ** 2 + prev_dy ** 2) 232 | new_dx = self._target[0] - self._state[0] 233 | new_dy = self._target[1] - self._state[1] 234 | new_distance = np.sqrt(new_dx ** 2 + new_dy ** 2) 235 | if self._sensors[0].has_collided(): 236 | return -20 237 | elif self.at_target(): 238 | return 15 239 | else: 240 | delta_distance = prev_distance - new_distance 241 | angle_distance = -abs(self._angle_to_destination()) / 4 242 | obstacle_ahead = self._sensors[0].distances[8] - 1 243 | return delta_distance + angle_distance + obstacle_ahead 244 | 245 | def _angle_to_destination(self): 246 | x, y = self._target[0] - self.x, self._target[1] - self.y 247 | return self._wrap_angles(np.arctan2(y, x) - self.theta) 248 | 249 | def _wrap_angles(self, a): 250 | return (a + np.pi) % (2 * np.pi) - np.pi 251 | 252 | def _get_state(self): 253 | dx, dy = self._target[0] - self.x, self._target[1] - self.y 254 | curr_state = [dx / 1000, dy / 1000, self._angle_to_destination()] 255 | return np.hstack([curr_state, self._sensors[0].distances]) 256 | 257 | def _control(self, state, t): 258 | """Returns the yaw given state. 259 | 260 | Args: 261 | state: State. 262 | t: Time. 263 | 264 | Returns: 265 | Yaw. 266 | """ 267 | actions = [-np.pi/2, 0., np.pi/2] 268 | 269 | utilities = self._ctrl.evaluate(self._get_state()) 270 | optimal_i = np.argmax(utilities) 271 | if np.random.random() <= self._exploration: 272 | optimal_i = np.random.choice([0, 1, 2]) 273 | 274 | optimal_a = actions[optimal_i] 275 | self._selected_i = optimal_i 276 | return optimal_a 277 | 278 | 279 | class Obstacle(MovingObject): 280 | 281 | """Obstacle.""" 282 | 283 | def __init__(self, velocity, radius, bounds, height=1.0): 284 | """Constructs a Robot. 285 | 286 | Args: 287 | velocity: Velocity of the robot in the forward direction. 288 | radius: Radius of the obstacle. 289 | """ 290 | data = DebugData() 291 | self._bounds = bounds 292 | self._radius = radius 293 | self._height = height 294 | center = [0, 0, height / 2 - 0.5] 295 | axis = [0, 0, 1] # Upright cylinder. 296 | data.addCylinder(center, axis, height, radius) 297 | polydata = data.getPolyData() 298 | super(Obstacle, self).__init__(velocity, polydata) 299 | 300 | def _control(self, state, t): 301 | """Returns the yaw given state. 302 | 303 | Args: 304 | state: State. 305 | t: Time. 306 | 307 | Returns: 308 | Yaw. 309 | """ 310 | x_min, x_max, y_min, y_max = self._bounds 311 | x, y, theta = state 312 | if x - self._radius <= x_min: 313 | return np.pi 314 | elif x + self._radius >= x_max: 315 | return np.pi 316 | elif y - self._radius <= y_min: 317 | return np.pi 318 | elif y + self._radius >= y_max: 319 | return np.pi 320 | return 0. 321 | --------------------------------------------------------------------------------