├── .gitignore ├── README.md ├── Report.pdf ├── assets └── report-frontpage.png ├── main.py ├── models ├── actor.py └── critic.py ├── plot.py ├── robot-env ├── README.md ├── robot_env.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt ├── robot_env │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ └── envs │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── robot_env.cpython-36.pyc │ │ └── robot_extrahard_env.cpython-36.pyc │ │ ├── assets │ │ ├── robot.png │ │ └── target.png │ │ ├── robot_env.py │ │ ├── robot_env_controller.py │ │ ├── robot_env_path.py │ │ ├── simulation.py │ │ └── viewer.py └── setup.py └── spinning.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,pycharm+iml 3 | # Edit at https://www.gitignore.io/?templates=python,pycharm+iml 4 | 5 | ### PyCharm+iml ### 6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 8 | 9 | # User-specific stuff 10 | .idea/**/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/**/usage.statistics.xml 13 | .idea/**/dictionaries 14 | .idea/**/shelf 15 | 16 | # Generated files 17 | .idea/**/contentModel.xml 18 | 19 | # Sensitive or high-churn files 20 | .idea/**/dataSources/ 21 | .idea/**/dataSources.ids 22 | .idea/**/dataSources.local.xml 23 | .idea/**/sqlDataSources.xml 24 | .idea/**/dynamic.xml 25 | .idea/**/uiDesigner.xml 26 | .idea/**/dbnavigator.xml 27 | 28 | # Gradle 29 | .idea/**/gradle.xml 30 | .idea/**/libraries 31 | 32 | # Gradle and Maven with auto-import 33 | # When using Gradle or Maven with auto-import, you should exclude module files, 34 | # since they will be recreated, and may cause churn. Uncomment if using 35 | # auto-import. 
36 | # .idea/modules.xml
37 | # .idea/*.iml
38 | # .idea/modules
39 | # *.iml
40 | # *.ipr
41 |
42 | # CMake
43 | cmake-build-*/
44 |
45 | # Mongo Explorer plugin
46 | .idea/**/mongoSettings.xml
47 |
48 | # File-based project format
49 | *.iws
50 |
51 | # IntelliJ
52 | out/
53 |
54 | # mpeltonen/sbt-idea plugin
55 | .idea_modules/
56 |
57 | # JIRA plugin
58 | atlassian-ide-plugin.xml
59 |
60 | # Cursive Clojure plugin
61 | .idea/replstate.xml
62 |
63 | # Crashlytics plugin (for Android Studio and IntelliJ)
64 | com_crashlytics_export_strings.xml
65 | crashlytics.properties
66 | crashlytics-build.properties
67 | fabric.properties
68 |
69 | # Editor-based Rest Client
70 | .idea/httpRequests
71 |
72 | # Android studio 3.1+ serialized cache file
73 | .idea/caches/build_file_checksums.ser
74 |
75 | ### PyCharm+iml Patch ###
76 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
77 |
78 | *.iml
79 | modules.xml
80 | .idea/misc.xml
81 | *.ipr
82 |
83 | ### Python ###
84 | # Byte-compiled / optimized / DLL files
85 | __pycache__/
86 | *.py[cod]
87 | *$py.class
88 |
89 | # C extensions
90 | *.so
91 |
92 | # Distribution / packaging
93 | .Python
94 | build/
95 | develop-eggs/
96 | dist/
97 | downloads/
98 | eggs/
99 | .eggs/
100 | lib/
101 | lib64/
102 | parts/
103 | sdist/
104 | var/
105 | wheels/
106 | pip-wheel-metadata/
107 | share/python-wheels/
108 | *.egg-info/
109 | .installed.cfg
110 | *.egg
111 | MANIFEST
112 |
113 | # PyInstaller
114 | # Usually these files are written by a python script from a template
115 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
116 | *.manifest
117 | *.spec
118 |
119 | # Installer logs
120 | pip-log.txt
121 | pip-delete-this-directory.txt
122 |
123 | # Unit test / coverage reports
124 | htmlcov/
125 | .tox/
126 | .nox/
127 | .coverage
128 | .coverage.*
129 | .cache
130 | nosetests.xml
131 | coverage.xml
132 | *.cover
133 | .hypothesis/
134 | .pytest_cache/
135 |
136 | # Translations
137 | *.mo
138 | *.pot
139 |
140 | # Scrapy stuff:
141 | .scrapy
142 |
143 | # Sphinx documentation
144 | docs/_build/
145 |
146 | # PyBuilder
147 | target/
148 |
149 | # pyenv
150 | .python-version
151 |
152 | # pipenv
153 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
154 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
155 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
156 | # install all needed dependencies.
157 | #Pipfile.lock
158 |
159 | # celery beat schedule file
160 | celerybeat-schedule
161 |
162 | # SageMath parsed files
163 | *.sage.py
164 |
165 | # Spyder project settings
166 | .spyderproject
167 | .spyproject
168 |
169 | # Rope project settings
170 | .ropeproject
171 |
172 | # Mr Developer
173 | .mr.developer.cfg
174 | .project
175 | .pydevproject
176 |
177 | # mkdocs documentation
178 | /site
179 |
180 | # mypy
181 | .mypy_cache/
182 | .dmypy.json
183 | dmypy.json
184 |
185 | # Pyre type checker
186 | .pyre/
187 |
188 | # End of https://www.gitignore.io/api/python,pycharm+iml
189 |
190 | .idea
191 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement Learning for Path Following
2 |
3 | _Consider the problem of a robot attempting to follow a path in a constrained environment, given only a few line segments to follow.
We attempt this using end-to-end reinforcement learning and explore two algorithms for doing so: Deep Deterministic Policy Gradients (DDPG) and Proximal Policy Optimisation (PPO). We further explore different problem formulations, learning either the parameters of a path-following controller or the agent's velocities directly, and report our findings._
4 |
5 | ## Report
6 |
7 | The PDF containing the project's report can be found in [Report.pdf](Report.pdf).
8 |
9 | ![Report front page](assets/report-frontpage.png)
10 |
11 | ## Presentation slides
12 |
13 | Slides used for the project's presentation can be found [here](https://slides.com/jurajmicko/mrs-project7-presentation/).
14 |
15 | ## Authors
16 | * Juraj Micko (jm2186@cam.ac.uk)
17 | * Kwot Sin Lee (ksl36@cam.ac.uk)
18 | * Wilson Suen (wss28@cam.ac.uk)
--------------------------------------------------------------------------------
/Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/Report.pdf
--------------------------------------------------------------------------------
/assets/report-frontpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/assets/report-frontpage.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')
3 |
4 | import numpy as np
5 | import gym
6 |
7 | from keras.layers import Input
8 | from keras.optimizers import Adam
9 |
10 | from rl.agents import DDPGAgent
11 | from rl.memory import SequentialMemory
12 | from rl.random import OrnsteinUhlenbeckProcess
13 |
14 | from models import actor, critic
15 |
16 | ############
17 | # Setup
18 | ############
19 | ENV_NAME = 'robot_env:robot-env-path-v0'
20 |
21 | # Get the environment and extract the number of actions.
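# The 'robot_env:robot-env-path-v0' id is registered by the local robot-env package
# (see robot-env/robot_env/__init__.py). A minimal setup sketch, assuming the package
# is installed in editable mode from the repository root before running this script:
#
#     pip install -e robot-env
#
# With that done, gym.make() below can resolve the robot_env.envs:RobotEnvPath entry point.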
22 | env = gym.make(ENV_NAME) 23 | np.random.seed(123) 24 | env.seed(123) 25 | nb_actions = env.action_space.shape[0] 26 | 27 | ################ 28 | # Parameters 29 | ################ 30 | # Training parameters 31 | batch_size = 64 32 | lr = 1e-3 33 | max_episode_steps = 100 34 | limit = 100000 35 | nb_steps = 10000 36 | 37 | # Agent parameters 38 | num_steps_warmup_critic = 100 39 | num_steps_warmup_actor = 100 40 | gamma = 0.99 41 | 42 | ############### 43 | # Models 44 | ############### 45 | action_input = Input(shape=(nb_actions,), name='action_input') 46 | observation_input = Input(shape=(batch_size,) + env.observation_space.shape, name='observation_input') 47 | num_feat = 32 48 | 49 | # print(env.observation_space.shape) 50 | # print(nb_actions) 51 | # assert False 52 | 53 | actor = actor.build_actor( 54 | batch_size=batch_size, 55 | nb_actions=nb_actions, 56 | env=env) 57 | 58 | critic = critic.build_critic( 59 | action_input=action_input, 60 | observation_input=observation_input) 61 | 62 | # Optimizer 63 | opt = Adam(lr=lr, clipnorm=1.0) 64 | # opt = Adam(lr=lr, clipnorm=0.01) 65 | 66 | # Build and compile agent 67 | memory = SequentialMemory( 68 | limit=limit, 69 | window_length=batch_size) 70 | 71 | random_process = OrnsteinUhlenbeckProcess( 72 | size=nb_actions, 73 | theta=.15, 74 | mu=0., 75 | sigma=.3) 76 | 77 | agent = DDPGAgent( 78 | nb_actions=nb_actions, 79 | actor=actor, 80 | critic=critic, 81 | critic_action_input=action_input, 82 | memory=memory, 83 | nb_steps_warmup_critic=num_steps_warmup_critic, 84 | nb_steps_warmup_actor=num_steps_warmup_actor, 85 | random_process=random_process, 86 | gamma=gamma, 87 | target_model_update=1e-2) 88 | 89 | agent.compile(opt, metrics=['mae']) 90 | 91 | history = agent.fit( 92 | env, 93 | nb_steps=30000, 94 | visualize=True, 95 | verbose=0, 96 | nb_max_episode_steps=max_episode_steps) 97 | 98 | print(history) 99 | 100 | # After training is done, we save the final weights. 101 | agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True) 102 | 103 | #%% 104 | 105 | # Finally, evaluate our algorithm for 5 episodes. 
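# Optionally, a previously trained agent can be evaluated instead of the one trained
# above by reloading its saved weights first. A sketch using the keras-rl API, assuming
# the file name matches the one passed to save_weights() above:
#
#     agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))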
106 | agent.test(
107 |     env,
108 |     nb_episodes=5,
109 |     visualize=True,
110 |     nb_max_episode_steps=max_episode_steps)
111 |
--------------------------------------------------------------------------------
/models/actor.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten
3 | from keras.models import Sequential
4 |
5 |
6 | def build_actor(batch_size, nb_actions, env):
7 |     """
8 |     Build a small MLP actor that maps flattened observations to nb_actions outputs squashed to [-1, 1] by tanh.
9 |     """
10 |     init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 |     actor = Sequential([
12 |         Flatten(input_shape=(batch_size,) + env.observation_space.shape),
13 |         Dense(8, activation='relu', kernel_initializer=init),
14 |         Dense(8, activation='relu', kernel_initializer=init),
15 |         Dense(nb_actions, activation='tanh', kernel_initializer=init),
16 |     ])
17 |     print(actor.summary())
18 |
19 |     return actor
20 |
--------------------------------------------------------------------------------
/models/critic.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten, Concatenate
3 | from keras.models import Model
4 |
5 |
6 | def build_critic(action_input, observation_input):
7 |     """
8 |     Build an MLP critic (Q-network) that maps an (action, observation) pair to a single Q-value.
9 |     """
10 |     init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 |     flattened_observation = Flatten()(observation_input)
12 |     x = Concatenate()([action_input, flattened_observation])
13 |     x = Dense(8, activation='relu', kernel_initializer=init)(x)
14 |     x = Dense(8, activation='relu', kernel_initializer=init)(x)
15 |     x = Dense(1, activation='linear', kernel_initializer=init)(x)
16 |     critic = Model(inputs=[action_input, observation_input], outputs=x)
17 |     print(critic.summary())
18 |
19 |     return critic
20 |
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import gym
3 | import numpy as np
4 |
5 | ENV_NAME = 'robot_env:robot-env-v0'
6 |
7 | env = gym.make(ENV_NAME)
8 | np.random.seed(123)
9 | env.seed(123)
10 | nb_actions = env.action_space.shape[0]
11 |
12 | MIN_X, MAX_X = (-5, 5)
13 | MIN_Y, MAX_Y = (-5, 5)
14 |
15 | def get_output(observation):
16 |     '''
17 |     Placeholder policy: feed the observation into a trained agent here to
18 |     obtain the action (u, w). For now it returns the observation itself so
19 |     the quiver plot can be produced without a trained model.
20 |     '''
21 |     action = [observation[0], observation[1]]
22 |     return action
23 |
24 | ax = plt.axes()
25 | ax.set(xlim=(MIN_X, MAX_X), ylim=(MIN_Y, MAX_Y))
26 |
27 | split = 30
28 | X_grid, Y_grid = np.meshgrid(np.linspace(MIN_X, MAX_X, split),
29 |                              np.linspace(MIN_Y, MAX_Y, split))
30 | U = np.zeros_like(X_grid)
31 | V = np.zeros_like(U)
32 |
33 | for X in range(len(X_grid)):
34 |     for Y in range(len(X_grid[0])):
35 |         cur_X = X * (MAX_X - MIN_X) / (split-1) + MIN_X
36 |         cur_Y = Y * (MAX_Y - MIN_Y) / (split-1) + MIN_Y
37 |         action = get_output([cur_X, cur_Y])
38 |         U[X, Y] = action[0]
39 |         V[X, Y] = action[1]
40 |         #ax.arrow(cur_X, cur_Y, cur_X, cur_Y, head_width=0.05, head_length=0.1)
41 | ax.quiver(X_grid, Y_grid, U, V, units='width')
42 | plt.savefig('potential_template.png')
43 | plt.show()
44 |
--------------------------------------------------------------------------------
/robot-env/README.md:
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/README.md -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: robot-env 3 | Version: 0.0.1 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | robot_env.egg-info/PKG-INFO 4 | robot_env.egg-info/SOURCES.txt 5 | robot_env.egg-info/dependency_links.txt 6 | robot_env.egg-info/requires.txt 7 | robot_env.egg-info/top_level.txt -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | gym 2 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /robot-env/robot_env/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='robot-env-v0', 5 | entry_point='robot_env.envs:RobotEnv', 6 | ) 7 | register( 8 | id='robot-env-path-v0', 9 | entry_point='robot_env.envs:RobotEnvPath', 10 | ) 11 | register( 12 | id='robot-env-controller-v0', 13 | entry_point='robot_env.envs:RobotEnvController', 14 | ) 15 | -------------------------------------------------------------------------------- /robot-env/robot_env/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from robot_env.envs.robot_env import RobotEnv 2 | from robot_env.envs.robot_env_path import RobotEnvPath 3 | from robot_env.envs.robot_env_controller import RobotEnvController 4 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/assets/robot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/robot.png -------------------------------------------------------------------------------- /robot-env/robot_env/envs/assets/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/target.png -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | 5 | from .simulation import Simulation 6 | from .viewer import Viewer 7 | 8 | 9 | class RobotEnv(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 30 13 | } 14 | 15 | def __init__(self): 16 | """ 17 | Action spaces 18 | 1. forward u 19 | 2. change in rotational w 20 | 21 | Observation Space: 22 | 1. forward u 23 | 2. rotational w 24 | 3. x 25 | 4. y 26 | 5. theta heading 27 | """ 28 | self.dt = 0.1 # Time delta per step 29 | self.sim = Simulation(self.dt) 30 | self.get_virtual_position = lambda: self.sim.position 31 | self.seed(0) 32 | self.testItr = 0 33 | 34 | # Boundaries of the environment 35 | self.MIN_SPEED = 0.2 36 | self.MAX_SPEED = 1. 37 | self.MAX_THETA = 2. 38 | 39 | # Environment parameters 40 | self.path = np.array([]) 41 | self.goal_pos = np.array([3, 0]) # goal position 42 | self.dist_threshold = 0.5 # how close to goal = reach goal 43 | 44 | # Action and observation spaces 45 | self.action_space = spaces.Box( 46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32), 47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32), 48 | dtype=np.float32) 49 | 50 | self.observation_space = spaces.Box( 51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32), 52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32), 53 | dtype=np.float32) 54 | 55 | # Visualisation variables 56 | self.viewer = None 57 | 58 | def _is_goal_reached(self): 59 | """ 60 | Check if goal is reached. 
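        Returns True when the distance from the robot's (virtual) position to self.goal_pos is below self.dist_threshold.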
61 | """ 62 | return self._get_goal_dist() < self.dist_threshold 63 | 64 | def _is_done(self): 65 | return self.sim.is_invalid() or self._is_goal_reached() 66 | 67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray): 68 | return np.linalg.norm(p1 - p2) 69 | 70 | def _get_goal_dist(self): 71 | return self._get_dist(self.get_virtual_position(), self.goal_pos) 72 | 73 | def _get_reward(self): 74 | u, w = self.sim.speed 75 | x, y = self.sim.position 76 | theta = self.sim.theta 77 | 78 | reward_distance = 0 79 | 80 | next_x, next_y = (x + u * np.cos(theta) * self.sim.dt, y + u * np.cos(theta) * self.dt) 81 | next_pos = np.array([next_x, next_y]) 82 | if self._get_goal_dist() < 2.: 83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos( 84 | np.clip(self.sim.theta * 2, -np.pi, np.pi)) 85 | else: 86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos) 87 | 88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1 89 | if reward_directional < 0: 90 | reward_directional *= 4 91 | if reward_directional < -np.pi * 2: 92 | reward_directional = -np.pi * 2 93 | 94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1 95 | # Check correctness 96 | if self._is_goal_reached(): 97 | reward += 25 / self.sim.time 98 | 99 | return reward 100 | 101 | def _get_observation(self): 102 | x, y = self.get_virtual_position() 103 | theta = self.sim.theta 104 | 105 | goal_relative = np.array([ 106 | [np.cos(-theta), -np.sin(-theta)], 107 | [np.sin(-theta), np.cos(-theta)] 108 | ]).dot(self.goal_pos - np.array([x, y])) 109 | 110 | return goal_relative 111 | 112 | def step(self, action: np.ndarray): 113 | """ 114 | Args: 115 | - action (tuple): u and change in v. 116 | Returns: 117 | - observation (object): 118 | - reward (float): 119 | - done (boolean): 120 | - info (dict): 121 | """ 122 | 123 | u, w = action 124 | u = (np.tanh(u) + 1) / 2 * ( 125 | self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED # normalize the range of action is -1.5 to 1.5 126 | w = np.tanh(w) * self.MAX_THETA # normalize 127 | 128 | self.sim.step(np.array([u, w])) 129 | 130 | return (self._get_observation()), (self._get_reward()), self._is_done(), {} 131 | 132 | def _print_info(self, reward): 133 | frequency = 50 134 | if self._is_done() or self.sim.ticks % np.round(1 / self.dt / frequency) == 0: 135 | u, w = self.sim.speed 136 | x, y = self.sim.position 137 | 138 | print(f"T {self.sim.time}: Pos ({x:.4f}, {y:.4f}), action ({u:.4f}, {w:.4f}), reward {reward}") 139 | 140 | def reset(self): 141 | self.sim.reset_mode(self.testItr) 142 | self.testItr += 1 143 | if self.testItr > 200 and self.testItr % 10 == 0: 144 | self.goal_pos = np.array([np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9, np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9]) 145 | return self._get_observation() 146 | 147 | def render(self, mode='human'): 148 | if self.viewer is None: 149 | self.viewer = Viewer(self) 150 | 151 | return self.viewer.render(mode) 152 | 153 | def _abs_to_rel(self, vec): 154 | theta = self.sim.theta 155 | return np.array([ 156 | [np.cos(-theta), -np.sin(-theta)], 157 | [np.sin(-theta), np.cos(-theta)] 158 | ]).dot(vec) 159 | 160 | def get_cp_vector(self): 161 | rg = self.goal_pos - self.sim.position 162 | sg = self.goal_pos - self.sim.start_state[[2,3]] 163 | sg_norm = sg / np.linalg.norm(sg) 164 | pg = rg.dot(sg_norm) * sg_norm 165 | rp = rg - pg 166 | return self._abs_to_rel(rp) 167 | 168 | def get_rough_direction(self): 169 | return 
self._abs_to_rel(self.goal_pos - self.get_virtual_position()) 170 | 171 | def close(self): 172 | if self.viewer: 173 | self.viewer.viewer.close() 174 | self.viewer = None 175 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env_controller.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | from simple_pid import PID 5 | 6 | from .robot_env import RobotEnv 7 | from .robot_env_path import RobotEnvPath 8 | 9 | 10 | class RobotEnvController(RobotEnvPath): 11 | metadata = { 12 | 'render.modes': ['human', 'rgb_array'], 13 | 'video.frames_per_second': 30 14 | } 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.pid = PID(1., 1., 1., 19 | sample_time=None) 20 | self.epsilon = 0.1 21 | self.get_virtual_position = self.get_holonomic_point 22 | 23 | n_params = 3 24 | self.action_space = spaces.Box( 25 | low=np.array([-np.inf] * n_params, dtype=np.float32), 26 | high=np.array([np.inf] * n_params, dtype=np.float32), 27 | dtype=np.float32) 28 | 29 | def step(self, action: np.ndarray): 30 | """ 31 | :param action: [kp, ki, kd] 32 | :return: observation 33 | """ 34 | # Set PID parameters 35 | self.pid.tunings = [0., 0., 0.] 36 | 37 | vector_forward = self.get_rough_direction() 38 | vector_perpendicular = self.get_cp_vector() 39 | sign = np.sign(np.cross(vector_forward, vector_perpendicular)) 40 | vector_perpendicular_oriented = vector_perpendicular * sign 41 | 42 | # Use PID 43 | error_signed = np.linalg.norm(vector_perpendicular) * sign 44 | 45 | direction_forward = vector_forward / np.linalg.norm(vector_forward) 46 | direction_perpendicular = vector_perpendicular_oriented / np.abs(error_signed) if np.abs(error_signed) > 1e-7 else direction_forward 47 | 48 | desired_vector = -self.pid(error_signed) * direction_perpendicular + direction_forward 49 | 50 | direction_desired = desired_vector / np.linalg.norm(desired_vector) 51 | 52 | # Feedback linearisation 53 | theta = 0. 54 | u, w = np.array([ 55 | [1., 0.], 56 | [0., 1. / self.epsilon]] 57 | ).dot(np.array([ 58 | [np.cos(theta), np.sin(theta)], 59 | [-np.sin(theta), np.cos(theta)] 60 | ])).dot( 61 | direction_desired 62 | ) 63 | 64 | # Step (observation, reward, done, info) 65 | return super().step(np.array([u, w])) 66 | 67 | def get_holonomic_point(self): 68 | """Returns absolute coordinates of the robot's holonomic point""" 69 | theta = self.sim.theta 70 | direction = np.array([np.cos(theta), np.sin(theta)]) 71 | return self.sim.position + self.epsilon * direction 72 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env_path.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | 5 | from .simulation import Simulation 6 | from .viewer import Viewer 7 | 8 | 9 | class RobotEnvPath(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 30 13 | } 14 | 15 | def __init__(self): 16 | """ 17 | Action spaces 18 | 1. forward u 19 | 2. change in rotational w 20 | 21 | Observation Space: 22 | 1. forward u 23 | 2. rotational w 24 | 3. x 25 | 4. y 26 | 5. theta heading 27 | """ 28 | self.dt = 0.1 # Time delta per step 29 | self.sim = Simulation(self.dt) 30 | self.seed(0) 31 | self.testItr = 0 32 | self.path_pointer = -1. 
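        # path_pointer is a fractional index into self.path: the integer part picks the
        # current waypoint and the fractional part interpolates towards the next one, so
        # goal_pos can slide along the piecewise-linear path in step(). While it is
        # negative, this sliding-goal update is skipped.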
33 | 34 | # Boundaries of the environment 35 | self.MIN_SPEED = 0.2 36 | self.MAX_SPEED = 1. 37 | self.MAX_THETA = 2. 38 | 39 | # Environment parameters 40 | self.path = np.array([[1,0],[1,1],[1,2],[2,2],[3,2],[3,3],[4,4]]) 41 | self.goal_pos = np.array([3, 0]) # goal position 42 | self.dist_threshold = 0.5 # how close to goal = reach goal 43 | 44 | # Action and observation spaces 45 | self.action_space = spaces.Box( 46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32), 47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32), 48 | dtype=np.float32) 49 | 50 | self.observation_space = spaces.Box( 51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32), 52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32), 53 | dtype=np.float32) 54 | 55 | # Visualisation variables 56 | self.viewer = None 57 | 58 | def _is_goal_reached(self): 59 | """ 60 | Check if goal is reached. 61 | """ 62 | return self._get_goal_dist() < self.dist_threshold 63 | 64 | def _is_done(self): 65 | return self.sim.is_invalid() or self._is_goal_reached() 66 | 67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray): 68 | return np.linalg.norm(p1 - p2) 69 | 70 | def _get_goal_dist(self): 71 | return self._get_dist(self.sim.position, self.goal_pos) 72 | 73 | def _get_reward(self): 74 | u, w = self.sim.speed 75 | x, y = self.sim.position 76 | theta = self.sim.theta 77 | 78 | reward_distance = 0 79 | 80 | next_x, next_y = (x + u * np.cos(theta) * self.sim.dt, y + u * np.cos(theta) * self.dt) 81 | next_pos = np.array([next_x, next_y]) 82 | if self._get_goal_dist() < 2.: 83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos( 84 | np.clip(self.sim.theta * 2, -np.pi, np.pi)) 85 | else: 86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos) 87 | 88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1 89 | if reward_directional < 0: 90 | reward_directional *= 4 91 | if reward_directional < -np.pi * 2: 92 | reward_directional = -np.pi * 2 93 | 94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1 95 | # Check correctness 96 | if self._is_goal_reached(): 97 | reward += 25 / self.sim.time 98 | 99 | return reward 100 | 101 | def _get_observation(self): 102 | x, y = self.sim.position 103 | theta = self.sim.theta 104 | 105 | goal_relative = np.array([ 106 | [np.cos(-theta), -np.sin(-theta)], 107 | [np.sin(-theta), np.cos(-theta)] 108 | ]).dot(self.goal_pos - np.array([x, y])) 109 | 110 | return goal_relative 111 | 112 | def step(self, action: np.ndarray): 113 | """ 114 | Args: 115 | - action (tuple): u and change in v. 116 | Returns: 117 | - observation (object): 118 | - reward (float): 119 | - done (boolean): 120 | - info (dict): 121 | """ 122 | 123 | u, w = action 124 | u = (np.tanh(u) + 1) / 2 * ( 125 | self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED # normalize the range of action is -1.5 to 1.5 126 | w = np.tanh(w) * self.MAX_THETA # normalize 127 | 128 | self.sim.step(np.array([u, w])) 129 | 130 | if self.path_pointer>=0: 131 | while self._get_dist(self.sim.position, self.goal_pos) < 1. 
and not self.path_pointer>=len(self.path)-1: 132 | self.path_pointer += .05 133 | if self.path_pointer>len(self.path)-1: 134 | self.path_pointer = len(self.path)-1 135 | progress = self.path_pointer - int(self.path_pointer) 136 | new_goal_pos = self.path[int(self.path_pointer)] * (1 - progress) + progress * self.path[int(np.ceil(self.path_pointer))] 137 | self.goal_pos = new_goal_pos 138 | 139 | return (self._get_observation()), (self._get_reward()), self._is_done(), {} 140 | 141 | def get_closest_directions(self): 142 | min_dist = -1 143 | prev = [0,0] 144 | min_direction_curr = [0,0] 145 | min_direction_next = [0,0] 146 | for i in range(np.ceil(self.path_pointer+0.05)): 147 | if i==0: 148 | min_dist = self._get_dist(self.sim.position, self.path[i]) 149 | min_direction_curr = self.path[i] 150 | min_direction_next = self.path[i] 151 | prev = min_direction_curr 152 | if i>=len(self.path)-1: 153 | if self._get_dist(self.sim.position, self.path[i]) self.FIELD_SIZE or y < -self.FIELD_SIZE or y > self.FIELD_SIZE 22 | 23 | @property 24 | def speed(self): 25 | return self._state[[0, 1]] 26 | 27 | @property 28 | def position(self): 29 | return self._state[[2, 3]] 30 | 31 | @property 32 | def theta(self): 33 | return self._state[4] 34 | 35 | def step(self, action): 36 | next_u, next_w = action 37 | 38 | u, w, x, y, theta = self._state 39 | 40 | # Update state 41 | new_theta = theta + w * self.dt 42 | while new_theta > np.pi: 43 | new_theta -= np.pi * 2 44 | while new_theta < -np.pi: 45 | new_theta += np.pi * 2 46 | new_x = x + u * np.cos(theta) * self.dt 47 | new_y = y + u * np.sin(theta) * self.dt 48 | 49 | self.ticks += 1 50 | 51 | self._state = np.array([next_u, next_w, new_x, new_y, new_theta]) 52 | 53 | def reset(self): 54 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 55 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 56 | rand_theta = np.random.uniform(-np.pi, np.pi) 57 | 58 | self._state = np.array([ 59 | 0, 60 | 0, 61 | rand_x, 62 | rand_y, 63 | rand_theta 64 | ]) 65 | self.start_state = self._state.copy() 66 | 67 | 68 | def reset_mode(self, itr, mode = 'gradual'): 69 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 70 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 
71 | rand_theta = np.random.uniform(-np.pi, np.pi) 72 | 73 | if itr < 30: 74 | self._state = np.array([ 75 | 0, 76 | 0, 77 | 0, 78 | 0, 79 | 0 80 | ]) 81 | elif itr < 100: 82 | self._state = np.array([ 83 | 0, 84 | 0, 85 | rand_x, 86 | rand_y, 87 | 0 88 | ]) 89 | elif itr < 150: 90 | self._state = np.array([ 91 | 0, 92 | 0, 93 | rand_x * 2, 94 | rand_y * 2, 95 | 0 96 | ]) 97 | else: 98 | self._state = np.array([ 99 | 0, 100 | 0, 101 | rand_x * 2, 102 | rand_y * 2, 103 | rand_theta 104 | ]) -------------------------------------------------------------------------------- /robot-env/robot_env/envs/viewer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from os import path 3 | from gym.envs.classic_control import rendering 4 | 5 | class Viewer: 6 | def __init__(self, env): 7 | self.env = env 8 | self.sim = env.sim 9 | 10 | self.pathTrace = 30 11 | self.pathTraceSpace = 2 12 | self.pathTraceSpaceCounter = 0 13 | self.path = np.zeros([self.pathTrace, 2]) 14 | self.pathPtr = 0 15 | 16 | # Set the display window size and range 17 | self.viewer = rendering.Viewer(500, 500) 18 | self.viewer.set_bounds(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE, 19 | self.sim.FIELD_SIZE) # Scale (X,X,Y,Y) 20 | 21 | # Create the robot 22 | fname = path.join(path.dirname(__file__), "assets/robot.png") 23 | self.robotobj = rendering.Image(fname, .25, .25) 24 | self.robot_t = rendering.Transform() 25 | self.robotobj.add_attr(self.robot_t) 26 | 27 | # Create the target goal 28 | fname = path.join(path.dirname(__file__), "assets/target.png") 29 | self.targetobj = rendering.Image(fname, .3, .3) 30 | self.targetobj.set_color(255, 0, 0) 31 | self.target_t = rendering.Transform() 32 | self.targetobj.add_attr(self.target_t) 33 | 34 | # Create the goal location 35 | #self.goalobj = rendering.make_circle(.1) 36 | #self.goalobj.set_color(255, 0, 0) 37 | #self.goal_t = rendering.Transform() 38 | #self.goalobj.add_attr(self.goal_t) 39 | #self.viewer.add_geom(self.goalobj) 40 | #self.goal_t.set_translation(*self.env.goal_pos) 41 | 42 | # Create trace path 43 | self.traceobj = [] 44 | self.traceobj_t = [] 45 | for i in range(self.pathTrace): 46 | self.traceobj.append(rendering.make_circle(.02 + .03 * i / self.pathTrace)) 47 | # print(.5 * i / self.pathTrace, 1. - .5 * i / self.pathTrace, i / self.pathTrace) 48 | self.traceobj[i].set_color(.5 - .5 * i / self.pathTrace, 1. 
- .5 * i / self.pathTrace, 49 | i / self.pathTrace) # Setting the color gradiant for path 50 | self.traceobj_t.append(rendering.Transform()) 51 | self.traceobj[i].add_attr(self.traceobj_t[i]) 52 | self.traceobj_t[i].set_translation(-2 + i * 0.05, 0) 53 | self.viewer.add_geom(self.traceobj[i]) 54 | 55 | self.goalPathobj = [] 56 | self.goalPathobj_t = [] 57 | for i in range (1,len(self.env.path)): 58 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i])) 59 | self.goalPathobj[i-1].set_color(1.,0,0) 60 | self.viewer.add_geom(self.goalPathobj[i-1]) 61 | 62 | def render(self, mode='human'): 63 | # Draw the robot 64 | self.viewer.add_onetime(self.robotobj) 65 | self.robot_t.set_translation(self.sim.position[0], self.sim.position[1]) 66 | self.robot_t.set_rotation(self.sim.theta - np.pi / 2) 67 | 68 | self.viewer.add_onetime(self.targetobj) 69 | self.target_t.set_translation(*self.env.goal_pos) 70 | 71 | # Update trace 72 | self.pathTraceSpaceCounter = (self.pathTraceSpaceCounter + 1) % self.pathTraceSpace 73 | if self.pathTraceSpaceCounter == 0: 74 | self.path[self.pathPtr][0], self.path[self.pathPtr][1] = (self.sim.position[0], self.sim.position[1]) 75 | self.pathPtr = (self.pathPtr + 1) % self.pathTrace 76 | for i in range(self.pathTrace): 77 | counter = (i + self.pathPtr) % self.pathTrace 78 | self.traceobj_t[i].set_translation(self.path[counter][0], self.path[counter][1]) 79 | 80 | self.viewer.geoms = self.viewer.geoms[:self.pathTrace] 81 | self.goalPathobj = [] 82 | self.goalPathobj_t = [] 83 | for i in range (1,len(self.env.path)): 84 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i])) 85 | self.goalPathobj[i-1].set_color(1.,0,0) 86 | self.viewer.add_geom(self.goalPathobj[i-1]) 87 | #self.goal_t.set_translation(*self.env.goal_pos) 88 | output = self.viewer.render(return_rgb_array=mode == 'rgb_array') 89 | 90 | return output 91 | -------------------------------------------------------------------------------- /robot-env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='robot-env', 4 | version='0.0.1', 5 | install_requires=['gym'] # And any other dependencies foo needs 6 | ) -------------------------------------------------------------------------------- /spinning.py: -------------------------------------------------------------------------------- 1 | # from spinup.utils.run_utils import ExperimentGrid 2 | # from spinup import ppo_pytorch 3 | # import torch 4 | 5 | # if __name__ == '__main__': 6 | # import argparse 7 | # parser = argparse.ArgumentParser() 8 | # parser.add_argument('--cpu', type=int, default=4) 9 | # parser.add_argument('--num_runs', type=int, default=3) 10 | # args = parser.parse_args() 11 | 12 | # eg = ExperimentGrid(name='ppo-pyt-bench') 13 | # eg.add('env_name', 'CartPole-v0', '', True) 14 | # eg.add('seed', [10*i for i in range(args.num_runs)]) 15 | # eg.add('epochs', 10) 16 | # eg.add('steps_per_epoch', 4000) 17 | # eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid') 18 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') 19 | # eg.run(ppo_pytorch, num_cpu=args.cpu) 20 | 21 | 22 | #======== 23 | 24 | from spinup.utils.run_utils import ExperimentGrid 25 | from spinup import ppo_pytorch, ddpg_pytorch, sac_pytorch 26 | import torch 27 | import gym 28 | 29 | if __name__ == '__main__': 30 | ############ 31 | # Setup 32 | ############ 33 | ENV_NAME = 'robot_env:robot-env-v0' 34 | #ENV_NAME = 
'robot_env:robot-env-controller-v0' 35 | env = gym.make(ENV_NAME) 36 | feat = 32 37 | 38 | import argparse 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--cpu', type=int, default=4) 41 | parser.add_argument('--num_runs', type=int, default=3) 42 | parser.add_argument('--algo', required=True) 43 | parser.add_argument('--name', default=None) 44 | parser.add_argument('--epochs', type=int, default=50) 45 | args = parser.parse_args() 46 | 47 | if args.name is None: 48 | args.name = args.algo 49 | 50 | eg = ExperimentGrid(name=args.name) 51 | eg.add('env_name', ENV_NAME, '', True) 52 | eg.add('seed', [10*i for i in range(args.num_runs)]) 53 | eg.add('epochs', args.epochs) 54 | eg.add('steps_per_epoch', 4000) 55 | # eg.add('ac_kwargs:hidden_sizes', [(feat,), (feat*2,feat*2)], 'hid') 56 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') 57 | eg.add('ac_kwargs:hidden_sizes', [(feat*2,feat*2)], 'hid') 58 | eg.add('ac_kwargs:activation', [torch.nn.ReLU], '') 59 | 60 | if args.algo == 'ppo': 61 | eg.run(ppo_pytorch, num_cpu=args.cpu) 62 | 63 | elif args.algo == 'ddpg': 64 | eg.run(ddpg_pytorch, num_cpu=args.cpu) 65 | 66 | elif args.algo == 'sac': 67 | eg.run(sac_pytorch, num_cpu=args.cpu) 68 | 69 | else: 70 | raise ValueError("invalid algo.") 71 | --------------------------------------------------------------------------------
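Example invocations for spinning.py, as a sketch: they assume OpenAI Spinning Up (with its PyTorch algorithm implementations) and the local robot-env package are installed, and the value passed to --name is an arbitrary experiment label. The flags mirror the argparse arguments defined in the script.

    python spinning.py --algo ppo --epochs 50 --num_runs 3 --cpu 4
    python spinning.py --algo ddpg --name ddpg-robot-env --epochs 100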