├── .gitignore
├── README.md
├── Report.pdf
├── assets
│   └── report-frontpage.png
├── main.py
├── models
│   ├── actor.py
│   └── critic.py
├── plot.py
├── robot-env
│   ├── README.md
│   ├── robot_env.egg-info
│   │   ├── PKG-INFO
│   │   ├── SOURCES.txt
│   │   ├── dependency_links.txt
│   │   ├── requires.txt
│   │   └── top_level.txt
│   ├── robot_env
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   └── __init__.cpython-36.pyc
│   │   └── envs
│   │       ├── __init__.py
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-36.pyc
│   │       │   ├── robot_env.cpython-36.pyc
│   │       │   └── robot_extrahard_env.cpython-36.pyc
│   │       ├── assets
│   │       │   ├── robot.png
│   │       │   └── target.png
│   │       ├── robot_env.py
│   │       ├── robot_env_controller.py
│   │       ├── robot_env_path.py
│   │       ├── simulation.py
│   │       └── viewer.py
│   └── setup.py
└── spinning.py
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/python,pycharm+iml
3 | # Edit at https://www.gitignore.io/?templates=python,pycharm+iml
4 |
5 | ### PyCharm+iml ###
6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
8 |
9 | # User-specific stuff
10 | .idea/**/workspace.xml
11 | .idea/**/tasks.xml
12 | .idea/**/usage.statistics.xml
13 | .idea/**/dictionaries
14 | .idea/**/shelf
15 |
16 | # Generated files
17 | .idea/**/contentModel.xml
18 |
19 | # Sensitive or high-churn files
20 | .idea/**/dataSources/
21 | .idea/**/dataSources.ids
22 | .idea/**/dataSources.local.xml
23 | .idea/**/sqlDataSources.xml
24 | .idea/**/dynamic.xml
25 | .idea/**/uiDesigner.xml
26 | .idea/**/dbnavigator.xml
27 |
28 | # Gradle
29 | .idea/**/gradle.xml
30 | .idea/**/libraries
31 |
32 | # Gradle and Maven with auto-import
33 | # When using Gradle or Maven with auto-import, you should exclude module files,
34 | # since they will be recreated, and may cause churn. Uncomment if using
35 | # auto-import.
36 | # .idea/modules.xml
37 | # .idea/*.iml
38 | # .idea/modules
39 | # *.iml
40 | # *.ipr
41 |
42 | # CMake
43 | cmake-build-*/
44 |
45 | # Mongo Explorer plugin
46 | .idea/**/mongoSettings.xml
47 |
48 | # File-based project format
49 | *.iws
50 |
51 | # IntelliJ
52 | out/
53 |
54 | # mpeltonen/sbt-idea plugin
55 | .idea_modules/
56 |
57 | # JIRA plugin
58 | atlassian-ide-plugin.xml
59 |
60 | # Cursive Clojure plugin
61 | .idea/replstate.xml
62 |
63 | # Crashlytics plugin (for Android Studio and IntelliJ)
64 | com_crashlytics_export_strings.xml
65 | crashlytics.properties
66 | crashlytics-build.properties
67 | fabric.properties
68 |
69 | # Editor-based Rest Client
70 | .idea/httpRequests
71 |
72 | # Android studio 3.1+ serialized cache file
73 | .idea/caches/build_file_checksums.ser
74 |
75 | ### PyCharm+iml Patch ###
76 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
77 |
78 | *.iml
79 | modules.xml
80 | .idea/misc.xml
81 | *.ipr
82 |
83 | ### Python ###
84 | # Byte-compiled / optimized / DLL files
85 | __pycache__/
86 | *.py[cod]
87 | *$py.class
88 |
89 | # C extensions
90 | *.so
91 |
92 | # Distribution / packaging
93 | .Python
94 | build/
95 | develop-eggs/
96 | dist/
97 | downloads/
98 | eggs/
99 | .eggs/
100 | lib/
101 | lib64/
102 | parts/
103 | sdist/
104 | var/
105 | wheels/
106 | pip-wheel-metadata/
107 | share/python-wheels/
108 | *.egg-info/
109 | .installed.cfg
110 | *.egg
111 | MANIFEST
112 |
113 | # PyInstaller
114 | # Usually these files are written by a python script from a template
115 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
116 | *.manifest
117 | *.spec
118 |
119 | # Installer logs
120 | pip-log.txt
121 | pip-delete-this-directory.txt
122 |
123 | # Unit test / coverage reports
124 | htmlcov/
125 | .tox/
126 | .nox/
127 | .coverage
128 | .coverage.*
129 | .cache
130 | nosetests.xml
131 | coverage.xml
132 | *.cover
133 | .hypothesis/
134 | .pytest_cache/
135 |
136 | # Translations
137 | *.mo
138 | *.pot
139 |
140 | # Scrapy stuff:
141 | .scrapy
142 |
143 | # Sphinx documentation
144 | docs/_build/
145 |
146 | # PyBuilder
147 | target/
148 |
149 | # pyenv
150 | .python-version
151 |
152 | # pipenv
153 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
154 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
155 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
156 | # install all needed dependencies.
157 | #Pipfile.lock
158 |
159 | # celery beat schedule file
160 | celerybeat-schedule
161 |
162 | # SageMath parsed files
163 | *.sage.py
164 |
165 | # Spyder project settings
166 | .spyderproject
167 | .spyproject
168 |
169 | # Rope project settings
170 | .ropeproject
171 |
172 | # Mr Developer
173 | .mr.developer.cfg
174 | .project
175 | .pydevproject
176 |
177 | # mkdocs documentation
178 | /site
179 |
180 | # mypy
181 | .mypy_cache/
182 | .dmypy.json
183 | dmypy.json
184 |
185 | # Pyre type checker
186 | .pyre/
187 |
188 | # End of https://www.gitignore.io/api/python,pycharm+iml
189 |
190 | .idea
191 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement Learning for Path Following
2 |
3 | _Consider the task of a robot attempting to follow a path, made up of a few line segments, in a constrained environment. We attempt this using end-to-end reinforcement learning and explore two algorithms for doing so: Deep Deterministic Policy Gradients (DDPG) and Proximal Policy Optimisation (PPO). We further explore different problem formulations, learning either the parameters of a path-following controller or the velocities of the agent directly, and report our findings._
4 |
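## Getting started

The snippet below is a minimal sketch of how the custom Gym environment is used (it assumes the `robot-env` package has been installed into the current Python environment, e.g. with `pip install -e robot-env`); see `main.py` and `spinning.py` for the full DDPG and Spinning Up training setups.

```python
import gym
import robot_env  # registers robot-env-v0, robot-env-path-v0 and robot-env-controller-v0

env = gym.make('robot_env:robot-env-path-v0')
obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()  # replace with a trained agent's policy
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        break
env.close()
```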
5 | ## Report
6 |
7 | The PDF containing the project's report can be found in [Report.pdf](Report.pdf).
8 |
9 | ![Report front page](assets/report-frontpage.png)
10 |
11 | ## Presentation slides
12 |
13 | Slides used for the project's presentation can be found [here](https://slides.com/jurajmicko/mrs-project7-presentation/).
14 |
15 | ## Authors
16 | * Juraj Micko (jm2186@cam.ac.uk)
17 | * Kwot Sin Lee (ksl36@cam.ac.uk)
18 | * Wilson Suen (wss28@cam.ac.uk)
19 |
--------------------------------------------------------------------------------
/Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/Report.pdf
--------------------------------------------------------------------------------
/assets/report-frontpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/assets/report-frontpage.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')
3 |
4 | import numpy as np
5 | import gym
6 |
7 | from keras.layers import Input
8 | from keras.optimizers import Adam
9 |
10 | from rl.agents import DDPGAgent
11 | from rl.memory import SequentialMemory
12 | from rl.random import OrnsteinUhlenbeckProcess
13 |
14 | from models import actor, critic
15 |
16 | ############
17 | # Setup
18 | ############
19 | ENV_NAME = 'robot_env:robot-env-path-v0'
20 |
21 | # Get the environment and extract the number of actions.
22 | env = gym.make(ENV_NAME)
23 | np.random.seed(123)
24 | env.seed(123)
25 | nb_actions = env.action_space.shape[0]
26 |
27 | ################
28 | # Parameters
29 | ################
30 | # Training parameters
31 | batch_size = 64
32 | lr = 1e-3
33 | max_episode_steps = 100
34 | limit = 100000
35 | nb_steps = 30000
36 |
37 | # Agent parameters
38 | num_steps_warmup_critic = 100
39 | num_steps_warmup_actor = 100
40 | gamma = 0.99
41 |
42 | ###############
43 | # Models
44 | ###############
45 | action_input = Input(shape=(nb_actions,), name='action_input')
46 | observation_input = Input(shape=(batch_size,) + env.observation_space.shape, name='observation_input')
47 | num_feat = 32
48 |
49 | # print(env.observation_space.shape)
50 | # print(nb_actions)
51 | # assert False
52 |
53 | actor = actor.build_actor(
54 | batch_size=batch_size,
55 | nb_actions=nb_actions,
56 | env=env)
57 |
58 | critic = critic.build_critic(
59 | action_input=action_input,
60 | observation_input=observation_input)
61 |
62 | # Optimizer
63 | opt = Adam(lr=lr, clipnorm=1.0)
64 | # opt = Adam(lr=lr, clipnorm=0.01)
65 |
66 | # Build and compile agent
67 | memory = SequentialMemory(
68 | limit=limit,
69 | window_length=batch_size)
70 |
71 | random_process = OrnsteinUhlenbeckProcess(
72 | size=nb_actions,
73 | theta=.15,
74 | mu=0.,
75 | sigma=.3)
76 |
77 | agent = DDPGAgent(
78 | nb_actions=nb_actions,
79 | actor=actor,
80 | critic=critic,
81 | critic_action_input=action_input,
82 | memory=memory,
83 | nb_steps_warmup_critic=num_steps_warmup_critic,
84 | nb_steps_warmup_actor=num_steps_warmup_actor,
85 | random_process=random_process,
86 | gamma=gamma,
87 | target_model_update=1e-2)
88 |
89 | agent.compile(opt, metrics=['mae'])
90 |
91 | history = agent.fit(
92 | env,
93 |     nb_steps=nb_steps,
94 | visualize=True,
95 | verbose=0,
96 | nb_max_episode_steps=max_episode_steps)
97 |
98 | print(history)
99 |
100 | # After training is done, we save the final weights.
101 | agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
102 |
103 | #%%
104 |
105 | # Finally, evaluate our algorithm for 5 episodes.
106 | agent.test(
107 | env,
108 | nb_episodes=5,
109 | visualize=True,
110 | nb_max_episode_steps=max_episode_steps)
111 |
--------------------------------------------------------------------------------
/models/actor.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten
3 | from keras.models import Sequential
4 |
5 |
6 | def build_actor(batch_size, nb_actions, env):
7 | """
8 |     Build the actor network: a small MLP that maps the flattened observation to tanh-bounded actions.
9 | """
10 | init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 | actor = Sequential([
12 | Flatten(input_shape=(batch_size,) + env.observation_space.shape),
13 | Dense(8, activation='relu', kernel_initializer=init),
14 | Dense(8, activation='relu', kernel_initializer=init),
15 | Dense(nb_actions, activation='tanh', kernel_initializer=init),
16 | ])
17 | print(actor.summary())
18 |
19 | return actor
20 |
--------------------------------------------------------------------------------
/models/critic.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten, Concatenate
3 | from keras.models import Model
4 |
5 |
6 | def build_critic(action_input, observation_input):
7 | """
8 |     Build the critic network: concatenates the action with the flattened observation and outputs a scalar Q-value estimate.
9 | """
10 | init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 | flattened_observation = Flatten()(observation_input)
12 | x = Concatenate()([action_input, flattened_observation])
13 | x = Dense(8, activation='relu', kernel_initializer=init)(x)
14 | x = Dense(8, activation='relu', kernel_initializer=init)(x)
15 | x = Dense(1, activation='linear', kernel_initializer=init)(x)
16 | critic = Model(inputs=[action_input, observation_input], outputs=x)
17 | print(critic.summary())
18 |
19 | return critic
20 |
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pylab as plt
2 | import gym
3 | import numpy as np
4 |
5 | ENV_NAME = 'robot_env:robot-env-v0'
6 |
7 | env = gym.make(ENV_NAME)
8 | np.random.seed(123)
9 | env.seed(123)
10 | nb_actions = env.action_space.shape[0]
11 |
12 | MIN_X, MAX_X = (-5, 5)
13 | MIN_Y, MAX_Y = (-5, 5)
14 |
15 | def get_output(observation):
16 |     '''
17 |     Feed the observation into the trained agent to obtain its output.
18 |     As a placeholder, simply echo the observation as the (u, w) action.
19 |     '''
20 |     u, w = observation[0], observation[1]
21 |     action = [u, w]
22 |     return action
23 |
24 | ax = plt.axes()
25 | ax.set(xlim=(MIN_X, MAX_X), ylim=(MIN_Y, MAX_Y))
26 |
27 | split = 30
28 | X_grid, Y_grid = np.meshgrid(np.linspace(MIN_X, MAX_X, split),
29 | np.linspace(MIN_Y, MAX_Y, split))
30 | U = np.zeros_like(X_grid)
31 | V = np.zeros_like(U)
32 |
33 | for X in range(len(X_grid)):
34 | for Y in range (len(X_grid[0])):
35 | cur_X = X * (MAX_X - MIN_X) / (split-1) + MIN_X
36 | cur_Y = Y * (MAX_Y - MIN_Y) / (split-1) + MIN_Y
37 | action = get_output([cur_X, cur_Y])
38 | U[X, Y] = action[0]
39 | V[X, Y] = action[1]
40 | #ax.arrow(cur_X, cur_Y, cur_X, cur_Y, head_width=0.05, head_length=0.1)
41 | ax.quiver(X_grid, Y_grid, U, V, units='width')
42 | plt.savefig('potential_template.png')
43 | plt.show()
44 |
--------------------------------------------------------------------------------
/robot-env/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/README.md
--------------------------------------------------------------------------------
/robot-env/robot_env.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 1.0
2 | Name: robot-env
3 | Version: 0.0.1
4 | Summary: UNKNOWN
5 | Home-page: UNKNOWN
6 | Author: UNKNOWN
7 | Author-email: UNKNOWN
8 | License: UNKNOWN
9 | Description: UNKNOWN
10 | Platform: UNKNOWN
11 |
--------------------------------------------------------------------------------
/robot-env/robot_env.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.py
3 | robot_env.egg-info/PKG-INFO
4 | robot_env.egg-info/SOURCES.txt
5 | robot_env.egg-info/dependency_links.txt
6 | robot_env.egg-info/requires.txt
7 | robot_env.egg-info/top_level.txt
--------------------------------------------------------------------------------
/robot-env/robot_env.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/robot-env/robot_env.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | gym
2 |
--------------------------------------------------------------------------------
/robot-env/robot_env.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/robot-env/robot_env/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 |
3 | register(
4 | id='robot-env-v0',
5 | entry_point='robot_env.envs:RobotEnv',
6 | )
7 | register(
8 | id='robot-env-path-v0',
9 | entry_point='robot_env.envs:RobotEnvPath',
10 | )
11 | register(
12 | id='robot-env-controller-v0',
13 | entry_point='robot_env.envs:RobotEnvController',
14 | )
15 |
--------------------------------------------------------------------------------
/robot-env/robot_env/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from robot_env.envs.robot_env import RobotEnv
2 | from robot_env.envs.robot_env_path import RobotEnvPath
3 | from robot_env.envs.robot_env_controller import RobotEnvController
4 |
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/assets/robot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/robot.png
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/assets/target.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/target.png
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/robot_env.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | from gym import spaces
4 |
5 | from .simulation import Simulation
6 | from .viewer import Viewer
7 |
8 |
9 | class RobotEnv(gym.Env):
10 | metadata = {
11 | 'render.modes': ['human', 'rgb_array'],
12 | 'video.frames_per_second': 30
13 | }
14 |
15 | def __init__(self):
16 | """
17 |         Action space:
18 |         1. forward speed u
19 |         2. rotational speed w
20 |         (both are squashed into their valid ranges in step())
21 |
22 |         Observation space:
23 |         the goal position expressed in the robot's frame of reference
24 |         (a 2-vector)
25 |
26 |         The underlying simulation state is (u, w, x, y, heading theta).
27 | """
28 | self.dt = 0.1 # Time delta per step
29 | self.sim = Simulation(self.dt)
30 | self.get_virtual_position = lambda: self.sim.position
31 | self.seed(0)
32 | self.testItr = 0
33 |
34 | # Boundaries of the environment
35 | self.MIN_SPEED = 0.2
36 | self.MAX_SPEED = 1.
37 | self.MAX_THETA = 2.
38 |
39 | # Environment parameters
40 | self.path = np.array([])
41 | self.goal_pos = np.array([3, 0]) # goal position
42 | self.dist_threshold = 0.5 # how close to goal = reach goal
43 |
44 | # Action and observation spaces
45 | self.action_space = spaces.Box(
46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32),
47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32),
48 | dtype=np.float32)
49 |
50 | self.observation_space = spaces.Box(
51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32),
52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32),
53 | dtype=np.float32)
54 |
55 | # Visualisation variables
56 | self.viewer = None
57 |
58 | def _is_goal_reached(self):
59 | """
60 | Check if goal is reached.
61 | """
62 | return self._get_goal_dist() < self.dist_threshold
63 |
64 | def _is_done(self):
65 | return self.sim.is_invalid() or self._is_goal_reached()
66 |
67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray):
68 | return np.linalg.norm(p1 - p2)
69 |
70 | def _get_goal_dist(self):
71 | return self._get_dist(self.get_virtual_position(), self.goal_pos)
72 |
73 | def _get_reward(self):
74 | u, w = self.sim.speed
75 | x, y = self.sim.position
76 | theta = self.sim.theta
77 |
78 | reward_distance = 0
79 |
80 |         next_x, next_y = (x + u * np.cos(theta) * self.dt, y + u * np.sin(theta) * self.dt)
81 | next_pos = np.array([next_x, next_y])
82 | if self._get_goal_dist() < 2.:
83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos(
84 | np.clip(self.sim.theta * 2, -np.pi, np.pi))
85 | else:
86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos)
87 |
88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1
89 | if reward_directional < 0:
90 | reward_directional *= 4
91 | if reward_directional < -np.pi * 2:
92 | reward_directional = -np.pi * 2
93 |
94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1
95 |         # Bonus for reaching the goal, larger the sooner it is reached
96 | if self._is_goal_reached():
97 | reward += 25 / self.sim.time
98 |
99 | return reward
100 |
101 | def _get_observation(self):
102 | x, y = self.get_virtual_position()
103 | theta = self.sim.theta
104 |
105 | goal_relative = np.array([
106 | [np.cos(-theta), -np.sin(-theta)],
107 | [np.sin(-theta), np.cos(-theta)]
108 | ]).dot(self.goal_pos - np.array([x, y]))
109 |
110 | return goal_relative
111 |
112 | def step(self, action: np.ndarray):
113 | """
114 | Args:
115 | - action (tuple): u and change in v.
116 | Returns:
117 | - observation (object):
118 | - reward (float):
119 | - done (boolean):
120 | - info (dict):
121 | """
122 |
123 | u, w = action
124 | u = (np.tanh(u) + 1) / 2 * (
125 |             self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED  # squash the raw action into [MIN_SPEED, MAX_SPEED]
126 |         w = np.tanh(w) * self.MAX_THETA  # squash w into [-MAX_THETA, MAX_THETA]
127 |
128 | self.sim.step(np.array([u, w]))
129 |
130 | return (self._get_observation()), (self._get_reward()), self._is_done(), {}
131 |
132 | def _print_info(self, reward):
133 | frequency = 50
134 | if self._is_done() or self.sim.ticks % np.round(1 / self.dt / frequency) == 0:
135 | u, w = self.sim.speed
136 | x, y = self.sim.position
137 |
138 | print(f"T {self.sim.time}: Pos ({x:.4f}, {y:.4f}), action ({u:.4f}, {w:.4f}), reward {reward}")
139 |
140 | def reset(self):
141 | self.sim.reset_mode(self.testItr)
142 | self.testItr += 1
143 | if self.testItr > 200 and self.testItr % 10 == 0:
144 | self.goal_pos = np.array([np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9, np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9])
145 | return self._get_observation()
146 |
147 | def render(self, mode='human'):
148 | if self.viewer is None:
149 | self.viewer = Viewer(self)
150 |
151 | return self.viewer.render(mode)
152 |
153 | def _abs_to_rel(self, vec):
154 | theta = self.sim.theta
155 | return np.array([
156 | [np.cos(-theta), -np.sin(-theta)],
157 | [np.sin(-theta), np.cos(-theta)]
158 | ]).dot(vec)
159 |
160 | def get_cp_vector(self):
161 | rg = self.goal_pos - self.sim.position
162 | sg = self.goal_pos - self.sim.start_state[[2,3]]
163 | sg_norm = sg / np.linalg.norm(sg)
164 | pg = rg.dot(sg_norm) * sg_norm
165 | rp = rg - pg
166 | return self._abs_to_rel(rp)
167 |
168 | def get_rough_direction(self):
169 | return self._abs_to_rel(self.goal_pos - self.get_virtual_position())
170 |
171 | def close(self):
172 | if self.viewer:
173 | self.viewer.viewer.close()
174 | self.viewer = None
175 |
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/robot_env_controller.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | from gym import spaces
4 | from simple_pid import PID
5 |
6 | from .robot_env import RobotEnv
7 | from .robot_env_path import RobotEnvPath
8 |
9 |
10 | class RobotEnvController(RobotEnvPath):
11 | metadata = {
12 | 'render.modes': ['human', 'rgb_array'],
13 | 'video.frames_per_second': 30
14 | }
15 |
16 | def __init__(self):
17 | super().__init__()
18 | self.pid = PID(1., 1., 1.,
19 | sample_time=None)
20 | self.epsilon = 0.1
21 | self.get_virtual_position = self.get_holonomic_point
22 |
23 | n_params = 3
24 | self.action_space = spaces.Box(
25 | low=np.array([-np.inf] * n_params, dtype=np.float32),
26 | high=np.array([np.inf] * n_params, dtype=np.float32),
27 | dtype=np.float32)
28 |
29 | def step(self, action: np.ndarray):
30 | """
31 | :param action: [kp, ki, kd]
32 |         :return: (observation, reward, done, info)
33 | """
34 |         # Set PID gains (note: the action [kp, ki, kd] is currently ignored and the gains are fixed at zero)
35 |         self.pid.tunings = [0., 0., 0.]
36 |
37 | vector_forward = self.get_rough_direction()
38 | vector_perpendicular = self.get_cp_vector()
39 | sign = np.sign(np.cross(vector_forward, vector_perpendicular))
40 | vector_perpendicular_oriented = vector_perpendicular * sign
41 |
42 | # Use PID
43 | error_signed = np.linalg.norm(vector_perpendicular) * sign
44 |
45 | direction_forward = vector_forward / np.linalg.norm(vector_forward)
46 | direction_perpendicular = vector_perpendicular_oriented / np.abs(error_signed) if np.abs(error_signed) > 1e-7 else direction_forward
47 |
48 | desired_vector = -self.pid(error_signed) * direction_perpendicular + direction_forward
49 |
50 | direction_desired = desired_vector / np.linalg.norm(desired_vector)
51 |
52 | # Feedback linearisation
53 | theta = 0.
54 | u, w = np.array([
55 | [1., 0.],
56 | [0., 1. / self.epsilon]]
57 | ).dot(np.array([
58 | [np.cos(theta), np.sin(theta)],
59 | [-np.sin(theta), np.cos(theta)]
60 | ])).dot(
61 | direction_desired
62 | )
63 |
64 | # Step (observation, reward, done, info)
65 | return super().step(np.array([u, w]))
66 |
67 | def get_holonomic_point(self):
68 | """Returns absolute coordinates of the robot's holonomic point"""
69 | theta = self.sim.theta
70 | direction = np.array([np.cos(theta), np.sin(theta)])
71 | return self.sim.position + self.epsilon * direction
72 |
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/robot_env_path.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | from gym import spaces
4 |
5 | from .simulation import Simulation
6 | from .viewer import Viewer
7 |
8 |
9 | class RobotEnvPath(gym.Env):
10 | metadata = {
11 | 'render.modes': ['human', 'rgb_array'],
12 | 'video.frames_per_second': 30
13 | }
14 |
15 | def __init__(self):
16 | """
17 |         Action space:
18 |         1. forward speed u
19 |         2. rotational speed w
20 |         (both are squashed into their valid ranges in step())
21 |
22 |         Observation space:
23 |         the current goal position expressed in the robot's frame of
24 |         reference; the goal slides along self.path as the robot approaches it
25 |
26 |         The underlying simulation state is (u, w, x, y, heading theta).
27 | """
28 | self.dt = 0.1 # Time delta per step
29 | self.sim = Simulation(self.dt)
30 | self.seed(0)
31 | self.testItr = 0
32 | self.path_pointer = -1.
33 |
34 | # Boundaries of the environment
35 | self.MIN_SPEED = 0.2
36 | self.MAX_SPEED = 1.
37 | self.MAX_THETA = 2.
38 |
39 | # Environment parameters
40 | self.path = np.array([[1,0],[1,1],[1,2],[2,2],[3,2],[3,3],[4,4]])
41 | self.goal_pos = np.array([3, 0]) # goal position
42 | self.dist_threshold = 0.5 # how close to goal = reach goal
43 |
44 | # Action and observation spaces
45 | self.action_space = spaces.Box(
46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32),
47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32),
48 | dtype=np.float32)
49 |
50 | self.observation_space = spaces.Box(
51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32),
52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32),
53 | dtype=np.float32)
54 |
55 | # Visualisation variables
56 | self.viewer = None
57 |
58 | def _is_goal_reached(self):
59 | """
60 | Check if goal is reached.
61 | """
62 | return self._get_goal_dist() < self.dist_threshold
63 |
64 | def _is_done(self):
65 | return self.sim.is_invalid() or self._is_goal_reached()
66 |
67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray):
68 | return np.linalg.norm(p1 - p2)
69 |
70 | def _get_goal_dist(self):
71 | return self._get_dist(self.sim.position, self.goal_pos)
72 |
73 | def _get_reward(self):
74 | u, w = self.sim.speed
75 | x, y = self.sim.position
76 | theta = self.sim.theta
77 |
78 | reward_distance = 0
79 |
80 |         next_x, next_y = (x + u * np.cos(theta) * self.dt, y + u * np.sin(theta) * self.dt)
81 | next_pos = np.array([next_x, next_y])
82 | if self._get_goal_dist() < 2.:
83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos(
84 | np.clip(self.sim.theta * 2, -np.pi, np.pi))
85 | else:
86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos)
87 |
88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1
89 | if reward_directional < 0:
90 | reward_directional *= 4
91 | if reward_directional < -np.pi * 2:
92 | reward_directional = -np.pi * 2
93 |
94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1
95 |         # Bonus for reaching the goal, larger the sooner it is reached
96 | if self._is_goal_reached():
97 | reward += 25 / self.sim.time
98 |
99 | return reward
100 |
101 | def _get_observation(self):
102 | x, y = self.sim.position
103 | theta = self.sim.theta
104 |
105 | goal_relative = np.array([
106 | [np.cos(-theta), -np.sin(-theta)],
107 | [np.sin(-theta), np.cos(-theta)]
108 | ]).dot(self.goal_pos - np.array([x, y]))
109 |
110 | return goal_relative
111 |
112 | def step(self, action: np.ndarray):
113 | """
114 | Args:
115 | - action (tuple): u and change in v.
116 | Returns:
117 | - observation (object):
118 | - reward (float):
119 | - done (boolean):
120 | - info (dict):
121 | """
122 |
123 | u, w = action
124 | u = (np.tanh(u) + 1) / 2 * (
125 |             self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED  # squash the raw action into [MIN_SPEED, MAX_SPEED]
126 |         w = np.tanh(w) * self.MAX_THETA  # squash w into [-MAX_THETA, MAX_THETA]
127 |
128 | self.sim.step(np.array([u, w]))
129 |
130 |         if self.path_pointer >= 0:  # advance the goal along the path as the robot approaches it
131 |             while self._get_dist(self.sim.position, self.goal_pos) < 1. and self.path_pointer < len(self.path) - 1:
132 |                 self.path_pointer += .05
133 |             if self.path_pointer > len(self.path) - 1:
134 |                 self.path_pointer = len(self.path) - 1
135 |             progress = self.path_pointer - int(self.path_pointer)
136 |             new_goal_pos = self.path[int(self.path_pointer)] * (1 - progress) + progress * self.path[int(np.ceil(self.path_pointer))]
137 |             self.goal_pos = new_goal_pos
138 |
139 | return (self._get_observation()), (self._get_reward()), self._is_done(), {}
140 |
141 | def get_closest_directions(self):
142 | min_dist = -1
143 | prev = [0,0]
144 | min_direction_curr = [0,0]
145 | min_direction_next = [0,0]
146 |         for i in range(int(np.ceil(self.path_pointer + 0.05))):
147 | if i==0:
148 | min_dist = self._get_dist(self.sim.position, self.path[i])
149 | min_direction_curr = self.path[i]
150 | min_direction_next = self.path[i]
151 | prev = min_direction_curr
152 | if i>=len(self.path)-1:
153 |                 if self._get_dist(self.sim.position, self.path[i]) < min_dist:
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/simulation.py:
--------------------------------------------------------------------------------
19 |     def is_invalid(self):
20 |         x, y = self.position
21 |         return x < -self.FIELD_SIZE or x > self.FIELD_SIZE or y < -self.FIELD_SIZE or y > self.FIELD_SIZE
22 |
23 | @property
24 | def speed(self):
25 | return self._state[[0, 1]]
26 |
27 | @property
28 | def position(self):
29 | return self._state[[2, 3]]
30 |
31 | @property
32 | def theta(self):
33 | return self._state[4]
34 |
35 | def step(self, action):
36 | next_u, next_w = action
37 |
38 | u, w, x, y, theta = self._state
39 |
40 | # Update state
41 | new_theta = theta + w * self.dt
42 | while new_theta > np.pi:
43 | new_theta -= np.pi * 2
44 | while new_theta < -np.pi:
45 | new_theta += np.pi * 2
46 | new_x = x + u * np.cos(theta) * self.dt
47 | new_y = y + u * np.sin(theta) * self.dt
48 |
49 | self.ticks += 1
50 |
51 | self._state = np.array([next_u, next_w, new_x, new_y, new_theta])
52 |
53 | def reset(self):
54 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5.
55 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5.
56 | rand_theta = np.random.uniform(-np.pi, np.pi)
57 |
58 | self._state = np.array([
59 | 0,
60 | 0,
61 | rand_x,
62 | rand_y,
63 | rand_theta
64 | ])
65 | self.start_state = self._state.copy()
66 |
67 |
68 | def reset_mode(self, itr, mode = 'gradual'):
69 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5.
70 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5.
71 | rand_theta = np.random.uniform(-np.pi, np.pi)
72 |
73 | if itr < 30:
74 | self._state = np.array([
75 | 0,
76 | 0,
77 | 0,
78 | 0,
79 | 0
80 | ])
81 | elif itr < 100:
82 | self._state = np.array([
83 | 0,
84 | 0,
85 | rand_x,
86 | rand_y,
87 | 0
88 | ])
89 | elif itr < 150:
90 | self._state = np.array([
91 | 0,
92 | 0,
93 | rand_x * 2,
94 | rand_y * 2,
95 | 0
96 | ])
97 | else:
98 | self._state = np.array([
99 | 0,
100 | 0,
101 | rand_x * 2,
102 | rand_y * 2,
103 | rand_theta
104 | ])
--------------------------------------------------------------------------------
/robot-env/robot_env/envs/viewer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from os import path
3 | from gym.envs.classic_control import rendering
4 |
5 | class Viewer:
6 | def __init__(self, env):
7 | self.env = env
8 | self.sim = env.sim
9 |
10 | self.pathTrace = 30
11 | self.pathTraceSpace = 2
12 | self.pathTraceSpaceCounter = 0
13 | self.path = np.zeros([self.pathTrace, 2])
14 | self.pathPtr = 0
15 |
16 | # Set the display window size and range
17 | self.viewer = rendering.Viewer(500, 500)
18 | self.viewer.set_bounds(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE,
19 | self.sim.FIELD_SIZE) # Scale (X,X,Y,Y)
20 |
21 | # Create the robot
22 | fname = path.join(path.dirname(__file__), "assets/robot.png")
23 | self.robotobj = rendering.Image(fname, .25, .25)
24 | self.robot_t = rendering.Transform()
25 | self.robotobj.add_attr(self.robot_t)
26 |
27 | # Create the target goal
28 | fname = path.join(path.dirname(__file__), "assets/target.png")
29 | self.targetobj = rendering.Image(fname, .3, .3)
30 | self.targetobj.set_color(255, 0, 0)
31 | self.target_t = rendering.Transform()
32 | self.targetobj.add_attr(self.target_t)
33 |
34 | # Create the goal location
35 | #self.goalobj = rendering.make_circle(.1)
36 | #self.goalobj.set_color(255, 0, 0)
37 | #self.goal_t = rendering.Transform()
38 | #self.goalobj.add_attr(self.goal_t)
39 | #self.viewer.add_geom(self.goalobj)
40 | #self.goal_t.set_translation(*self.env.goal_pos)
41 |
42 | # Create trace path
43 | self.traceobj = []
44 | self.traceobj_t = []
45 | for i in range(self.pathTrace):
46 | self.traceobj.append(rendering.make_circle(.02 + .03 * i / self.pathTrace))
47 | # print(.5 * i / self.pathTrace, 1. - .5 * i / self.pathTrace, i / self.pathTrace)
48 | self.traceobj[i].set_color(.5 - .5 * i / self.pathTrace, 1. - .5 * i / self.pathTrace,
49 |                                        i / self.pathTrace)  # Setting the color gradient for the path
50 | self.traceobj_t.append(rendering.Transform())
51 | self.traceobj[i].add_attr(self.traceobj_t[i])
52 | self.traceobj_t[i].set_translation(-2 + i * 0.05, 0)
53 | self.viewer.add_geom(self.traceobj[i])
54 |
55 | self.goalPathobj = []
56 | self.goalPathobj_t = []
57 | for i in range (1,len(self.env.path)):
58 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i]))
59 | self.goalPathobj[i-1].set_color(1.,0,0)
60 | self.viewer.add_geom(self.goalPathobj[i-1])
61 |
62 | def render(self, mode='human'):
63 | # Draw the robot
64 | self.viewer.add_onetime(self.robotobj)
65 | self.robot_t.set_translation(self.sim.position[0], self.sim.position[1])
66 | self.robot_t.set_rotation(self.sim.theta - np.pi / 2)
67 |
68 | self.viewer.add_onetime(self.targetobj)
69 | self.target_t.set_translation(*self.env.goal_pos)
70 |
71 | # Update trace
72 | self.pathTraceSpaceCounter = (self.pathTraceSpaceCounter + 1) % self.pathTraceSpace
73 | if self.pathTraceSpaceCounter == 0:
74 | self.path[self.pathPtr][0], self.path[self.pathPtr][1] = (self.sim.position[0], self.sim.position[1])
75 | self.pathPtr = (self.pathPtr + 1) % self.pathTrace
76 | for i in range(self.pathTrace):
77 | counter = (i + self.pathPtr) % self.pathTrace
78 | self.traceobj_t[i].set_translation(self.path[counter][0], self.path[counter][1])
79 |
80 | self.viewer.geoms = self.viewer.geoms[:self.pathTrace]
81 | self.goalPathobj = []
82 | self.goalPathobj_t = []
83 | for i in range (1,len(self.env.path)):
84 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i]))
85 | self.goalPathobj[i-1].set_color(1.,0,0)
86 | self.viewer.add_geom(self.goalPathobj[i-1])
87 | #self.goal_t.set_translation(*self.env.goal_pos)
88 | output = self.viewer.render(return_rgb_array=mode == 'rgb_array')
89 |
90 | return output
91 |
--------------------------------------------------------------------------------
/robot-env/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(name='robot-env',
4 | version='0.0.1',
5 |       install_requires=['gym']  # And any other dependencies the environment needs
6 | )
--------------------------------------------------------------------------------
/spinning.py:
--------------------------------------------------------------------------------
1 | # from spinup.utils.run_utils import ExperimentGrid
2 | # from spinup import ppo_pytorch
3 | # import torch
4 |
5 | # if __name__ == '__main__':
6 | # import argparse
7 | # parser = argparse.ArgumentParser()
8 | # parser.add_argument('--cpu', type=int, default=4)
9 | # parser.add_argument('--num_runs', type=int, default=3)
10 | # args = parser.parse_args()
11 |
12 | # eg = ExperimentGrid(name='ppo-pyt-bench')
13 | # eg.add('env_name', 'CartPole-v0', '', True)
14 | # eg.add('seed', [10*i for i in range(args.num_runs)])
15 | # eg.add('epochs', 10)
16 | # eg.add('steps_per_epoch', 4000)
17 | # eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid')
18 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '')
19 | # eg.run(ppo_pytorch, num_cpu=args.cpu)
20 |
21 |
22 | #========
23 |
24 | from spinup.utils.run_utils import ExperimentGrid
25 | from spinup import ppo_pytorch, ddpg_pytorch, sac_pytorch
26 | import torch
27 | import gym
28 |
29 | if __name__ == '__main__':
30 | ############
31 | # Setup
32 | ############
33 | ENV_NAME = 'robot_env:robot-env-v0'
34 | #ENV_NAME = 'robot_env:robot-env-controller-v0'
35 | env = gym.make(ENV_NAME)
36 | feat = 32
37 |
38 | import argparse
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('--cpu', type=int, default=4)
41 | parser.add_argument('--num_runs', type=int, default=3)
42 | parser.add_argument('--algo', required=True)
43 | parser.add_argument('--name', default=None)
44 | parser.add_argument('--epochs', type=int, default=50)
45 | args = parser.parse_args()
46 |
47 | if args.name is None:
48 | args.name = args.algo
49 |
50 | eg = ExperimentGrid(name=args.name)
51 | eg.add('env_name', ENV_NAME, '', True)
52 | eg.add('seed', [10*i for i in range(args.num_runs)])
53 | eg.add('epochs', args.epochs)
54 | eg.add('steps_per_epoch', 4000)
55 | # eg.add('ac_kwargs:hidden_sizes', [(feat,), (feat*2,feat*2)], 'hid')
56 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '')
57 | eg.add('ac_kwargs:hidden_sizes', [(feat*2,feat*2)], 'hid')
58 | eg.add('ac_kwargs:activation', [torch.nn.ReLU], '')
59 |
60 | if args.algo == 'ppo':
61 | eg.run(ppo_pytorch, num_cpu=args.cpu)
62 |
63 | elif args.algo == 'ddpg':
64 | eg.run(ddpg_pytorch, num_cpu=args.cpu)
65 |
66 | elif args.algo == 'sac':
67 | eg.run(sac_pytorch, num_cpu=args.cpu)
68 |
69 | else:
70 | raise ValueError("invalid algo.")
71 |
--------------------------------------------------------------------------------