├── .gitignore ├── README.md ├── Report.pdf ├── assets └── report-frontpage.png ├── main.py ├── models ├── actor.py └── critic.py ├── plot.py ├── robot-env ├── README.md ├── robot_env.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── requires.txt │ └── top_level.txt ├── robot_env │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-36.pyc │ └── envs │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── robot_env.cpython-36.pyc │ │ └── robot_extrahard_env.cpython-36.pyc │ │ ├── assets │ │ ├── robot.png │ │ └── target.png │ │ ├── robot_env.py │ │ ├── robot_env_controller.py │ │ ├── robot_env_path.py │ │ ├── simulation.py │ │ └── viewer.py └── setup.py └── spinning.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,pycharm+iml 3 | # Edit at https://www.gitignore.io/?templates=python,pycharm+iml 4 | 5 | ### PyCharm+iml ### 6 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 7 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 8 | 9 | # User-specific stuff 10 | .idea/**/workspace.xml 11 | .idea/**/tasks.xml 12 | .idea/**/usage.statistics.xml 13 | .idea/**/dictionaries 14 | .idea/**/shelf 15 | 16 | # Generated files 17 | .idea/**/contentModel.xml 18 | 19 | # Sensitive or high-churn files 20 | .idea/**/dataSources/ 21 | .idea/**/dataSources.ids 22 | .idea/**/dataSources.local.xml 23 | .idea/**/sqlDataSources.xml 24 | .idea/**/dynamic.xml 25 | .idea/**/uiDesigner.xml 26 | .idea/**/dbnavigator.xml 27 | 28 | # Gradle 29 | .idea/**/gradle.xml 30 | .idea/**/libraries 31 | 32 | # Gradle and Maven with auto-import 33 | # When using Gradle or Maven with auto-import, you should exclude module files, 34 | # since they will be recreated, and may cause churn. Uncomment if using 35 | # auto-import. 
36 | # .idea/modules.xml
37 | # .idea/*.iml
38 | # .idea/modules
39 | # *.iml
40 | # *.ipr
41 |
42 | # CMake
43 | cmake-build-*/
44 |
45 | # Mongo Explorer plugin
46 | .idea/**/mongoSettings.xml
47 |
48 | # File-based project format
49 | *.iws
50 |
51 | # IntelliJ
52 | out/
53 |
54 | # mpeltonen/sbt-idea plugin
55 | .idea_modules/
56 |
57 | # JIRA plugin
58 | atlassian-ide-plugin.xml
59 |
60 | # Cursive Clojure plugin
61 | .idea/replstate.xml
62 |
63 | # Crashlytics plugin (for Android Studio and IntelliJ)
64 | com_crashlytics_export_strings.xml
65 | crashlytics.properties
66 | crashlytics-build.properties
67 | fabric.properties
68 |
69 | # Editor-based Rest Client
70 | .idea/httpRequests
71 |
72 | # Android studio 3.1+ serialized cache file
73 | .idea/caches/build_file_checksums.ser
74 |
75 | ### PyCharm+iml Patch ###
76 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
77 |
78 | *.iml
79 | modules.xml
80 | .idea/misc.xml
81 | *.ipr
82 |
83 | ### Python ###
84 | # Byte-compiled / optimized / DLL files
85 | __pycache__/
86 | *.py[cod]
87 | *$py.class
88 |
89 | # C extensions
90 | *.so
91 |
92 | # Distribution / packaging
93 | .Python
94 | build/
95 | develop-eggs/
96 | dist/
97 | downloads/
98 | eggs/
99 | .eggs/
100 | lib/
101 | lib64/
102 | parts/
103 | sdist/
104 | var/
105 | wheels/
106 | pip-wheel-metadata/
107 | share/python-wheels/
108 | *.egg-info/
109 | .installed.cfg
110 | *.egg
111 | MANIFEST
112 |
113 | # PyInstaller
114 | # Usually these files are written by a python script from a template
115 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
116 | *.manifest
117 | *.spec
118 |
119 | # Installer logs
120 | pip-log.txt
121 | pip-delete-this-directory.txt
122 |
123 | # Unit test / coverage reports
124 | htmlcov/
125 | .tox/
126 | .nox/
127 | .coverage
128 | .coverage.*
129 | .cache
130 | nosetests.xml
131 | coverage.xml
132 | *.cover
133 | .hypothesis/
134 | .pytest_cache/
135 |
136 | # Translations
137 | *.mo
138 | *.pot
139 |
140 | # Scrapy stuff:
141 | .scrapy
142 |
143 | # Sphinx documentation
144 | docs/_build/
145 |
146 | # PyBuilder
147 | target/
148 |
149 | # pyenv
150 | .python-version
151 |
152 | # pipenv
153 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
154 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
155 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
156 | # install all needed dependencies.
157 | #Pipfile.lock
158 |
159 | # celery beat schedule file
160 | celerybeat-schedule
161 |
162 | # SageMath parsed files
163 | *.sage.py
164 |
165 | # Spyder project settings
166 | .spyderproject
167 | .spyproject
168 |
169 | # Rope project settings
170 | .ropeproject
171 |
172 | # Mr Developer
173 | .mr.developer.cfg
174 | .project
175 | .pydevproject
176 |
177 | # mkdocs documentation
178 | /site
179 |
180 | # mypy
181 | .mypy_cache/
182 | .dmypy.json
183 | dmypy.json
184 |
185 | # Pyre type checker
186 | .pyre/
187 |
188 | # End of https://www.gitignore.io/api/python,pycharm+iml
189 |
190 | .idea
191 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Reinforcement Learning for Path Following
2 |
3 | _Consider the problem of a robot attempting to follow a path in a constrained environment, given only a few line segments to follow.
We attempt this using end-to-end reinforcement learning and explore two algorithms for doing so: Deep Deterministic Policy Gradients (DDPG) and Proximal Policy Optimisation (PPO). We further explore different problem formulations, learning either the parameters of a path-following controller or the agent's velocities directly, and report our findings._
4 |
5 | ## Report
6 |
7 | The PDF containing the project's report can be found in [Report.pdf](Report.pdf).
8 |
9 | ![Report front page](assets/report-frontpage.png)
10 |
11 | ## Presentation slides
12 |
13 | Slides used for the project's presentation can be found [here](https://slides.com/jurajmicko/mrs-project7-presentation/).
14 |
15 | ## Authors
16 | * Juraj Micko (jm2186@cam.ac.uk)
17 | * Kwot Sin Lee (ksl36@cam.ac.uk)
18 | * Wilson Suen (wss28@cam.ac.uk)
--------------------------------------------------------------------------------
/Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/Report.pdf
--------------------------------------------------------------------------------
/assets/report-frontpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/assets/report-frontpage.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | warnings.filterwarnings('ignore')
3 |
4 | import numpy as np
5 | import gym
6 |
7 | from keras.layers import Input
8 | from keras.optimizers import Adam
9 |
10 | from rl.agents import DDPGAgent
11 | from rl.memory import SequentialMemory
12 | from rl.random import OrnsteinUhlenbeckProcess
13 |
14 | from models import actor, critic
15 |
16 | ############
17 | # Setup
18 | ############
19 | ENV_NAME = 'robot_env:robot-env-path-v0'
20 |
21 | # Get the environment and extract the number of actions.
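# The 'robot_env:robot-env-path-v0' id is registered by the local robot-env package
# (see robot-env/robot_env/__init__.py). A minimal setup sketch, assuming the package
# is installed in editable mode from the repository root before running this script:
#
#     pip install -e robot-env
#
# With that done, gym.make() below can resolve the robot_env.envs:RobotEnvPath entry point.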
22 | env = gym.make(ENV_NAME) 23 | np.random.seed(123) 24 | env.seed(123) 25 | nb_actions = env.action_space.shape[0] 26 | 27 | ################ 28 | # Parameters 29 | ################ 30 | # Training parameters 31 | batch_size = 64 32 | lr = 1e-3 33 | max_episode_steps = 100 34 | limit = 100000 35 | nb_steps = 10000 36 | 37 | # Agent parameters 38 | num_steps_warmup_critic = 100 39 | num_steps_warmup_actor = 100 40 | gamma = 0.99 41 | 42 | ############### 43 | # Models 44 | ############### 45 | action_input = Input(shape=(nb_actions,), name='action_input') 46 | observation_input = Input(shape=(batch_size,) + env.observation_space.shape, name='observation_input') 47 | num_feat = 32 48 | 49 | # print(env.observation_space.shape) 50 | # print(nb_actions) 51 | # assert False 52 | 53 | actor = actor.build_actor( 54 | batch_size=batch_size, 55 | nb_actions=nb_actions, 56 | env=env) 57 | 58 | critic = critic.build_critic( 59 | action_input=action_input, 60 | observation_input=observation_input) 61 | 62 | # Optimizer 63 | opt = Adam(lr=lr, clipnorm=1.0) 64 | # opt = Adam(lr=lr, clipnorm=0.01) 65 | 66 | # Build and compile agent 67 | memory = SequentialMemory( 68 | limit=limit, 69 | window_length=batch_size) 70 | 71 | random_process = OrnsteinUhlenbeckProcess( 72 | size=nb_actions, 73 | theta=.15, 74 | mu=0., 75 | sigma=.3) 76 | 77 | agent = DDPGAgent( 78 | nb_actions=nb_actions, 79 | actor=actor, 80 | critic=critic, 81 | critic_action_input=action_input, 82 | memory=memory, 83 | nb_steps_warmup_critic=num_steps_warmup_critic, 84 | nb_steps_warmup_actor=num_steps_warmup_actor, 85 | random_process=random_process, 86 | gamma=gamma, 87 | target_model_update=1e-2) 88 | 89 | agent.compile(opt, metrics=['mae']) 90 | 91 | history = agent.fit( 92 | env, 93 | nb_steps=30000, 94 | visualize=True, 95 | verbose=0, 96 | nb_max_episode_steps=max_episode_steps) 97 | 98 | print(history) 99 | 100 | # After training is done, we save the final weights. 101 | agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True) 102 | 103 | #%% 104 | 105 | # Finally, evaluate our algorithm for 5 episodes. 
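# Optionally, a previously trained agent can be evaluated instead of the one trained
# above by reloading its saved weights first. A sketch using the keras-rl API, assuming
# the file name matches the one passed to save_weights() above:
#
#     agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))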
106 | agent.test(
107 |     env,
108 |     nb_episodes=5,
109 |     visualize=True,
110 |     nb_max_episode_steps=max_episode_steps)
111 |
--------------------------------------------------------------------------------
/models/actor.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten
3 | from keras.models import Sequential
4 |
5 |
6 | def build_actor(batch_size, nb_actions, env):
7 |     """
8 |     Build a small MLP actor that maps flattened observations to nb_actions outputs squashed to [-1, 1] by tanh.
9 |     """
10 |     init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 |     actor = Sequential([
12 |         Flatten(input_shape=(batch_size,) + env.observation_space.shape),
13 |         Dense(8, activation='relu', kernel_initializer=init),
14 |         Dense(8, activation='relu', kernel_initializer=init),
15 |         Dense(nb_actions, activation='tanh', kernel_initializer=init),
16 |     ])
17 |     print(actor.summary())
18 |
19 |     return actor
20 |
--------------------------------------------------------------------------------
/models/critic.py:
--------------------------------------------------------------------------------
1 | from keras.initializers import RandomNormal
2 | from keras.layers import Dense, Flatten, Concatenate
3 | from keras.models import Model
4 |
5 |
6 | def build_critic(action_input, observation_input):
7 |     """
8 |     Build an MLP critic (Q-network) that maps an (action, observation) pair to a single Q-value.
9 |     """
10 |     init = RandomNormal(mean=0.0, stddev=0.2, seed=None)
11 |     flattened_observation = Flatten()(observation_input)
12 |     x = Concatenate()([action_input, flattened_observation])
13 |     x = Dense(8, activation='relu', kernel_initializer=init)(x)
14 |     x = Dense(8, activation='relu', kernel_initializer=init)(x)
15 |     x = Dense(1, activation='linear', kernel_initializer=init)(x)
16 |     critic = Model(inputs=[action_input, observation_input], outputs=x)
17 |     print(critic.summary())
18 |
19 |     return critic
20 |
--------------------------------------------------------------------------------
/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import gym
3 | import numpy as np
4 |
5 | ENV_NAME = 'robot_env:robot-env-v0'
6 |
7 | env = gym.make(ENV_NAME)
8 | np.random.seed(123)
9 | env.seed(123)
10 | nb_actions = env.action_space.shape[0]
11 |
12 | MIN_X, MAX_X = (-5, 5)
13 | MIN_Y, MAX_Y = (-5, 5)
14 |
15 | def get_output(observation):
16 |     '''
17 |     Placeholder policy: feed the observation into a trained agent here to
18 |     obtain the action (u, w). For now it returns the observation itself so
19 |     the quiver plot can be produced without a trained model.
20 |     '''
21 |     action = [observation[0], observation[1]]
22 |     return action
23 |
24 | ax = plt.axes()
25 | ax.set(xlim=(MIN_X, MAX_X), ylim=(MIN_Y, MAX_Y))
26 |
27 | split = 30
28 | X_grid, Y_grid = np.meshgrid(np.linspace(MIN_X, MAX_X, split),
29 |                              np.linspace(MIN_Y, MAX_Y, split))
30 | U = np.zeros_like(X_grid)
31 | V = np.zeros_like(U)
32 |
33 | for X in range(len(X_grid)):
34 |     for Y in range(len(X_grid[0])):
35 |         cur_X = X * (MAX_X - MIN_X) / (split-1) + MIN_X
36 |         cur_Y = Y * (MAX_Y - MIN_Y) / (split-1) + MIN_Y
37 |         action = get_output([cur_X, cur_Y])
38 |         U[X, Y] = action[0]
39 |         V[X, Y] = action[1]
40 |         #ax.arrow(cur_X, cur_Y, cur_X, cur_Y, head_width=0.05, head_length=0.1)
41 | ax.quiver(X_grid, Y_grid, U, V, units='width')
42 | plt.savefig('potential_template.png')
43 | plt.show()
44 |
--------------------------------------------------------------------------------
/robot-env/README.md:
https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/README.md -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: robot-env 3 | Version: 0.0.1 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | robot_env.egg-info/PKG-INFO 4 | robot_env.egg-info/SOURCES.txt 5 | robot_env.egg-info/dependency_links.txt 6 | robot_env.egg-info/requires.txt 7 | robot_env.egg-info/top_level.txt -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | gym 2 | -------------------------------------------------------------------------------- /robot-env/robot_env.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /robot-env/robot_env/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='robot-env-v0', 5 | entry_point='robot_env.envs:RobotEnv', 6 | ) 7 | register( 8 | id='robot-env-path-v0', 9 | entry_point='robot_env.envs:RobotEnvPath', 10 | ) 11 | register( 12 | id='robot-env-controller-v0', 13 | entry_point='robot_env.envs:RobotEnvController', 14 | ) 15 | -------------------------------------------------------------------------------- /robot-env/robot_env/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from robot_env.envs.robot_env import RobotEnv 2 | from robot_env.envs.robot_env_path import RobotEnvPath 3 | from robot_env.envs.robot_env_controller import RobotEnvController 4 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_env.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/__pycache__/robot_extrahard_env.cpython-36.pyc -------------------------------------------------------------------------------- /robot-env/robot_env/envs/assets/robot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/robot.png -------------------------------------------------------------------------------- /robot-env/robot_env/envs/assets/target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjurm/path-following-reinforcement-learning/c37c357b2e8e959f75b38aa543dda4e7c49edc2d/robot-env/robot_env/envs/assets/target.png -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | 5 | from .simulation import Simulation 6 | from .viewer import Viewer 7 | 8 | 9 | class RobotEnv(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 30 13 | } 14 | 15 | def __init__(self): 16 | """ 17 | Action spaces 18 | 1. forward u 19 | 2. change in rotational w 20 | 21 | Observation Space: 22 | 1. forward u 23 | 2. rotational w 24 | 3. x 25 | 4. y 26 | 5. theta heading 27 | """ 28 | self.dt = 0.1 # Time delta per step 29 | self.sim = Simulation(self.dt) 30 | self.get_virtual_position = lambda: self.sim.position 31 | self.seed(0) 32 | self.testItr = 0 33 | 34 | # Boundaries of the environment 35 | self.MIN_SPEED = 0.2 36 | self.MAX_SPEED = 1. 37 | self.MAX_THETA = 2. 38 | 39 | # Environment parameters 40 | self.path = np.array([]) 41 | self.goal_pos = np.array([3, 0]) # goal position 42 | self.dist_threshold = 0.5 # how close to goal = reach goal 43 | 44 | # Action and observation spaces 45 | self.action_space = spaces.Box( 46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32), 47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32), 48 | dtype=np.float32) 49 | 50 | self.observation_space = spaces.Box( 51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32), 52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32), 53 | dtype=np.float32) 54 | 55 | # Visualisation variables 56 | self.viewer = None 57 | 58 | def _is_goal_reached(self): 59 | """ 60 | Check if goal is reached. 
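        Returns True when the distance from the robot's (virtual) position to self.goal_pos is below self.dist_threshold.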
61 | """ 62 | return self._get_goal_dist() < self.dist_threshold 63 | 64 | def _is_done(self): 65 | return self.sim.is_invalid() or self._is_goal_reached() 66 | 67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray): 68 | return np.linalg.norm(p1 - p2) 69 | 70 | def _get_goal_dist(self): 71 | return self._get_dist(self.get_virtual_position(), self.goal_pos) 72 | 73 | def _get_reward(self): 74 | u, w = self.sim.speed 75 | x, y = self.sim.position 76 | theta = self.sim.theta 77 | 78 | reward_distance = 0 79 | 80 | next_x, next_y = (x + u * np.cos(theta) * self.sim.dt, y + u * np.cos(theta) * self.dt) 81 | next_pos = np.array([next_x, next_y]) 82 | if self._get_goal_dist() < 2.: 83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos( 84 | np.clip(self.sim.theta * 2, -np.pi, np.pi)) 85 | else: 86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos) 87 | 88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1 89 | if reward_directional < 0: 90 | reward_directional *= 4 91 | if reward_directional < -np.pi * 2: 92 | reward_directional = -np.pi * 2 93 | 94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1 95 | # Check correctness 96 | if self._is_goal_reached(): 97 | reward += 25 / self.sim.time 98 | 99 | return reward 100 | 101 | def _get_observation(self): 102 | x, y = self.get_virtual_position() 103 | theta = self.sim.theta 104 | 105 | goal_relative = np.array([ 106 | [np.cos(-theta), -np.sin(-theta)], 107 | [np.sin(-theta), np.cos(-theta)] 108 | ]).dot(self.goal_pos - np.array([x, y])) 109 | 110 | return goal_relative 111 | 112 | def step(self, action: np.ndarray): 113 | """ 114 | Args: 115 | - action (tuple): u and change in v. 116 | Returns: 117 | - observation (object): 118 | - reward (float): 119 | - done (boolean): 120 | - info (dict): 121 | """ 122 | 123 | u, w = action 124 | u = (np.tanh(u) + 1) / 2 * ( 125 | self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED # normalize the range of action is -1.5 to 1.5 126 | w = np.tanh(w) * self.MAX_THETA # normalize 127 | 128 | self.sim.step(np.array([u, w])) 129 | 130 | return (self._get_observation()), (self._get_reward()), self._is_done(), {} 131 | 132 | def _print_info(self, reward): 133 | frequency = 50 134 | if self._is_done() or self.sim.ticks % np.round(1 / self.dt / frequency) == 0: 135 | u, w = self.sim.speed 136 | x, y = self.sim.position 137 | 138 | print(f"T {self.sim.time}: Pos ({x:.4f}, {y:.4f}), action ({u:.4f}, {w:.4f}), reward {reward}") 139 | 140 | def reset(self): 141 | self.sim.reset_mode(self.testItr) 142 | self.testItr += 1 143 | if self.testItr > 200 and self.testItr % 10 == 0: 144 | self.goal_pos = np.array([np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9, np.random.uniform(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE) * .9]) 145 | return self._get_observation() 146 | 147 | def render(self, mode='human'): 148 | if self.viewer is None: 149 | self.viewer = Viewer(self) 150 | 151 | return self.viewer.render(mode) 152 | 153 | def _abs_to_rel(self, vec): 154 | theta = self.sim.theta 155 | return np.array([ 156 | [np.cos(-theta), -np.sin(-theta)], 157 | [np.sin(-theta), np.cos(-theta)] 158 | ]).dot(vec) 159 | 160 | def get_cp_vector(self): 161 | rg = self.goal_pos - self.sim.position 162 | sg = self.goal_pos - self.sim.start_state[[2,3]] 163 | sg_norm = sg / np.linalg.norm(sg) 164 | pg = rg.dot(sg_norm) * sg_norm 165 | rp = rg - pg 166 | return self._abs_to_rel(rp) 167 | 168 | def get_rough_direction(self): 169 | return 
self._abs_to_rel(self.goal_pos - self.get_virtual_position()) 170 | 171 | def close(self): 172 | if self.viewer: 173 | self.viewer.viewer.close() 174 | self.viewer = None 175 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env_controller.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | from simple_pid import PID 5 | 6 | from .robot_env import RobotEnv 7 | from .robot_env_path import RobotEnvPath 8 | 9 | 10 | class RobotEnvController(RobotEnvPath): 11 | metadata = { 12 | 'render.modes': ['human', 'rgb_array'], 13 | 'video.frames_per_second': 30 14 | } 15 | 16 | def __init__(self): 17 | super().__init__() 18 | self.pid = PID(1., 1., 1., 19 | sample_time=None) 20 | self.epsilon = 0.1 21 | self.get_virtual_position = self.get_holonomic_point 22 | 23 | n_params = 3 24 | self.action_space = spaces.Box( 25 | low=np.array([-np.inf] * n_params, dtype=np.float32), 26 | high=np.array([np.inf] * n_params, dtype=np.float32), 27 | dtype=np.float32) 28 | 29 | def step(self, action: np.ndarray): 30 | """ 31 | :param action: [kp, ki, kd] 32 | :return: observation 33 | """ 34 | # Set PID parameters 35 | self.pid.tunings = [0., 0., 0.] 36 | 37 | vector_forward = self.get_rough_direction() 38 | vector_perpendicular = self.get_cp_vector() 39 | sign = np.sign(np.cross(vector_forward, vector_perpendicular)) 40 | vector_perpendicular_oriented = vector_perpendicular * sign 41 | 42 | # Use PID 43 | error_signed = np.linalg.norm(vector_perpendicular) * sign 44 | 45 | direction_forward = vector_forward / np.linalg.norm(vector_forward) 46 | direction_perpendicular = vector_perpendicular_oriented / np.abs(error_signed) if np.abs(error_signed) > 1e-7 else direction_forward 47 | 48 | desired_vector = -self.pid(error_signed) * direction_perpendicular + direction_forward 49 | 50 | direction_desired = desired_vector / np.linalg.norm(desired_vector) 51 | 52 | # Feedback linearisation 53 | theta = 0. 54 | u, w = np.array([ 55 | [1., 0.], 56 | [0., 1. / self.epsilon]] 57 | ).dot(np.array([ 58 | [np.cos(theta), np.sin(theta)], 59 | [-np.sin(theta), np.cos(theta)] 60 | ])).dot( 61 | direction_desired 62 | ) 63 | 64 | # Step (observation, reward, done, info) 65 | return super().step(np.array([u, w])) 66 | 67 | def get_holonomic_point(self): 68 | """Returns absolute coordinates of the robot's holonomic point""" 69 | theta = self.sim.theta 70 | direction = np.array([np.cos(theta), np.sin(theta)]) 71 | return self.sim.position + self.epsilon * direction 72 | -------------------------------------------------------------------------------- /robot-env/robot_env/envs/robot_env_path.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from gym import spaces 4 | 5 | from .simulation import Simulation 6 | from .viewer import Viewer 7 | 8 | 9 | class RobotEnvPath(gym.Env): 10 | metadata = { 11 | 'render.modes': ['human', 'rgb_array'], 12 | 'video.frames_per_second': 30 13 | } 14 | 15 | def __init__(self): 16 | """ 17 | Action spaces 18 | 1. forward u 19 | 2. change in rotational w 20 | 21 | Observation Space: 22 | 1. forward u 23 | 2. rotational w 24 | 3. x 25 | 4. y 26 | 5. theta heading 27 | """ 28 | self.dt = 0.1 # Time delta per step 29 | self.sim = Simulation(self.dt) 30 | self.seed(0) 31 | self.testItr = 0 32 | self.path_pointer = -1. 
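        # path_pointer is a fractional index into self.path: the integer part picks the
        # current waypoint and the fractional part interpolates towards the next one, so
        # goal_pos can slide along the piecewise-linear path in step(). While it is
        # negative, this sliding-goal update is skipped.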
33 | 34 | # Boundaries of the environment 35 | self.MIN_SPEED = 0.2 36 | self.MAX_SPEED = 1. 37 | self.MAX_THETA = 2. 38 | 39 | # Environment parameters 40 | self.path = np.array([[1,0],[1,1],[1,2],[2,2],[3,2],[3,3],[4,4]]) 41 | self.goal_pos = np.array([3, 0]) # goal position 42 | self.dist_threshold = 0.5 # how close to goal = reach goal 43 | 44 | # Action and observation spaces 45 | self.action_space = spaces.Box( 46 | low=np.array([self.MIN_SPEED, -self.MAX_THETA], dtype=np.float32), 47 | high=np.array([self.MAX_SPEED, self.MAX_THETA], dtype=np.float32), 48 | dtype=np.float32) 49 | 50 | self.observation_space = spaces.Box( 51 | low=np.array([-self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE], dtype=np.float32), 52 | high=np.array([self.sim.FIELD_SIZE, self.sim.FIELD_SIZE], dtype=np.float32), 53 | dtype=np.float32) 54 | 55 | # Visualisation variables 56 | self.viewer = None 57 | 58 | def _is_goal_reached(self): 59 | """ 60 | Check if goal is reached. 61 | """ 62 | return self._get_goal_dist() < self.dist_threshold 63 | 64 | def _is_done(self): 65 | return self.sim.is_invalid() or self._is_goal_reached() 66 | 67 | def _get_dist(self, p1: np.ndarray, p2: np.ndarray): 68 | return np.linalg.norm(p1 - p2) 69 | 70 | def _get_goal_dist(self): 71 | return self._get_dist(self.sim.position, self.goal_pos) 72 | 73 | def _get_reward(self): 74 | u, w = self.sim.speed 75 | x, y = self.sim.position 76 | theta = self.sim.theta 77 | 78 | reward_distance = 0 79 | 80 | next_x, next_y = (x + u * np.cos(theta) * self.sim.dt, y + u * np.cos(theta) * self.dt) 81 | next_pos = np.array([next_x, next_y]) 82 | if self._get_goal_dist() < 2.: 83 | reward_distance += (2.1 - self._get_dist(next_pos, self.goal_pos)) ** 4 * np.cos( 84 | np.clip(self.sim.theta * 2, -np.pi, np.pi)) 85 | else: 86 | reward_distance = self._get_goal_dist() - self._get_dist(next_pos, self.goal_pos) 87 | 88 | reward_directional = (np.pi - np.abs(self.sim.theta) * 5) * 0.1 89 | if reward_directional < 0: 90 | reward_directional *= 4 91 | if reward_directional < -np.pi * 2: 92 | reward_directional = -np.pi * 2 93 | 94 | reward = reward_distance + reward_directional - np.abs(w) * 0.1 95 | # Check correctness 96 | if self._is_goal_reached(): 97 | reward += 25 / self.sim.time 98 | 99 | return reward 100 | 101 | def _get_observation(self): 102 | x, y = self.sim.position 103 | theta = self.sim.theta 104 | 105 | goal_relative = np.array([ 106 | [np.cos(-theta), -np.sin(-theta)], 107 | [np.sin(-theta), np.cos(-theta)] 108 | ]).dot(self.goal_pos - np.array([x, y])) 109 | 110 | return goal_relative 111 | 112 | def step(self, action: np.ndarray): 113 | """ 114 | Args: 115 | - action (tuple): u and change in v. 116 | Returns: 117 | - observation (object): 118 | - reward (float): 119 | - done (boolean): 120 | - info (dict): 121 | """ 122 | 123 | u, w = action 124 | u = (np.tanh(u) + 1) / 2 * ( 125 | self.MAX_SPEED - self.MIN_SPEED) + self.MIN_SPEED # normalize the range of action is -1.5 to 1.5 126 | w = np.tanh(w) * self.MAX_THETA # normalize 127 | 128 | self.sim.step(np.array([u, w])) 129 | 130 | if self.path_pointer>=0: 131 | while self._get_dist(self.sim.position, self.goal_pos) < 1. 
and not self.path_pointer>=len(self.path)-1: 132 | self.path_pointer += .05 133 | if self.path_pointer>len(self.path)-1: 134 | self.path_pointer = len(self.path)-1 135 | progress = self.path_pointer - int(self.path_pointer) 136 | new_goal_pos = self.path[int(self.path_pointer)] * (1 - progress) + progress * self.path[int(np.ceil(self.path_pointer))] 137 | self.goal_pos = new_goal_pos 138 | 139 | return (self._get_observation()), (self._get_reward()), self._is_done(), {} 140 | 141 | def get_closest_directions(self): 142 | min_dist = -1 143 | prev = [0,0] 144 | min_direction_curr = [0,0] 145 | min_direction_next = [0,0] 146 | for i in range(np.ceil(self.path_pointer+0.05)): 147 | if i==0: 148 | min_dist = self._get_dist(self.sim.position, self.path[i]) 149 | min_direction_curr = self.path[i] 150 | min_direction_next = self.path[i] 151 | prev = min_direction_curr 152 | if i>=len(self.path)-1: 153 | if self._get_dist(self.sim.position, self.path[i]) self.FIELD_SIZE or y < -self.FIELD_SIZE or y > self.FIELD_SIZE 22 | 23 | @property 24 | def speed(self): 25 | return self._state[[0, 1]] 26 | 27 | @property 28 | def position(self): 29 | return self._state[[2, 3]] 30 | 31 | @property 32 | def theta(self): 33 | return self._state[4] 34 | 35 | def step(self, action): 36 | next_u, next_w = action 37 | 38 | u, w, x, y, theta = self._state 39 | 40 | # Update state 41 | new_theta = theta + w * self.dt 42 | while new_theta > np.pi: 43 | new_theta -= np.pi * 2 44 | while new_theta < -np.pi: 45 | new_theta += np.pi * 2 46 | new_x = x + u * np.cos(theta) * self.dt 47 | new_y = y + u * np.sin(theta) * self.dt 48 | 49 | self.ticks += 1 50 | 51 | self._state = np.array([next_u, next_w, new_x, new_y, new_theta]) 52 | 53 | def reset(self): 54 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 55 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 56 | rand_theta = np.random.uniform(-np.pi, np.pi) 57 | 58 | self._state = np.array([ 59 | 0, 60 | 0, 61 | rand_x, 62 | rand_y, 63 | rand_theta 64 | ]) 65 | self.start_state = self._state.copy() 66 | 67 | 68 | def reset_mode(self, itr, mode = 'gradual'): 69 | rand_x = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 70 | rand_y = np.random.uniform(-self.FIELD_SIZE, self.FIELD_SIZE) / 5. 
71 | rand_theta = np.random.uniform(-np.pi, np.pi) 72 | 73 | if itr < 30: 74 | self._state = np.array([ 75 | 0, 76 | 0, 77 | 0, 78 | 0, 79 | 0 80 | ]) 81 | elif itr < 100: 82 | self._state = np.array([ 83 | 0, 84 | 0, 85 | rand_x, 86 | rand_y, 87 | 0 88 | ]) 89 | elif itr < 150: 90 | self._state = np.array([ 91 | 0, 92 | 0, 93 | rand_x * 2, 94 | rand_y * 2, 95 | 0 96 | ]) 97 | else: 98 | self._state = np.array([ 99 | 0, 100 | 0, 101 | rand_x * 2, 102 | rand_y * 2, 103 | rand_theta 104 | ]) -------------------------------------------------------------------------------- /robot-env/robot_env/envs/viewer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from os import path 3 | from gym.envs.classic_control import rendering 4 | 5 | class Viewer: 6 | def __init__(self, env): 7 | self.env = env 8 | self.sim = env.sim 9 | 10 | self.pathTrace = 30 11 | self.pathTraceSpace = 2 12 | self.pathTraceSpaceCounter = 0 13 | self.path = np.zeros([self.pathTrace, 2]) 14 | self.pathPtr = 0 15 | 16 | # Set the display window size and range 17 | self.viewer = rendering.Viewer(500, 500) 18 | self.viewer.set_bounds(-self.sim.FIELD_SIZE, self.sim.FIELD_SIZE, -self.sim.FIELD_SIZE, 19 | self.sim.FIELD_SIZE) # Scale (X,X,Y,Y) 20 | 21 | # Create the robot 22 | fname = path.join(path.dirname(__file__), "assets/robot.png") 23 | self.robotobj = rendering.Image(fname, .25, .25) 24 | self.robot_t = rendering.Transform() 25 | self.robotobj.add_attr(self.robot_t) 26 | 27 | # Create the target goal 28 | fname = path.join(path.dirname(__file__), "assets/target.png") 29 | self.targetobj = rendering.Image(fname, .3, .3) 30 | self.targetobj.set_color(255, 0, 0) 31 | self.target_t = rendering.Transform() 32 | self.targetobj.add_attr(self.target_t) 33 | 34 | # Create the goal location 35 | #self.goalobj = rendering.make_circle(.1) 36 | #self.goalobj.set_color(255, 0, 0) 37 | #self.goal_t = rendering.Transform() 38 | #self.goalobj.add_attr(self.goal_t) 39 | #self.viewer.add_geom(self.goalobj) 40 | #self.goal_t.set_translation(*self.env.goal_pos) 41 | 42 | # Create trace path 43 | self.traceobj = [] 44 | self.traceobj_t = [] 45 | for i in range(self.pathTrace): 46 | self.traceobj.append(rendering.make_circle(.02 + .03 * i / self.pathTrace)) 47 | # print(.5 * i / self.pathTrace, 1. - .5 * i / self.pathTrace, i / self.pathTrace) 48 | self.traceobj[i].set_color(.5 - .5 * i / self.pathTrace, 1. 
- .5 * i / self.pathTrace, 49 | i / self.pathTrace) # Setting the color gradiant for path 50 | self.traceobj_t.append(rendering.Transform()) 51 | self.traceobj[i].add_attr(self.traceobj_t[i]) 52 | self.traceobj_t[i].set_translation(-2 + i * 0.05, 0) 53 | self.viewer.add_geom(self.traceobj[i]) 54 | 55 | self.goalPathobj = [] 56 | self.goalPathobj_t = [] 57 | for i in range (1,len(self.env.path)): 58 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i])) 59 | self.goalPathobj[i-1].set_color(1.,0,0) 60 | self.viewer.add_geom(self.goalPathobj[i-1]) 61 | 62 | def render(self, mode='human'): 63 | # Draw the robot 64 | self.viewer.add_onetime(self.robotobj) 65 | self.robot_t.set_translation(self.sim.position[0], self.sim.position[1]) 66 | self.robot_t.set_rotation(self.sim.theta - np.pi / 2) 67 | 68 | self.viewer.add_onetime(self.targetobj) 69 | self.target_t.set_translation(*self.env.goal_pos) 70 | 71 | # Update trace 72 | self.pathTraceSpaceCounter = (self.pathTraceSpaceCounter + 1) % self.pathTraceSpace 73 | if self.pathTraceSpaceCounter == 0: 74 | self.path[self.pathPtr][0], self.path[self.pathPtr][1] = (self.sim.position[0], self.sim.position[1]) 75 | self.pathPtr = (self.pathPtr + 1) % self.pathTrace 76 | for i in range(self.pathTrace): 77 | counter = (i + self.pathPtr) % self.pathTrace 78 | self.traceobj_t[i].set_translation(self.path[counter][0], self.path[counter][1]) 79 | 80 | self.viewer.geoms = self.viewer.geoms[:self.pathTrace] 81 | self.goalPathobj = [] 82 | self.goalPathobj_t = [] 83 | for i in range (1,len(self.env.path)): 84 | self.goalPathobj.append(rendering.Line(self.env.path[i-1],self.env.path[i])) 85 | self.goalPathobj[i-1].set_color(1.,0,0) 86 | self.viewer.add_geom(self.goalPathobj[i-1]) 87 | #self.goal_t.set_translation(*self.env.goal_pos) 88 | output = self.viewer.render(return_rgb_array=mode == 'rgb_array') 89 | 90 | return output 91 | -------------------------------------------------------------------------------- /robot-env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='robot-env', 4 | version='0.0.1', 5 | install_requires=['gym'] # And any other dependencies foo needs 6 | ) -------------------------------------------------------------------------------- /spinning.py: -------------------------------------------------------------------------------- 1 | # from spinup.utils.run_utils import ExperimentGrid 2 | # from spinup import ppo_pytorch 3 | # import torch 4 | 5 | # if __name__ == '__main__': 6 | # import argparse 7 | # parser = argparse.ArgumentParser() 8 | # parser.add_argument('--cpu', type=int, default=4) 9 | # parser.add_argument('--num_runs', type=int, default=3) 10 | # args = parser.parse_args() 11 | 12 | # eg = ExperimentGrid(name='ppo-pyt-bench') 13 | # eg.add('env_name', 'CartPole-v0', '', True) 14 | # eg.add('seed', [10*i for i in range(args.num_runs)]) 15 | # eg.add('epochs', 10) 16 | # eg.add('steps_per_epoch', 4000) 17 | # eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid') 18 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') 19 | # eg.run(ppo_pytorch, num_cpu=args.cpu) 20 | 21 | 22 | #======== 23 | 24 | from spinup.utils.run_utils import ExperimentGrid 25 | from spinup import ppo_pytorch, ddpg_pytorch, sac_pytorch 26 | import torch 27 | import gym 28 | 29 | if __name__ == '__main__': 30 | ############ 31 | # Setup 32 | ############ 33 | ENV_NAME = 'robot_env:robot-env-v0' 34 | #ENV_NAME = 
'robot_env:robot-env-controller-v0' 35 | env = gym.make(ENV_NAME) 36 | feat = 32 37 | 38 | import argparse 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--cpu', type=int, default=4) 41 | parser.add_argument('--num_runs', type=int, default=3) 42 | parser.add_argument('--algo', required=True) 43 | parser.add_argument('--name', default=None) 44 | parser.add_argument('--epochs', type=int, default=50) 45 | args = parser.parse_args() 46 | 47 | if args.name is None: 48 | args.name = args.algo 49 | 50 | eg = ExperimentGrid(name=args.name) 51 | eg.add('env_name', ENV_NAME, '', True) 52 | eg.add('seed', [10*i for i in range(args.num_runs)]) 53 | eg.add('epochs', args.epochs) 54 | eg.add('steps_per_epoch', 4000) 55 | # eg.add('ac_kwargs:hidden_sizes', [(feat,), (feat*2,feat*2)], 'hid') 56 | # eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '') 57 | eg.add('ac_kwargs:hidden_sizes', [(feat*2,feat*2)], 'hid') 58 | eg.add('ac_kwargs:activation', [torch.nn.ReLU], '') 59 | 60 | if args.algo == 'ppo': 61 | eg.run(ppo_pytorch, num_cpu=args.cpu) 62 | 63 | elif args.algo == 'ddpg': 64 | eg.run(ddpg_pytorch, num_cpu=args.cpu) 65 | 66 | elif args.algo == 'sac': 67 | eg.run(sac_pytorch, num_cpu=args.cpu) 68 | 69 | else: 70 | raise ValueError("invalid algo.") 71 | --------------------------------------------------------------------------------
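Example invocations for spinning.py, as a sketch: they assume OpenAI Spinning Up (with its PyTorch algorithm implementations) and the local robot-env package are installed, and the value passed to --name is an arbitrary experiment label. The flags mirror the argparse arguments defined in the script.

    python spinning.py --algo ppo --epochs 50 --num_runs 3 --cpu 4
    python spinning.py --algo ddpg --name ddpg-robot-env --epochs 100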