├── README.md
├── gym_auv
    ├── .DS_Store
    ├── envs
    │   ├── .DS_Store
    │   ├── __init__.py
    │   ├── pathfollowing.py
    │   ├── pathcolav.py
    │   └── colav.py
    ├── objects
    │   ├── .DS_Store
    │   ├── obstacles.py
    │   ├── path.py
    │   └── auv.py
    ├── utils
    │   ├── geomutils.py
    │   └── constants.py
    └── __init__.py
├── setup.py
└── .gitignore


/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/gym_auv/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/camillasterud/gym-auv/HEAD/gym_auv/.DS_Store


--------------------------------------------------------------------------------
/gym_auv/envs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/camillasterud/gym-auv/HEAD/gym_auv/envs/.DS_Store


--------------------------------------------------------------------------------
/gym_auv/objects/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/camillasterud/gym-auv/HEAD/gym_auv/objects/.DS_Store


--------------------------------------------------------------------------------
/gym_auv/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from gym_auv.envs.colav import ColavEnv
2 | from gym_auv.envs.pathfollowing import PathFollowingEnv
3 | from gym_auv.envs.pathcolav import PathColavEnv
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(name='gym_auv',
4 |       version='0.0.1',
5 |       install_requires=['gym']  # And any other dependencies foo needs
6 |      )
7 | 


--------------------------------------------------------------------------------
/gym_auv/utils/geomutils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def princip(angle):
 5 |     return ((angle + np.pi) % (2*np.pi)) - np.pi
 6 | 
 7 | def Rzyx(phi, theta, psi):
 8 |     cphi = np.cos(phi)
 9 |     sphi = np.sin(phi)
10 |     cth = np.cos(theta)
11 |     sth = np.sin(theta)
12 |     cpsi = np.cos(psi)
13 |     spsi = np.sin(psi)
14 | 
15 |     return np.vstack([
16 |         np.hstack([cpsi*cth, -spsi*cphi+cpsi*sth*sphi, spsi*sphi+cpsi*cphi*sth]),
17 |         np.hstack([spsi*cth, cpsi*cphi+sphi*sth*spsi, -cpsi*sphi+sth*spsi*cphi]),
18 |         np.hstack([-sth, cth*sphi, cth*cphi])
19 |     ])
20 | 


--------------------------------------------------------------------------------
/gym_auv/objects/obstacles.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | class StaticObstacle():
 4 |     def __init__(self, position, radius, color=(0.6, 0, 0)):
 5 |         self.color = color
 6 |         if not isinstance(position, np.ndarray):
 7 |             position = np.array(position)
 8 |         if radius < 0:
 9 |             raise ValueError
10 |         self.radius = radius
11 |         self.position = position.flatten()
12 | 
13 |     def step(self):
14 |         pass
15 | 
16 |     def draw(self, viewer, color=None):
17 |         viewer.draw_circle(self.position, self.radius,
18 |                            color=self.color if color is None else color)
19 | 


--------------------------------------------------------------------------------
/gym_auv/__init__.py:
--------------------------------------------------------------------------------
 1 | from gym.envs.registration import register
 2 | 
 3 | CONFIG_COLAV = {
 4 |     "reward_ds": 1,
 5 |     "reward_closeness": -0.5,
 6 |     "reward_speed_error": -0.08,
 7 |     "reward_collision": -0.5,
 8 |     "nobstacles": 20,
 9 |     "obst_detection_range": 40,
10 |     "obst_reward_range": 15,
11 |     "t_step": 0.1,
12 |     "cruise_speed": 1.5,
13 |     "goal_dist": 400,
14 |     "reward_rudderchange": 0,
15 |     "min_reward": -500
16 | }
17 | 
18 | CONFIG_PATHFOLLOWING = {
19 |     "reward_ds": 1,
20 |     "reward_speed_error": -0.08,
21 |     "reward_cross_track_error": -0.5,
22 |     "t_step": 0.1,
23 |     "cruise_speed": 1.5,
24 |     "la_dist": 10,
25 |     "goal_dist": 400,
26 |     "reward_rudderchange": 0,
27 |     "min_reward": -500
28 | }
29 | 
30 | CONFIG_PATHCOLAV = {
31 |     "reward_ds": 1,
32 |     "reward_speed_error": -0.08,
33 |     "reward_cross_track_error": -0.5,
34 |     "reward_closeness": -0.5,
35 |     "reward_collision": -0.5,
36 |     "nobstacles": 20,
37 |     "obst_detection_range": 40,
38 |     "obst_reward_range": 15,
39 |     "t_step": 0.1,
40 |     "cruise_speed": 1.5,
41 |     "la_dist": 10,
42 |     "goal_dist": 400,
43 |     "reward_rudderchange": 0,
44 |     "min_reward": -500
45 | }
46 | 
47 | register(
48 |     id='COLAV-v0',
49 |     entry_point='gym_auv.envs:ColavEnv',
50 |     kwargs={'env_config': CONFIG_COLAV}
51 | )
52 | 
53 | register(
54 |     id='PathFollowing-v0',
55 |     entry_point='gym_auv.envs:PathFollowingEnv',
56 |     kwargs={'env_config': CONFIG_PATHFOLLOWING}
57 | )
58 | 
59 | register(
60 |     id='PathColav-v0',
61 |     entry_point='gym_auv.envs:PathColavEnv',
62 |     kwargs={'env_config': CONFIG_PATHCOLAV}
63 | )
64 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | .DS_Store


--------------------------------------------------------------------------------
/gym_auv/utils/constants.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | m = 18
 4 | I_zz = 0
 5 | X_udot = -1
 6 | Y_vdot = -16
 7 | N_rdot = -2.1
 8 | X_u = -2.4
 9 | Y_v = -23
10 | Y_r = 11.5
11 | N_v = -3.1
12 | N_r = -9.7
13 | X_uu = -2.4
14 | Y_vv = -80
15 | Y_rr = 0.3
16 | N_vv = -1.5
17 | N_rr = -9.1
18 | Y_uvb = -0.5*1000*np.pi*1.24*(0.15/2)**2
19 | Y_uvf = -1000*3*0.0064
20 | Y_urf = -0.4*Y_uvf
21 | N_uvb = (-0.65*1.08 + 0.4)*Y_uvb
22 | N_uvf = -0.4*Y_uvf
23 | N_urf = -0.4*Y_urf
24 | Y_uudr = 19.2
25 | N_uudr = -0.4*Y_uudr
26 | 
27 | 
28 | THRUST_MIN_AUV = 0
29 | THRUST_MAX_AUV = 14.0417
30 | RUDDER_MAX_AUV = 25*2*np.pi/360
31 | MAX_SPEED = 2
32 | 
33 | M_RB = np.diag([m, m, I_zz])
34 | 
35 | M_A = -np.diag([X_udot, Y_vdot,N_rdot])
36 | 
37 | M_inv = np.linalg.inv(M_RB + M_A)
38 | 
39 | C_RB = np.array([[0, 0, -m],
40 |                  [0, 0, m],
41 |                  [m, -m, 0]])
42 | C_A = np.array([[0, 0, Y_vdot],
43 |                 [0, 0, -X_udot],
44 |                 [-Y_vdot, X_udot, 0]])
45 | D_lin = -np.array([[X_u, 0, 0],
46 |                    [0, Y_v, Y_r],
47 |                    [0, N_v, N_r]])
48 | 
49 | D_quad = -np.diag([X_uu, Y_vv, N_rr])
50 | 
51 | L_mat = np.array([[0, 0, 0],
52 |                   [0, 30, -7.7],
53 |                   [0, -9.9, 3.1]])
54 | 
55 | def D(nu):
56 |     D = (D_lin
57 |          + D_quad @ np.diag(np.absolute(nu)))
58 |     D[2, 1] += D_quad[2, 1]*abs(nu[1])
59 |     D[1, 2] += D_quad[1, 2]*abs(nu[2])
60 |     return D
61 | 
62 | def B(nu):
63 |     return np.array([
64 |         [1, 0],
65 |         [0, Y_uudr*(nu[0]**2)],
66 |         [0, N_uudr*(nu[0]**2)],
67 |     ])
68 | 
69 | def C(nu):
70 |     C = C_RB + C_A
71 |     C[0,2] = C[0,2]*nu[1]
72 |     C[1,2] = C[1,2]*nu[0]
73 |     C[2,0] = C[2,0]*nu[1]
74 |     C[2,1] = C[2,1]*nu[0]
75 |     return C
76 | 
77 | def L(nu):
78 |     return L_mat*nu[0]
79 | 


--------------------------------------------------------------------------------
/gym_auv/objects/path.py:
--------------------------------------------------------------------------------
 1 | from copy import deepcopy
 2 | import numpy as np
 3 | import numpy.linalg as linalg
 4 | 
 5 | from scipy import interpolate
 6 | from scipy.optimize import fminbound
 7 | 
 8 | import gym_auv.utils.geomutils as geom
 9 | 
10 | class ParamCurve():
11 |     def __init__(self, waypoints):
12 | 
13 |         for _ in range(3):
14 |             arclengths = arc_len(waypoints)
15 |             path_coords = interpolate.pchip(
16 |                 x=arclengths, y=waypoints, axis=1)
17 |             waypoints = path_coords(
18 |                 np.linspace(arclengths[0], arclengths[-1], 1000))
19 | 
20 |         self.path_coords = path_coords
21 |         self.s_max = arclengths[-1]
22 |         self.length = self.s_max
23 | 
24 |     def __call__(self, arclength):
25 |         return self.path_coords(arclength)
26 | 
27 |     def get_direction(self, arclength):
28 |         arclength = np.clip(arclength, 0.05, self.s_max - 0.05)
29 |         delta_x, delta_y = (self.path_coords(arclength + 0.05)
30 |                             - self.path_coords(arclength - 0.05))
31 |         return geom.princip(np.arctan2(delta_y, delta_x))
32 | 
33 |     def get_endpoint(self):
34 |         return self(self.s_max)
35 | 
36 |     def get_closest_arclength(self, position):
37 |         return fminbound(lambda s: linalg.norm(self(s) - position),
38 |                          x1=0, x2=self.length, xtol=1e-6,
39 |                          maxfun=10000)
40 | 
41 |     def __reversed__(self):
42 |         curve = deepcopy(self)
43 |         path_coords = curve.path_coords
44 |         curve.path_coords = lambda s: path_coords(curve.length-s)
45 |         return curve
46 | 
47 |     def plot(self, ax, s, *opts):
48 |         s = np.array(s)
49 |         z = self(s)
50 |         ax.plot(-z[1, :], z[0, :], *opts)
51 | 
52 | class RandomCurveThroughOrigin(ParamCurve):
53 |     def __init__(self, rng, nwaypoints, length=400):
54 |         angle_init = 2*np.pi*(rng.rand() - 0.5)
55 |         start = np.array([length*np.cos(angle_init),
56 |                           length*np.sin(angle_init)])
57 |         end = -np.array(start)
58 |         waypoints = np.vstack([start, end])
59 |         for waypoint in range(nwaypoints // 2):
60 |             newpoint1 = ((nwaypoints // 2 - waypoint)
61 |                          * start / (nwaypoints // 2 + 1)
62 |                          + length / (nwaypoints // 2 + 1)
63 |                          * (rng.rand()-0.5))
64 |             newpoint2 = ((nwaypoints // 2 - waypoint)
65 |                          * end / (nwaypoints // 2 + 1)
66 |                          + length / (nwaypoints // 2 + 1)
67 |                          * (rng.rand()-0.5))
68 |             waypoints = np.vstack([waypoints[:waypoint+1, :],
69 |                                    newpoint1,
70 |                                    np.array([0, 0]),
71 |                                    newpoint2,
72 |                                    waypoints[-1*waypoint-1:, :]])
73 |         super().__init__(np.transpose(waypoints))
74 | 
75 | 
76 | def arc_len(coords):
77 |     diff = np.diff(coords, axis=1)
78 |     delta_arc = np.sqrt(np.sum(diff ** 2, axis=0))
79 |     return np.concatenate([[0], np.cumsum(delta_arc)])
80 | 


--------------------------------------------------------------------------------
/gym_auv/objects/auv.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module implements an AUV that is simulated in the horizontal plane.
  3 | """
  4 | import numpy as np
  5 | import numpy.linalg as linalg
  6 | 
  7 | import gym_auv.utils.constants as const
  8 | import gym_auv.utils.geomutils as geom
  9 | 
 10 | class AUV2D():
 11 |     """
 12 |     Creates an environment with a vessel, goal and obstacles.
 13 | 
 14 |     Attributes
 15 |     ----------
 16 |     path_taken : np.array
 17 |         Array of size (?, 2) discribing the path the AUV has taken.
 18 |     radius : float
 19 |         The maximum distance from the center of the AUV to its edge
 20 |         in meters.
 21 |     t_step : float
 22 |         The simulation timestep.
 23 |     input : np.array
 24 |         The current input. [propeller_input, rudder_position].
 25 |     """
 26 |     def __init__(self, t_step, init_pos, width=1):
 27 |         """
 28 |         The __init__ method declares all class atributes.
 29 | 
 30 |         Parameters
 31 |         ----------
 32 |         t_step : float
 33 |             The simulation timestep to be used to simulate this AUV.
 34 |         init_pos : np.array
 35 |             The initial position of the vessel [x, y, psi], where
 36 |             psi is the initial heading of the AUV.
 37 |         width : float
 38 |             The maximum distance from the center of the AUV to its edge
 39 |             in meters. Defaults to 1.
 40 |         """
 41 |         self._state = np.hstack([init_pos, [0, 0, 0]])
 42 |         self.prev_states = self._state
 43 |         self.radius = width
 44 |         self.t_step = t_step
 45 |         self.input = [0, 0]
 46 | 
 47 |     def step(self, action):
 48 |         """
 49 |         Steps the vessel self.t_step seconds forward.
 50 | 
 51 |         Parameters
 52 |         ----------
 53 |         action : np.array
 54 |             [propeller_input, rudder_position], where
 55 |             0 <= propeller_input <= 1 and -1 <= rudder_position <= 1.
 56 |         """
 57 |         self.input = np.array([_surge(action[0]), _steer(action[1])])
 58 |         self._sim()
 59 | 
 60 |         self.prev_states = np.vstack([self.prev_states,
 61 |                                       self._state])
 62 | 
 63 |     def _sim(self):
 64 |         psi = self._state[2]
 65 |         nu = self._state[3:]
 66 | 
 67 |         eta_dot = geom.Rzyx(0, 0, geom.princip(psi)).dot(nu)
 68 |         nu_dot = const.M_inv.dot(const.B(nu).dot(self.input)
 69 |                                  - const.D(nu).dot(nu)
 70 |                                  - const.C(nu).dot(nu)
 71 |                                  - const.L(nu).dot(nu))
 72 |         state_dot = np.concatenate([eta_dot, nu_dot])
 73 |         self._state += state_dot*self.t_step
 74 |         self._state[2] = geom.princip(self._state[2])
 75 | 
 76 |     @property
 77 |     def position(self):
 78 |         """
 79 |         Returns an array holding the position of the AUV in cartesian
 80 |         coordinates.
 81 |         """
 82 |         return self._state[0:2]
 83 | 
 84 |     @property
 85 |     def heading(self):
 86 |         """
 87 |         Returns the heading of the AUV wrt true north.
 88 |         """
 89 |         return self._state[2]
 90 | 
 91 |     @property
 92 |     def velocity(self):
 93 |         """
 94 |         Returns the surge and sway velocity of the AUV.
 95 |         """
 96 |         return self._state[3:5]
 97 | 
 98 |     @property
 99 |     def yawrate(self):
100 |         """
101 |         Returns the rate of rotation about the z-axis.
102 |         """
103 |         return self._state[5]
104 | 
105 |     @property
106 |     def max_speed(self):
107 |         """
108 |         Returns the max speed of the AUV.
109 |         """
110 |         return const.MAX_SPEED
111 | 
112 | 
113 | def _surge(surge):
114 |     surge = np.clip(surge, 0, 1)
115 |     return (surge*(const.THRUST_MAX_AUV - const.THRUST_MIN_AUV)
116 |             + const.THRUST_MIN_AUV)
117 | 
118 | def _steer(steer):
119 |     steer = np.clip(steer, -1, 1)
120 |     return steer*const.RUDDER_MAX_AUV
121 | 


--------------------------------------------------------------------------------
/gym_auv/envs/pathfollowing.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module implements the AUV gym environment through the AUVenv class.
  3 | """
  4 | 
  5 | import gym
  6 | from gym.utils import seeding
  7 | import numpy as np
  8 | import numpy.linalg as linalg
  9 | 
 10 | import gym_auv.utils.geomutils as geom
 11 | from gym_auv.objects.auv import AUV2D
 12 | from gym_auv.objects.path import RandomCurveThroughOrigin
 13 | 
 14 | class PathFollowingEnv(gym.Env):
 15 |     """
 16 |     Creates an environment with a vessel and a path.
 17 |     Attributes
 18 |     ----------
 19 |     config : dict
 20 |         The configuration disctionary specifying rewards,
 21 |         look ahead distance, simulation timestep and desired cruise
 22 |         speed.
 23 |     nstates : int
 24 |         The number of state variables passed to the agent.
 25 |     vessel : gym_auv.objects.auv.AUV2D
 26 |         The AUV that is controlled by the agent.
 27 |     path : gym_auv.objects.path.RandomCurveThroughOrigin
 28 |         The path to be followed.
 29 |     np_random : np.random.RandomState
 30 |         Random number generator.
 31 |     reward : float
 32 |         The accumulated reward
 33 |     path_prog : float
 34 |         Progression along the path in terms of arc length covered.
 35 |     past_actions : np.array
 36 |         All actions that have been perfomed.
 37 |     action_space : gym.spaces.Box
 38 |         The action space. Consists of two floats that must take on
 39 |         values between -1 and 1.
 40 |     observation_space : gym.spaces.Box
 41 |         The observation space. Consists of
 42 |         self.nstates + self.nsectors floats that must be between
 43 |         0 and 1.
 44 |     """
 45 | 
 46 |     metadata = {}
 47 | 
 48 |     def __init__(self, env_config):
 49 |         """
 50 |         The __init__ method declares all class atributes and calls
 51 |         the self.reset() to intialize them properly.
 52 | 
 53 |         Parameters
 54 |         ----------
 55 |         env_config : dict
 56 |             Configuration parameters for the environment.
 57 |             Must have the following members:
 58 |             reward_ds
 59 |                 The reward for progressing ds along the path in
 60 |                 one timestep. reward += reward_ds*ds.
 61 |             reward_speed_error
 62 |                 reward += reward_speed_error*speed_error where the
 63 |                 speed error is abs(speed-cruise_speed)/max_speed.
 64 |             reward_cross_track_error
 65 |                 reward += reward_cross_track_error*cross_track_error
 66 |             la_dist
 67 |                 The look ahead distance.
 68 |             t_step
 69 |                 The timestep
 70 |             cruise_speed
 71 |                 The desired cruising speed.
 72 |         """
 73 |         self.config = env_config
 74 |         self.nstates = 6
 75 |         nobservations = self.nstates
 76 |         self.vessel = None
 77 |         self.path = None
 78 | 
 79 |         self.np_random = None
 80 | 
 81 |         self.reward = 0
 82 |         self.path_prog = None
 83 |         self.past_actions = None
 84 |         self.past_obs = None
 85 |         self.t_step = None
 86 | 
 87 |         self.reset()
 88 | 
 89 |         self.action_space = gym.spaces.Box(low=np.array([0, -1]),
 90 |                                            high=np.array([1, 1]),
 91 |                                            dtype=np.float32)
 92 |         self.observation_space = gym.spaces.Box(
 93 |             low=np.array([-1]*nobservations),
 94 |             high=np.array([1]*nobservations),
 95 |             dtype=np.float32)
 96 | 
 97 |     def step(self, action):
 98 |         """
 99 |         Simulates the environment for one timestep when action
100 |         is performed
101 | 
102 |         Parameters
103 |         ----------
104 |         action : np.array
105 |             [propeller_input, rudder_position].
106 |         Returns
107 |         -------
108 |         obs : np.array
109 |             Observation of the environment after action is performed.
110 |         step_reward : double
111 |             The reward for performing action at his timestep.
112 |         done : bool
113 |             If True the episode is ended.
114 |         info : dict
115 |             Empty, is included because it is required of the
116 |             OpenAI Gym frameowrk.
117 |         """
118 |         action = np.clip(action, np.array([0, -1]), np.array([1, 1]))
119 |         self.past_actions = np.vstack([self.past_actions, action])
120 |         self.vessel.step(action)
121 | 
122 |         prog = self.path.get_closest_arclength(self.vessel.position)
123 |         self.path_prog = np.append(self.path_prog, prog)
124 | 
125 |         obs = self.observe()
126 |         self.past_obs = np.vstack([self.past_obs, obs])
127 |         done, step_reward = self.step_reward()
128 |         info = {}
129 | 
130 |         return obs, step_reward, done, info
131 | 
132 |     def step_reward(self):
133 |         """
134 |         Calculates the step_reward and decides whether the episode
135 |         should be ended.
136 | 
137 |         Parameters
138 |         ----------
139 |         obs : np.array
140 |             The observation of the environment.
141 |         Returns
142 |         -------
143 |         done : bool
144 |             If True the episode is ended.
145 |         step_reward : double
146 |             The reward for performing action at his timestep.
147 |         """
148 |         done = False
149 |         step_reward = 0
150 |         delta_path_prog = self.path_prog[-1] - self.path_prog[-2]
151 |         max_prog = self.config["cruise_speed"]*self.t_step
152 |         speed_error = ((linalg.norm(self.vessel.velocity)
153 |                         - self.config["cruise_speed"])
154 |                        /self.vessel.max_speed)
155 |         step_reward += (np.clip(delta_path_prog/max_prog, -1, 1)
156 |                         *self.config["reward_ds"])
157 |         step_reward += (abs(self.past_obs[-1, -1])
158 |                         *self.config["reward_cross_track_error"])
159 |         step_reward += (max(speed_error, 0)
160 |                         *self.config["reward_speed_error"])
161 | 
162 |         dist_to_endpoint = linalg.norm(self.vessel.position
163 |                                        - self.path.get_endpoint())
164 | 
165 |         self.reward += step_reward
166 | 
167 |         if (self.reward < self.config["min_reward"]
168 |                 or abs(self.path_prog[-1] - self.path.length) < 2
169 |                 or dist_to_endpoint < 5):
170 |             done = True
171 | 
172 |         return done, step_reward
173 | 
174 |     def generate(self):
175 |         """
176 |         Sets up the environment. Generates the path and
177 |         initialises the AUV.
178 |         """
179 |         nwaypoints = int(np.floor(9*self.np_random.rand() + 2))
180 |         self.path = RandomCurveThroughOrigin(self.np_random,
181 |                                              nwaypoints, 400)
182 | 
183 |         init_pos = self.path(0)
184 |         init_angle = self.path.get_direction(0)
185 | 
186 |         init_pos[0] += 50*(self.np_random.rand()-0.5)
187 |         init_pos[1] += 50*(self.np_random.rand()-0.5)
188 |         init_angle = geom.princip(init_angle
189 |                                   + 2*np.pi*(self.np_random.rand()-0.5))
190 |         self.t_step = self.config["t_step"]
191 |         self.vessel = AUV2D(self.t_step,
192 |                             np.hstack([init_pos, init_angle]))
193 |         self.path_prog = np.array([
194 |             self.path.get_closest_arclength(self.vessel.position)])
195 | 
196 |     def reset(self):
197 |         """
198 |         Resets the environment by reseeding and calling self.generate.
199 | 
200 |         Returns
201 |         -------
202 |         obs : np.array
203 |             The initial observation of the environment.
204 |         """
205 |         self.vessel = None
206 |         self.path = None
207 |         self.reward = 0
208 |         self.path_prog = None
209 |         self.past_actions = np.array([[0, 0]])
210 |         self.t_step = None
211 | 
212 |         if self.np_random is None:
213 |             self.seed()
214 | 
215 |         self.generate()
216 |         obs = self.observe()
217 |         self.past_obs = np.array([obs])
218 |         return obs
219 | 
220 |     def observe(self):
221 |         """
222 |         Generates the observation of the environment.
223 |         Parameters
224 |         ----------
225 |         action : np.array
226 |             [propeller_input, rudder_position].
227 | 
228 |         Returns
229 |         -------
230 |         obs : np.array
231 |             [
232 |             surge velocity,
233 |             sway velocity,
234 |             yawrate,
235 |             heading error,
236 |             cross track error,
237 |             propeller_input,
238 |             rudder_position,
239 |             ]
240 |             All observations are between -1 and 1.
241 |         """
242 |         la_heading = self.path.get_direction(
243 |             self.path_prog[-1] + self.config["la_dist"])
244 |         heading_error_la = float(geom.princip(la_heading
245 |                                               - self.vessel.heading))
246 |         path_position = (self.path(self.path_prog[-1]
247 |                                    + self.config["la_dist"])
248 |                          - self.vessel.position)
249 |         target_heading = np.arctan2(path_position[1], path_position[0])
250 |         heading_error = float(geom.princip(target_heading
251 |                                            - self.vessel.heading))
252 |         path_direction = self.path.get_direction(self.path_prog[-1])
253 |         cross_track_error = geom.Rzyx(0, 0, -path_direction).dot(
254 |             np.hstack([self.path(self.path_prog[-1])
255 |                        - self.vessel.position, 0]))[1]
256 | 
257 |         obs = np.zeros((self.nstates,))
258 | 
259 |         obs[0] = np.clip(self.vessel.velocity[0]
260 |                          /self.vessel.max_speed, -1, 1)
261 |         obs[1] = np.clip(self.vessel.velocity[1]
262 |                          /0.26, -1, 1)
263 |         obs[2] = np.clip(self.vessel.yawrate/0.55, -1, 1)
264 |         obs[3] = np.clip(heading_error_la/np.pi, -1, 1)
265 |         obs[4] = np.clip(heading_error/np.pi, -1, 1)
266 |         obs[5] = np.clip(cross_track_error
267 |                          /25, -1, 1)
268 | 
269 |         return obs
270 | 
271 | 
272 |     def seed(self, seed=5):
273 |         self.np_random, seed = seeding.np_random(seed)
274 |         return [seed]
275 | 
276 |     def render(self, mode='human'):
277 |         pass


--------------------------------------------------------------------------------
/gym_auv/envs/pathcolav.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module implements the AUV gym environment through the AUVenv class.
  3 | """
  4 | 
  5 | import gym
  6 | from gym.utils import seeding
  7 | import numpy as np
  8 | import numpy.linalg as linalg
  9 | 
 10 | import gym_auv.utils.geomutils as geom
 11 | from gym_auv.objects.auv import AUV2D
 12 | from gym_auv.objects.path import RandomCurveThroughOrigin
 13 | from gym_auv.objects.obstacles import StaticObstacle
 14 | 
 15 | class PathColavEnv(gym.Env):
 16 |     """
 17 |     Creates an environment with a vessel and a path.
 18 |     Attributes
 19 |     ----------
 20 |     config : dict
 21 |         The configuration disctionary specifying rewards,
 22 |         look ahead distance, simulation timestep and desired cruise
 23 |         speed.
 24 |     nstates : int
 25 |         The number of state variables passed to the agent.
 26 |     vessel : gym_auv.objects.auv.AUV2D
 27 |         The AUV that is controlled by the agent.
 28 |     path : gym_auv.objects.path.RandomCurveThroughOrigin
 29 |         The path to be followed.
 30 |     np_random : np.random.RandomState
 31 |         Random number generator.
 32 |     reward : float
 33 |         The accumulated reward
 34 |     path_prog : float
 35 |         Progression along the path in terms of arc length covered.
 36 |     past_actions : np.array
 37 |         All actions that have been perfomed.
 38 |     action_space : gym.spaces.Box
 39 |         The action space. Consists of two floats that must take on
 40 |         values between -1 and 1.
 41 |     observation_space : gym.spaces.Box
 42 |         The observation space. Consists of
 43 |         self.nstates + self.nsectors floats that must be between
 44 |         0 and 1.
 45 |     """
 46 | 
 47 |     metadata = {}
 48 | 
 49 |     def __init__(self, env_config):
 50 |         """
 51 |         The __init__ method declares all class atributes and calls
 52 |         the self.reset() to intialize them properly.
 53 | 
 54 |         Parameters
 55 |         ----------
 56 |         env_config : dict
 57 |             Configuration parameters for the environment.
 58 |             Must have the following members:
 59 |             reward_ds
 60 |                 The reward for progressing ds along the path in
 61 |                 one timestep. reward += reward_ds*ds.
 62 |             reward_speed_error
 63 |                 reward += reward_speed_error*speed_error where the
 64 |                 speed error is abs(speed-cruise_speed)/max_speed.
 65 |             reward_cross_track_error
 66 |                 reward += reward_cross_track_error*cross_track_error
 67 |             la_dist
 68 |                 The look ahead distance.
 69 |             t_step
 70 |                 The timestep
 71 |             cruise_speed
 72 |                 The desired cruising speed.
 73 |         """
 74 |         self.config = env_config
 75 |         self.nstates = 6
 76 |         self.nsectors = 16
 77 |         nobservations = self.nstates + self.nsectors
 78 |         self.vessel = None
 79 |         self.path = None
 80 |         self.obstacles = None
 81 | 
 82 |         self.np_random = None
 83 | 
 84 |         self.reward = 0
 85 |         self.path_prog = None
 86 |         self.past_actions = None
 87 |         self.past_obs = None
 88 |         self.t_step = None
 89 | 
 90 |         self.reset()
 91 | 
 92 |         self.action_space = gym.spaces.Box(low=np.array([0, -1]),
 93 |                                            high=np.array([1, 1]),
 94 |                                            dtype=np.float32)
 95 |         low_obs = [-1]*nobservations
 96 |         high_obs = [1]*nobservations
 97 |         low_obs[self.nstates - 1] = -10000
 98 |         high_obs[self.nstates - 1] = 10000
 99 |         self.observation_space = gym.spaces.Box(
100 |             low=np.array(low_obs),
101 |             high=np.array(high_obs),
102 |             dtype=np.float32)
103 | 
104 |     def step(self, action):
105 |         """
106 |         Simulates the environment for one timestep when action
107 |         is performed
108 | 
109 |         Parameters
110 |         ----------
111 |         action : np.array
112 |             [propeller_input, rudder_position].
113 |         Returns
114 |         -------
115 |         obs : np.array
116 |             Observation of the environment after action is performed.
117 |         step_reward : double
118 |             The reward for performing action at his timestep.
119 |         done : bool
120 |             If True the episode is ended.
121 |         info : dict
122 |             Empty, is included because it is required of the
123 |             OpenAI Gym frameowrk.
124 |         """
125 |         self.past_actions = np.vstack([self.past_actions, action])
126 |         self.vessel.step(action)
127 | 
128 |         prog = self.path.get_closest_arclength(self.vessel.position)
129 |         self.path_prog = np.append(self.path_prog, prog)
130 | 
131 |         obs = self.observe()
132 |         self.past_obs = np.vstack([self.past_obs, obs])
133 |         done, step_reward, info = self.step_reward()
134 | 
135 |         return obs, step_reward, done, info
136 | 
137 |     def step_reward(self):
138 |         """
139 |         Calculates the step_reward and decides whether the episode
140 |         should be ended.
141 | 
142 |         Parameters
143 |         ----------
144 |         obs : np.array
145 |             The observation of the environment.
146 |         delta_path_prog : double
147 |             How much the vessel has progressed along the path
148 |             the last timestep.
149 |         Returns
150 |         -------
151 |         done : bool
152 |             If True the episode is ended.
153 |         step_reward : double
154 |             The reward for performing action at his timestep.
155 |         """
156 |         done = False
157 |         step_reward = 0
158 |         info = {"collision": False}
159 |         progress = self.path_prog[-1] - self.path_prog[-2]
160 |         max_prog = self.config["cruise_speed"]*self.t_step
161 |         speed_error = ((linalg.norm(self.vessel.velocity)
162 |                         - self.config["cruise_speed"])
163 |                        /self.vessel.max_speed)
164 |         step_reward += (np.clip(progress/max_prog, -1, 1)
165 |                         *self.config["reward_ds"])
166 |         step_reward += (abs(self.past_obs[-1, self.nstates - 1])
167 |                         *self.config["reward_cross_track_error"])
168 |         step_reward += (max(speed_error, 0)
169 |                         *self.config["reward_speed_error"])
170 | 
171 |         dist_to_endpoint = linalg.norm(self.vessel.position
172 |                                        - self.path.get_endpoint())
173 | 
174 |         if (self.reward < self.config["min_reward"]
175 |                 or abs(self.path_prog[-1] - self.path.length) < 2
176 |                 or dist_to_endpoint < 5):
177 |             done = True
178 | 
179 |         for sector in range(self.nsectors):
180 |             closeness = self.past_obs[-1, self.nstates + sector]
181 |             dist = self.config["obst_detection_range"]*(1 - closeness)
182 |             reward_range = self.config["obst_reward_range"]
183 |             if closeness >= 1:
184 |                 step_reward = self.config["reward_collision"]
185 |                 info["collision"] = True
186 |                 if self.config["end_on_collision"]:
187 |                     done = True
188 |             elif dist < reward_range:
189 |                 step_reward += ((1 - dist/reward_range)**2
190 |                                 *self.config["reward_closeness"])
191 | 
192 |         self.reward += step_reward
193 | 
194 |         return done, step_reward, info
195 | 
196 |     def generate(self):
197 |         """
198 |         Sets up the environment. Generates the path and
199 |         initialises the AUV.
200 |         """
201 |         nwaypoints = int(np.floor(9*self.np_random.rand() + 2))
202 |         self.path = RandomCurveThroughOrigin(self.np_random,
203 |                                              nwaypoints, 400)
204 | 
205 |         init_pos = self.path(0)
206 |         init_angle = self.path.get_direction(0)
207 | 
208 |         init_pos[0] += 50*(self.np_random.rand()-0.5)
209 |         init_pos[1] += 50*(self.np_random.rand()-0.5)
210 |         init_angle = geom.princip(init_angle
211 |                                   + 2*np.pi*(self.np_random.rand()-0.5))
212 |         self.t_step = self.config["t_step"]
213 |         self.vessel = AUV2D(self.t_step,
214 |                             np.hstack([init_pos, init_angle]))
215 |         self.path_prog = np.array([
216 |             self.path.get_closest_arclength(self.vessel.position)])
217 | 
218 |         max_obst_dist = max(linalg.norm(self.path(0)),
219 |                             linalg.norm(self.path.get_endpoint()))
220 | 
221 |         for _ in range(self.config["nobstacles"]):
222 |             obst_dist = 0.85*max_obst_dist*self.np_random.rand()
223 |             obst_ang = 2*np.pi*(self.np_random.rand()-0.5)
224 |             position = (obst_dist*np.array(
225 |                 [np.cos(obst_ang), np.sin(obst_ang)]))
226 |             radius = 15*(self.np_random.rand()+1)
227 |             self.obstacles.append(StaticObstacle(position, radius))
228 | 
229 |     def reset(self):
230 |         """
231 |         Resets the environment by reseeding and calling self.generate.
232 | 
233 |         Returns
234 |         -------
235 |         obs : np.array
236 |             The initial observation of the environment.
237 |         """
238 |         self.vessel = None
239 |         self.path = None
240 |         self.obstacles = []
241 |         self.reward = 0
242 |         self.path_prog = None
243 |         self.past_actions = np.array([[0, 0]])
244 |         self.t_step = None
245 | 
246 |         if self.np_random is None:
247 |             self.seed()
248 | 
249 |         self.generate()
250 |         obs = self.observe()
251 |         self.past_obs = np.array([obs])
252 |         return obs
253 | 
254 |     def observe(self):
255 |         """
256 |         Generates the observation of the environment.
257 |         Parameters
258 |         ----------
259 |         action : np.array
260 |             [propeller_input, rudder_position].
261 | 
262 |         Returns
263 |         -------
264 |         obs : np.array
265 |             [
266 |             surge velocity,
267 |             sway velocity,
268 |             yawrate,
269 |             heading error,
270 |             cross track error,
271 |             propeller_input,
272 |             rudder_position,
273 |             ]
274 |             All observations are between -1 and 1.
275 |         """
276 |         la_heading = self.path.get_direction(
277 |             self.path_prog[-1] + self.config["la_dist"])
278 |         heading_error_la = float(geom.princip(la_heading
279 |                                               - self.vessel.heading))
280 |         path_position = (self.path(self.path_prog[-1]
281 |                                    + self.config["la_dist"])
282 |                          - self.vessel.position)
283 |         target_heading = np.arctan2(path_position[1], path_position[0])
284 |         heading_error = float(geom.princip(target_heading
285 |                                            - self.vessel.heading))
286 |         path_direction = self.path.get_direction(self.path_prog[-1])
287 |         cross_track_error = geom.Rzyx(0, 0, -path_direction).dot(
288 |             np.hstack([self.path(self.path_prog[-1])
289 |                        - self.vessel.position, 0]))[1]
290 | 
291 |         obs = np.zeros((self.nstates + self.nsectors,))
292 | 
293 |         obs[0] = np.clip(self.vessel.velocity[0]
294 |                          /self.vessel.max_speed, -1, 1)
295 |         obs[1] = np.clip(self.vessel.velocity[1]
296 |                          /0.26, -1, 1)
297 |         obs[2] = np.clip(self.vessel.yawrate/0.55, -1, 1)
298 |         obs[3] = np.clip(heading_error_la/np.pi, -1, 1)
299 |         obs[4] = np.clip(heading_error/np.pi, -1, 1)
300 |         obs[5] = np.clip(cross_track_error/50, -10000, 10000)
301 | 
302 |         obst_range = self.config["obst_detection_range"]
303 |         for obst in self.obstacles:
304 |             distance_vec = geom.Rzyx(0, 0, -self.vessel.heading).dot(
305 |                 np.hstack([obst.position - self.vessel.position, 0]))
306 |             dist = linalg.norm(distance_vec)
307 |             if dist < obst_range + obst.radius + self.vessel.radius:
308 |                 ang = ((float(np.arctan2(
309 |                     distance_vec[1], distance_vec[0]))
310 |                         + np.pi) / (2*np.pi))
311 |                 closeness = 1 - np.clip((dist - self.vessel.radius
312 |                                          - obst.radius)/obst_range,
313 |                                         0, 1)
314 |                 isector = (self.nstates
315 |                            + int(np.floor(ang*self.nsectors)))
316 |                 if isector == self.nstates + self.nsectors:
317 |                     isector = self.nstates
318 |                 if obs[isector] < closeness:
319 |                     obs[isector] = closeness
320 | 
321 |         return obs
322 | 
323 | 
324 |     def seed(self, seed=5):
325 |         self.np_random, seed = seeding.np_random(seed)
326 |         return [seed]
327 | 
328 |     def render(self, mode='human'):
329 |         pass


--------------------------------------------------------------------------------
/gym_auv/envs/colav.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This module implements the AUV gym environment through the AUVenv class.
  3 | """
  4 | import numpy as np
  5 | import numpy.linalg as linalg
  6 | 
  7 | import gym
  8 | from gym.utils import seeding
  9 | 
 10 | import gym_auv.utils.geomutils as geom
 11 | from gym_auv.objects.auv import AUV2D
 12 | from gym_auv.objects.obstacles import StaticObstacle
 13 | 
 14 | class ColavEnv(gym.Env):
 15 |     """
 16 |     Creates an environment with a vessel, goal and obstacles.
 17 | 
 18 |     Attributes
 19 |     ----------
 20 |     config : dict
 21 |         The configuration dictionary.
 22 |     nstates : int
 23 |         The number of state variables passed to the agent.
 24 |     nsectors : int
 25 |         The number of obstacle detection sectors around the vessel.
 26 |     vessel : gym_auv.objects.auv.AUV2D
 27 |         The AUV that is controlled by the agent.
 28 |     goal : np.array
 29 |         The goal position.
 30 |     np_random : np.random.RandomState
 31 |         Random number generator.
 32 |     obstacles : list
 33 |         List of obstacles of type
 34 |         gym_auv.objects.obstacles.StaticObstacle.
 35 |     reward : float
 36 |         The accumulated reward.
 37 |     goal_dist : float
 38 |         The distance to the goal.
 39 |     last_action : np.array
 40 |         The last action that was preformed.
 41 |     steps : int
 42 |         Number of timesteps passed.
 43 |     action_space : gym.spaces.Box
 44 |         The action space. Consists of two floats, where the first, the
 45 |         propeller input can be between 0 and 1, and the second, the
 46 |         rudder position, can be between -1 and 1.
 47 |     observation_space : gym.spaces.Box
 48 |         The observation space. Consists of
 49 |         self.nstates + self.nsectors floats that must be between
 50 |         0 and 1.
 51 |     """
 52 | 
 53 |     metadata = {}
 54 | 
 55 |     def __init__(self, env_config):
 56 |         """
 57 |         The __init__ method declares all class atributes and calls
 58 |         the self.reset() to intialize them properly.
 59 | 
 60 |         Parameters
 61 |         ----------
 62 |         env_config : dict
 63 |             Configuration parameters for the environment.
 64 |             Must have the following members:
 65 |             reward_ds
 66 |                 The reward for moving one unit of length towards the
 67 |                 goal.
 68 |             reward_closeness
 69 |                 reward += reward_closeness*closeness for the closest
 70 |                 obstacle within each sector.
 71 |             reward_speed_error
 72 |                 reward += reward_speed_error*speed_error where the
 73 |                 speed error is abs(speed-cruise_speed)/max_speed.
 74 |             reward_collision
 75 |                 The reward for colliding with an obstacle.
 76 |                 reward += reward_collisions
 77 |             nobstacles
 78 |                 The number of obstacles.
 79 |             obst_detection_range
 80 |                 The maximum distance at which an obstacle can be
 81 |                 detected.
 82 |             obst_reward_range
 83 |                 The distance where closeness to an obstacle starts
 84 |                 getting punished.
 85 |             t_step
 86 |                 The simulation timestep.
 87 |             cruise_speed
 88 |                 The desired cruising speed.
 89 |             goal_dist
 90 |                 The distance from the initial vessel position to
 91 |                 the goal.
 92 |             reward_rudderchange
 93 |                 The reward for changing the rudder position.
 94 |                 reward += reward_rudderchange*rudderchange where
 95 |                 0 <= rudderchange<= 1.
 96 |             min_reward
 97 |                 The minimum reward the vessel can accumulate. If the
 98 |                 accumulated reward is less than min_reward, the episode
 99 |                 ends.
100 |         """
101 |         self.config = env_config
102 |         self.nstates = 4
103 |         self.nsectors = 16
104 |         nobservations = self.nstates + self.nsectors
105 |         self.vessel = None
106 |         self.goal = None
107 | 
108 |         self.np_random = None
109 |         self.obstacles = None
110 | 
111 |         self.reward = 0
112 |         self.goal_dist = None
113 |         self.past_obs = None
114 |         self.past_actions = None
115 |         self.t_step = None
116 | 
117 |         self.reset()
118 | 
119 |         self.action_space = gym.spaces.Box(low=np.array([0, -1]),
120 |                                            high=np.array([1, 1]),
121 |                                            dtype=np.float32)
122 |         self.observation_space = gym.spaces.Box(
123 |             low=np.array([-1]*nobservations),
124 |             high=np.array([1]*nobservations),
125 |             dtype=np.float32)
126 | 
127 |     def step(self, action):
128 |         """
129 |         Simulates the environment for one timestep when action
130 |         is performed
131 | 
132 |         Parameters
133 |         ----------
134 |         action : np.array
135 |             [propeller_input, rudder_position].
136 | 
137 |         Returns
138 |         -------
139 |         obs : np.array
140 |             Observation of the environment after action is performed.
141 |         step_reward : double
142 |             The reward for performing action at his timestep.
143 |         done : bool
144 |             If True the episode is ended.
145 |         info : dict
146 |             Empty, is included because it is required of the
147 |             OpenAI Gym frameowrk.
148 |         """
149 |         self.past_actions = np.vstack([self.past_actions, action])
150 |         self.vessel.step(action)
151 | 
152 |         last_dist = self.goal_dist
153 |         self.goal_dist = linalg.norm(self.vessel.position - self.goal)
154 |         progress = last_dist - self.goal_dist
155 | 
156 |         obs = self.observe()
157 |         self.past_obs = np.vstack([self.past_obs, obs])
158 |         done, step_reward, info = self.step_reward(progress)
159 | 
160 |         return obs, step_reward, done, info
161 | 
162 |     def step_reward(self, progress):
163 |         """
164 |         Calculates the step_reward and decides whether the episode
165 |         should be ended.
166 | 
167 |         Parameters
168 |         ----------
169 |         obs : np.array
170 |             The observation of the environment.
171 |         progress : double
172 |             How much the vessel has moved towards the goal in the
173 |             last timestep.
174 |         Returns
175 |         -------
176 |         done : bool
177 |             If True the episode is ended.
178 |         step_reward : double
179 |             The reward for performing action at his timestep.
180 |         """
181 |         done = False
182 |         step_reward = 0
183 |         info = {"collision": False}
184 | 
185 |         max_prog = self.config["cruise_speed"]*self.t_step
186 |         speed_error = ((linalg.norm(self.vessel.velocity)
187 |                         - self.config["cruise_speed"])
188 |                        /self.vessel.max_speed)
189 |         step_reward += (np.clip(progress/max_prog, -1, 1)
190 |                         *self.config["reward_ds"])
191 |         step_reward += (max(speed_error, 0)
192 |                         *self.config["reward_speed_error"])
193 | 
194 |         for sector in range(self.nsectors):
195 |             closeness = self.past_obs[-1, self.nstates + sector]
196 |             dist = self.config["obst_detection_range"]*(1 - closeness)
197 |             reward_range = self.config["obst_reward_range"]
198 |             if closeness >= 1:
199 |                 step_reward = self.config["reward_collision"]
200 |                 info["collision"] = True
201 |                 if self.config["end_on_collision"]:
202 |                     done = True
203 |             elif dist < reward_range:
204 |                 step_reward += ((1 - dist/reward_range)
205 |                                 *self.config["reward_closeness"])
206 | 
207 |         self.reward += step_reward
208 | 
209 |         if (self.reward < self.config["min_reward"]
210 |                 or self.goal_dist > 3*self.config["goal_dist"]
211 |                 or self.past_actions.shape[0] >= 20000
212 |                 or abs(self.goal_dist) < 5):
213 |             done = True
214 | 
215 |         return done, step_reward, info
216 | 
217 |     def generate(self):
218 |         """
219 |         Sets up the environment. Places the goal and obstacles and
220 |         creates the AUV.
221 |         """
222 |         init_pos = np.array([0, 0])
223 |         init_angle = 2*np.pi*(self.np_random.rand()-0.5)
224 |         goal_angle = 2*np.pi*(self.np_random.rand()-0.5)
225 |         self.t_step = self.config["t_step"]
226 | 
227 |         self.goal = self.config["goal_dist"]*np.array(
228 |             [np.cos(goal_angle), np.sin(goal_angle)])
229 |         self.vessel = AUV2D(self.t_step,
230 |                             np.hstack([init_pos, init_angle]))
231 | 
232 |         for _ in range(self.config["nobstacles"]):
233 |             obst_dist = (0.75*self.config["goal_dist"]
234 |                          *(self.np_random.rand() + 0.2))
235 |             obst_ang = (goal_angle
236 |                         + 2*np.pi*(self.np_random.rand()-0.5))
237 |             position = (obst_dist*np.array(
238 |                 [np.cos(obst_ang), np.sin(obst_ang)]))
239 |             if linalg.norm(position) < 50:
240 |                 position[0] = np.sign(position[0])*50
241 |                 if position[0] == 0:
242 |                     position[0] = 50
243 |             radius = 15*(self.np_random.rand()+0.5)
244 |             self.obstacles.append(StaticObstacle(position, radius))
245 | 
246 |     def reset(self):
247 |         """
248 |         Resets the environment by reseeding and calling self.generate.
249 | 
250 |         Returns
251 |         -------
252 |         obs : np.array
253 |             The initial observation of the environment.
254 |         """
255 |         self.vessel = None
256 |         self.goal = None
257 |         self.obstacles = []
258 |         self.reward = 0
259 |         self.goal_dist = self.config["goal_dist"]
260 |         self.past_actions = np.array([[0, 0]])
261 |         self.t_step = None
262 |         if self.np_random is None:
263 |             self.seed()
264 | 
265 |         self.generate()
266 | 
267 |         obs = self.observe()
268 |         self.past_obs = np.array([obs])
269 |         return obs
270 | 
271 |     def observe(self):
272 |         """
273 |         Generates the observation of the environment.
274 |         Parameters
275 |         ----------
276 |         action : np.array
277 |             [propeller_input, rudder_position].
278 | 
279 |         Returns
280 |         -------
281 |         obs : np.array
282 |             [
283 |             surge velocity,
284 |             sway velocity,
285 |             heading error,
286 |             distance to goal,
287 |             propeller_input,
288 |             rudder_positione,
289 |             self.nsectors*[closeness to closest obstacle in sector]
290 |             ]
291 |             All observations are between -1 and 1.
292 |         """
293 |         obst_range = self.config["obst_detection_range"]
294 | 
295 |         goal_vector = self.goal - self.vessel.position
296 |         goal_direction = np.arctan2(goal_vector[1], goal_vector[0])
297 |         heading_error = float(geom.princip(goal_direction
298 |                                            - self.vessel.heading))
299 |         obs = np.zeros((self.nstates + self.nsectors,))
300 | 
301 |         obs[0] = np.clip(self.vessel.velocity[0]
302 |                          /self.vessel.max_speed, -1, 1)
303 |         obs[1] = np.clip(self.vessel.velocity[1] / 0.26, -1, 1)
304 |         obs[2] = np.clip(self.vessel.yawrate / 0.55, -1, 1)
305 |         obs[3] = np.clip(heading_error / np.pi, -1, 1)
306 |         for obst in self.obstacles:
307 |             distance_vec = geom.Rzyx(0, 0, -self.vessel.heading).dot(
308 |                 np.hstack([obst.position - self.vessel.position, 0]))
309 |             dist = linalg.norm(distance_vec)
310 |             if dist < obst_range + obst.radius + self.vessel.radius:
311 |                 ang = ((float(np.arctan2(
312 |                     distance_vec[1], distance_vec[0]))
313 |                         + np.pi) / (2*np.pi))
314 |                 closeness = 1 - np.clip((dist - self.vessel.radius
315 |                                          - obst.radius)/obst_range,
316 |                                         0, 1)
317 |                 isector = (self.nstates
318 |                            + int(np.floor(ang*self.nsectors)))
319 |                 if isector == self.nstates + self.nsectors:
320 |                     isector = self.nstates
321 |                 if obs[isector] < closeness:
322 |                     obs[isector] = closeness
323 |         return obs
324 | 
325 | 
326 |     def seed(self, seed=5):
327 |         """
328 |         Sets the self.np_random random number generator and
329 |         returns a random seed.
330 |         """
331 |         self.np_random, seed = seeding.np_random(seed)
332 |         return [seed]
333 | 
334 |     def render(self, mode='human'):
335 |         """
336 |         Not implemented.
337 |         """
338 |         return
339 | 


--------------------------------------------------------------------------------