├── .gitignore
├── APF.py
├── LICENSE
├── README.md
├── config
    ├── config_DQN.json
    └── config_IQN.json
├── demonstration.png
├── env_visualizer.py
├── marinenav_env
    ├── __init__.py
    ├── envs
    │   ├── __init__.py
    │   ├── marinenav_env.py
    │   └── utils
    │   │   └── robot.py
    └── setup.py
├── policy
    ├── DQN_model.py
    ├── IQN_model.py
    ├── agent.py
    ├── replay_buffer.py
    └── trainer.py
├── pretrained_models
    ├── DQN
    │   └── seed_9
    │   │   ├── dqn_constructor_params.json
    │   │   └── dqn_network_params.pth
    └── IQN
    │   └── seed_9
    │       ├── constructor_params.json
    │       └── network_params.pth
├── run_experiments.py
├── scripts
    ├── plot_eval_returns.py
    ├── visualize_eval_episode.py
    └── visualize_exp_episode.py
├── system_requirements
├── thirdparty
    └── RVO.py
└── train_rl_agents.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode*
2 | training*
3 | test*
4 | experiment*
5 | __pycache__
6 | marine_env/marine_env.egg-info/
7 | *.png
8 | *.mp4


--------------------------------------------------------------------------------
/APF.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import copy
  3 | 
  4 | class APF_agent:
  5 | 
  6 |     def __init__(self, a, w):
  7 |         self.k_att = 50.0 # attractive force constant
  8 |         self.k_rep = 500.0 # repulsive force constant
  9 |         self.k_v = 1.0 # velocity force constant
 10 |         self.m = 500 # robot weight (kg)
 11 |         self.d0 = 15.0 # obstacle distance threshold (m)
 12 |         self.n = 2 # power constant of repulsive force
 13 |         self.min_vel = 1.0 # if velocity is lower than the threshold, mandate acceleration 
 14 | 
 15 |         self.a = a # available linear acceleration (action 1)
 16 |         self.w = w # available angular velocity (action 2)
 17 | 
 18 |         self.GAMMA = 0.99
 19 | 
 20 |     def position_force(self,position,radius,goal):
 21 |         d_obs = np.linalg.norm(position) - radius - 0.8
 22 |         d_goal = np.linalg.norm(goal)
 23 | 
 24 |         # repulsive force component to move away from the obstacle 
 25 |         mag_1 = self.k_rep * ((1/d_obs)-(1/self.d0)) * (d_goal ** self.n)/(d_obs ** 2)
 26 |         dir_1 = -1.0 * position / np.linalg.norm(position)
 27 |         F_rep_1 = mag_1 * dir_1
 28 | 
 29 |         # repulsive force component to move towards the goal
 30 |         mag_2 = (self.n / 2) * self.k_rep * (((1/d_obs)-(1/self.d0))**2) * (d_goal ** (self.n-1))
 31 |         dir_2 = -1.0 * goal / d_goal
 32 |         F_rep_2 = mag_2 * dir_2
 33 | 
 34 |         return F_rep_1+F_rep_2
 35 |     
 36 |     def velocity_force(self,v_ao,position,radius):
 37 |         d_obs = np.linalg.norm(position) - radius - 0.8
 38 |         
 39 |         mag = -self.k_v * v_ao / d_obs
 40 |         dir = position / np.linalg.norm(position)
 41 |         
 42 |         F_rep = mag * dir
 43 | 
 44 |         return F_rep
 45 | 
 46 |     def act(self, observation):
 47 |         assert len(observation) == 39, "The state size does not equal 39"
 48 | 
 49 |         obs_array = np.array(observation)
 50 |         
 51 |         ego = obs_array[:4]
 52 |         static = obs_array[4:19]
 53 |         dynamic = obs_array[19:] 
 54 | 
 55 |         # compute attractive force
 56 |         F_att = self.k_att * ego[:2]
 57 | 
 58 |         # compute total repulsive force from sonar reflections
 59 |         F_rep = np.zeros(2)
 60 |         goal = ego[:2]
 61 |         velocity = ego[2:]
 62 | 
 63 |         # force from static obstacles
 64 |         for i in range(0,len(static),3):
 65 |             if np.abs(static[i]) < 1e-3 and np.abs(static[i+1]) < 1e-3:
 66 |                 # padding
 67 |                 continue
 68 | 
 69 |             F_rep += self.position_force(static[i:i+2],static[i+2],goal)
 70 | 
 71 |         # force from dynamic obstacles
 72 |         for i in range(0,len(dynamic),4):
 73 |             if np.abs(dynamic[i]) < 1e-3 and np.abs(dynamic[i+1]) < 1e-3:
 74 |                 # padding
 75 |                 continue
 76 | 
 77 |             e_ao = dynamic[i:i+2]/np.linalg.norm(dynamic[i:i+2])
 78 |             v_ao = np.dot(velocity-dynamic[i+2:i+4],e_ao)
 79 |             if v_ao >= 0.0:
 80 |                 # agent is moving towards dynamic obstacles
 81 |                 F_rep += self.position_force(dynamic[i:i+2],0.8,goal)
 82 |                 F_rep += self.velocity_force(v_ao,dynamic[i:i+2],0.8)
 83 | 
 84 |         # select angular velocity action 
 85 |         F_total = F_att + F_rep
 86 |         V_angle = 0.0
 87 |         if np.linalg.norm(velocity) > 1e-03:
 88 |             V_angle = np.arctan2(velocity[1],velocity[0])
 89 |         F_angle = np.arctan2(F_total[1],F_total[0])
 90 | 
 91 |         diff_angle = F_angle - V_angle
 92 |         while diff_angle < -np.pi:
 93 |             diff_angle += 2 * np.pi
 94 |         while diff_angle >= np.pi:
 95 |             diff_angle -= 2 * np.pi
 96 | 
 97 |         w_idx = np.argmin(np.abs(self.w-diff_angle))
 98 |         
 99 |         # select linear acceleration action
100 |         a_total = F_total / self.m
101 |         V_dir = np.array([1.0,0.0])
102 |         if np.linalg.norm(velocity) > 1e-03:
103 |             V_dir = velocity / np.linalg.norm(velocity)
104 |         a_proj = np.dot(a_total,V_dir)
105 |  
106 |         a = copy.deepcopy(self.a)
107 |         if np.linalg.norm(velocity) < self.min_vel:
108 |             # if the velocity is small, mandate acceleration
109 |             a[a<=0.0] = -np.inf
110 |         a_diff = a-a_proj
111 |         a_idx = np.argmin(np.abs(a_diff))
112 | 
113 |         return a_idx * len(self.w) + w_idx
114 | 
115 |         
116 |         
117 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Robust Field Autonomy Lab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Multi Robot Distributional RL Navigation
 2 | 
 3 | This repository contains the code implementation of our ICRA 2024 paper [here](https://arxiv.org/abs/2402.11799). We proposed a Distributional Reinforcement Learning (Distributional RL) based decentralized multi-robot collision avoidance method for Autonomous Surface Vehicles, which can work safely in congested waters with unknown current disturbances. Each vehicle equipped with the trained agent makes independent action decisions based on (1) ego observation of surrounding objects including static obstacles and other vehicles, and (2) level of risk sensitivity. The performance of our approach is shown in the video [here](https://robustfieldautonomylab.github.io/Lin_ICRA24_Video.mp4).
 4 | 
 5 | <p align="center">
 6 | <img width="800" height="500" src="demonstration.png"> 
 7 | </p>
 8 | 
 9 | If you find this repository useful, please cite our paper
10 | ```
11 | @INPROCEEDINGS{10611668,
12 |   author={Lin, Xi and Huang, Yewei and Chen, Fanfei and Englot, Brendan},
13 |   booktitle={2024 IEEE International Conference on Robotics and Automation (ICRA)}, 
14 |   title={Decentralized Multi-Robot Navigation for Autonomous Surface Vehicles with Distributional Reinforcement Learning}, 
15 |   year={2024},
16 |   volume={},
17 |   number={},
18 |   pages={8327-8333},
19 |   doi={10.1109/ICRA57147.2024.10611668}}
20 | ```
21 | 
22 | ## Train RL Agents
23 | 
24 | Our proposed Distributional RL based policy model (IQN), and a traditional RL based model (DQN) used for comparison, can be trained by running the following command. You can also skip this training section and run experiments with the provided pretrained models.  
25 | 
26 | ```
27 | python train_rl_agents.py -C CONFIG_FILE [-P NUM_PROCS] [-D DEVICE]
28 | ```
29 | 
30 | Replace CONFIG_FILE with the path to the training config file. Example training config files are provided in config directory.   
31 | 
32 | -P and -D flags are optional. -P can be used to specify the number of processes to be run in parallel when training multiple models. -D can be used to specify the CPU or GPU intended to be used in training.
33 | 
34 | ## Evaluate Learning Performance
35 | 
36 | We provide scripts for plotting learning performance and visualizing evaluation episodes of RL agents trained by you.
37 | 
38 | To plot learning curves, set data_dir, seeds, and eval_agents according to the corresponding trained RL models, and run the command
39 | 
40 | ```
41 | python plot_eval_returns.py
42 | ```
43 | 
44 | To visualize an evaluation episode, set eval_id and eval_episode according to the corresponding evaluation config file, and run the command
45 | 
46 | ```
47 | python visualize_eval_episode.py
48 | ```
49 | 
50 | ## Run Experiments
51 | 
52 | Run the following command to perform evaluation experiments. If you want to evaluate RL agents trained on your own, set save_dir in run_experiments.py to the stored directories.
53 | 
54 | ```
55 | python run_experiments.py
56 | ```
57 | 
58 | To visualize an experiment episode, set schedule_id, agent_id, ep_id according to the experiment result file, and run the command
59 | 
60 | ```
61 | python visualize_exp_episode.py
62 | ```
63 | 


--------------------------------------------------------------------------------
/config/config_DQN.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "seed":[9],
 3 |     "total_timesteps":6000000,
 4 |     "eval_freq":60000,
 5 |     "save_dir":"/home/rfal/Multi_Robot_Distributional_RL_Navigation/training_data",
 6 |     "training_schedule":
 7 |     {
 8 |         "timesteps":[0,1000000,2000000,3000000,4000000,5000000],
 9 |         "num_cooperative":[3,5,7,7,7,7],
10 |         "num_non_cooperative":[0,0,0,0,0,0],
11 |         "num_cores":[4,6,8,8,8,8],
12 |         "num_obstacles":[0,0,0,6,8,10],
13 |         "min_start_goal_dis":[30.0,35.0,40.0,40.0,40.0,40.0]
14 |     },
15 |     "eval_schedule":
16 |     {
17 |         "num_episodes": [10,10,10,10,10,10], 
18 |         "num_cooperative": [3,5,7,3,5,7], 
19 |         "num_non_cooperative": [0,0,0,0,0,0], 
20 |         "num_cores": [4,6,8,4,6,8], 
21 |         "num_obstacles": [0,0,0,6,8,10], 
22 |         "min_start_goal_dis": [40.0,40.0,40.0,40.0,40.0,40.0]
23 |     },
24 |     "use_iqn":false
25 | }


--------------------------------------------------------------------------------
/config/config_IQN.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "seed":[9],
 3 |     "total_timesteps":6000000,
 4 |     "eval_freq":60000,
 5 |     "save_dir":"/home/rfal/Multi_Robot_Distributional_RL_Navigation/training_data",
 6 |     "training_schedule":
 7 |     {
 8 |         "timesteps":[0,1000000,2000000,3000000,4000000,5000000],
 9 |         "num_cooperative":[3,5,7,7,7,7],
10 |         "num_non_cooperative":[0,0,0,0,0,0],
11 |         "num_cores":[4,6,8,8,8,8],
12 |         "num_obstacles":[0,0,0,6,8,10],
13 |         "min_start_goal_dis":[30.0,35.0,40.0,40.0,40.0,40.0]
14 |     },
15 |     "eval_schedule":
16 |     {
17 |         "num_episodes": [10,10,10,10,10,10], 
18 |         "num_cooperative": [3,5,7,3,5,7], 
19 |         "num_non_cooperative": [0,0,0,0,0,0], 
20 |         "num_cores": [4,6,8,4,6,8], 
21 |         "num_obstacles": [0,0,0,6,8,10], 
22 |         "min_start_goal_dis": [40.0,40.0,40.0,40.0,40.0,40.0]
23 |     },
24 |     "use_iqn":true
25 | }


--------------------------------------------------------------------------------
/demonstration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobustFieldAutonomyLab/Multi_Robot_Distributional_RL_Navigation/889bfed3aded1fc2edc1967090517f22e14de7a0/demonstration.png


--------------------------------------------------------------------------------
/env_visualizer.py:
--------------------------------------------------------------------------------
  1 | import marinenav_env.envs.marinenav_env as marinenav_env
  2 | import numpy as np
  3 | import matplotlib as mpl
  4 | import matplotlib.pyplot as plt
  5 | import matplotlib.cm as cm
  6 | import matplotlib.animation as animation
  7 | import copy
  8 | import scipy.spatial
  9 | import gym
 10 | import json
 11 | import os
 12 | 
 13 | class EnvVisualizer:
 14 | 
 15 |     def __init__(self, 
 16 |                  seed:int=0, 
 17 |                  draw_envs:bool=False, # Mode 2: plot the envrionment
 18 |                  draw_traj:bool=False, # Mode 3: plot final trajectories given action sequences
 19 |                  video_plots:bool=False, # Mode 4: Generate plots for a video
 20 |                  plot_dist:bool=False, # If return distributions are needed (for IQN agent) in the video
 21 |                  plot_qvalues:bool=False, # If Q values are needed in the video
 22 |                  dpi:int=96, # Monitor DPI
 23 |                  ): 
 24 |         self.env = marinenav_env.MarineNavEnv2(seed)
 25 |         self.env.reset()
 26 |         self.fig = None # figure for visualization
 27 |         self.axis_graph = None # sub figure for the map
 28 |         self.robots_plot = []
 29 |         self.robots_last_pos = []
 30 |         self.robots_traj_plot = []
 31 |         self.LiDAR_beams_plot = []
 32 |         self.axis_title = None # sub figure for title
 33 |         self.axis_action = None # sub figure for action command and steer data
 34 |         self.axis_goal = None # sub figure for relative goal measurment
 35 |         self.axis_perception = None # sub figure for perception output
 36 |         self.axis_dvl = None # sub figure for DVL measurement
 37 |         self.axis_dist = [] # sub figure(s) for return distribution of actions
 38 |         self.axis_qvalues = None # subfigure for Q values of actions
 39 | 
 40 |         self.episode_actions = [] # action sequence load from episode data
 41 |         self.episode_actions_quantiles = None
 42 |         self.episode_actions_taus = None
 43 | 
 44 |         self.plot_dist = plot_dist # draw return distribution of actions
 45 |         self.plot_qvalues = plot_qvalues # draw Q values of actions
 46 |         self.draw_envs = draw_envs # draw only the envs
 47 |         self.draw_traj = draw_traj # draw only final trajectories
 48 |         self.video_plots = video_plots # draw video plots
 49 |         self.plots_save_dir = None # video plots save directory 
 50 |         self.dpi = dpi # monitor DPI
 51 |         self.agent_name = None # agent name
 52 |         self.agent = None # agent (IQN or DQN for plot data)
 53 |         
 54 |         self.configs = None # evaluation configs
 55 |         self.episodes = None # evaluation episodes to visualize 
 56 | 
 57 |     def init_visualize(self,
 58 |                        env_configs=None # used in Mode 2
 59 |                        ):
 60 |         
 61 |         # initialize subplot for the map, robot state and sensor measurments
 62 |         if self.draw_envs:
 63 |             # Mode 2: plot the envrionment
 64 |             if env_configs is None:
 65 |                 self.fig, self.axis_graph = plt.subplots(1,1,figsize=(8,8))
 66 |             else:
 67 |                 num = len(env_configs)
 68 |                 if num % 3 == 0:
 69 |                     self.fig, self.axis_graphs = plt.subplots(int(num/3),3,figsize=(8*3,8*int(num/3))) 
 70 |                 else:
 71 |                     self.fig, self.axis_graphs = plt.subplots(1,num,figsize=(8*num,8))
 72 |         elif self.draw_traj:
 73 |             if self.plot_dist:
 74 |                 self.fig = plt.figure(figsize=(24,16))
 75 |                 spec = self.fig.add_gridspec(5,6)
 76 | 
 77 |                 self.axis_graph = self.fig.add_subplot(spec[:,:4])
 78 |                 self.axis_perception = self.fig.add_subplot(spec[:2,4:])
 79 |                 self.axis_dist.append(self.fig.add_subplot(spec[2:,4]))
 80 |                 self.axis_dist.append(self.fig.add_subplot(spec[2:,5]))
 81 |             else:
 82 |                 # Mode 3: plot final trajectories given action sequences
 83 |                 self.fig, self.axis_graph = plt.subplots(figsize=(16,16))
 84 |         elif self.video_plots:
 85 |             # Mode 4: Generate 1080p video plots
 86 |             w = 1920
 87 |             h = 1080
 88 |             self.fig = plt.figure(figsize=(w/self.dpi,h/self.dpi),dpi=self.dpi)
 89 |             if self.agent_name == "adaptive_IQN":
 90 |                 spec = self.fig.add_gridspec(5,6)
 91 |                 
 92 |                 # self.axis_title = self.fig.add_subplot(spec[0,:])
 93 |                 # self.axis_title.text(-0.9,0.,"adaptive IQN performance",fontweight="bold",fontsize=45)
 94 | 
 95 |                 self.axis_graph = self.fig.add_subplot(spec[:,:4])
 96 |                 self.axis_graph.set_title("adaptive IQN performance",fontweight="bold",fontsize=30)
 97 | 
 98 |                 self.axis_perception = self.fig.add_subplot(spec[0:2,4:])
 99 |                 self.axis_dist.append(self.fig.add_subplot(spec[2:,4]))
100 |                 self.axis_dist.append(self.fig.add_subplot(spec[2:,5]))
101 |             elif self.agent_name == "IQN":
102 |                 spec = self.fig.add_gridspec(5,12)
103 |                 
104 |                 # self.axis_title = self.fig.add_subplot(spec[0,:])
105 |                 # self.axis_title.text(-0.9,0.,"IQN performance",fontweight="bold",fontsize=45)
106 | 
107 |                 self.axis_graph = self.fig.add_subplot(spec[:,:8])
108 |                 self.axis_graph.set_title("IQN performance",fontweight="bold",fontsize=30)
109 | 
110 |                 self.axis_perception = self.fig.add_subplot(spec[0:2,8:])
111 |                 self.axis_dist.append(self.fig.add_subplot(spec[2:,9:11]))
112 |             elif self.agent_name == "DQN":
113 |                 spec = self.fig.add_gridspec(5,12)
114 |                 
115 |                 self.axis_graph = self.fig.add_subplot(spec[:,:8])
116 |                 self.axis_graph.set_title("DQN performance",fontweight="bold",fontsize=30)
117 | 
118 |                 self.axis_perception = self.fig.add_subplot(spec[0:2,8:])
119 |                 self.axis_qvalues = self.fig.add_subplot(spec[2:,9:11])
120 |             else:
121 |                 name = ""
122 |                 if self.agent_name == "APF":
123 |                     name = "Artificial Potential Field"
124 |                 elif self.agent_name == "RVO":
125 |                     name = "Reciprocal Velocity Obstacle" 
126 |                 spec = self.fig.add_gridspec(5,3)
127 | 
128 |                 self.axis_graph = self.fig.add_subplot(spec[:,:2])
129 |                 self.axis_graph.set_title(f"{name} performance",fontweight="bold",fontsize=30)
130 | 
131 |                 self.axis_perception = self.fig.add_subplot(spec[:2,2])
132 |                 self.axis_action = self.fig.add_subplot(spec[2:,2])
133 | 
134 |             # self.axis_title.set_xlim([-1.0,1.0])
135 |             # self.axis_title.set_ylim([-1.0,1.0])
136 |             # self.axis_title.set_xticks([])
137 |             # self.axis_title.set_yticks([])
138 |             # self.axis_title.spines["left"].set_visible(False)
139 |             # self.axis_title.spines["top"].set_visible(False)
140 |             # self.axis_title.spines["right"].set_visible(False)
141 |             # self.axis_title.spines["bottom"].set_visible(False)
142 |         else:
143 |             # Mode 1 (default): Display an episode
144 |             self.fig = plt.figure(figsize=(32,16))
145 |             spec = self.fig.add_gridspec(5,4)
146 |             self.axis_graph = self.fig.add_subplot(spec[:,:2])
147 |             # self.axis_goal = self.fig.add_subplot(spec[0,2])
148 |             self.axis_perception = self.fig.add_subplot(spec[1:3,2])
149 |             # self.axis_dvl = self.fig.add_subplot(spec[3:,2])
150 |             # self.axis_observation = self.fig.add_subplot(spec[:,3])
151 | 
152 |             ### temp for ploting head figure ###
153 |             # self.fig, self.axis_graph = plt.subplots(1,1,figsize=(16,16))
154 |             # # self.fig, self.axis_perception = plt.subplots(1,1,figsize=(8,8))
155 | 
156 |         if self.draw_envs and env_configs is not None:
157 |             for i,env_config in enumerate(env_configs):
158 |                 self.load_env_config(env_config)
159 |                 if len(env_configs) % 3 == 0:
160 |                     self.plot_graph(self.axis_graphs[int(i/3),i%3])
161 |                 else:
162 |                     self.plot_graph(self.axis_graphs[i])
163 |         else:
164 |             self.plot_graph(self.axis_graph)
165 | 
166 |     def plot_graph(self,axis):
167 |         # plot current velocity in the map
168 |         # if self.draw_envs:
169 |         #     x_pos = list(np.linspace(0.0,self.env.width,100))
170 |         #     y_pos = list(np.linspace(0.0,self.env.height,100))
171 |         # else:
172 |         #     x_pos = list(np.linspace(-2.5,self.env.width+2.5,110))
173 |         #     y_pos = list(np.linspace(-2.5,self.env.height+2.5,110))
174 | 
175 |         x_pos = list(np.linspace(0.0,self.env.width,100))
176 |         y_pos = list(np.linspace(0.0,self.env.height,100))
177 | 
178 |         pos_x = []
179 |         pos_y = []
180 |         arrow_x = []
181 |         arrow_y = []
182 |         speeds = np.zeros((len(x_pos),len(y_pos)))
183 |         for m,x in enumerate(x_pos):
184 |             for n,y in enumerate(y_pos):
185 |                 v = self.env.get_velocity(x,y)
186 |                 speed = np.clip(np.linalg.norm(v),0.1,10)
187 |                 pos_x.append(x)
188 |                 pos_y.append(y)
189 |                 arrow_x.append(v[0])
190 |                 arrow_y.append(v[1])
191 |                 speeds[n,m] = np.log(speed)
192 | 
193 | 
194 |         cmap = cm.Blues(np.linspace(0,1,20))
195 |         cmap = mpl.colors.ListedColormap(cmap[10:,:-1])
196 | 
197 |         axis.contourf(x_pos,y_pos,speeds,cmap=cmap)
198 |         axis.quiver(pos_x, pos_y, arrow_x, arrow_y, width=0.001,scale_units='xy',scale=2.0)
199 | 
200 |         # if not self.draw_envs:
201 |         #     # plot the evaluation boundary
202 |         #     boundary = np.array([[0.0,0.0],
203 |         #                         [self.env.width,0.0],
204 |         #                         [self.env.width,self.env.height],
205 |         #                         [0.0,self.env.height],
206 |         #                         [0.0,0.0]])
207 |         #     axis.plot(boundary[:,0],boundary[:,1],color = 'r',linestyle="-.",linewidth=3)
208 | 
209 |         # plot obstacles in the map
210 |         l = True
211 |         for obs in self.env.obstacles:
212 |             if l:
213 |                 axis.add_patch(mpl.patches.Circle((obs.x,obs.y),radius=obs.r,color='m'))
214 |                 l = False
215 |             else:
216 |                 axis.add_patch(mpl.patches.Circle((obs.x,obs.y),radius=obs.r,color='m'))
217 | 
218 |         axis.set_aspect('equal')
219 |         # if self.draw_envs:
220 |         #     axis.set_xlim([0.0,self.env.width])
221 |         #     axis.set_ylim([0.0,self.env.height])
222 |         # else:
223 |         #     axis.set_xlim([-2.5,self.env.width+2.5])
224 |         #     axis.set_ylim([-2.5,self.env.height+2.5])
225 |         axis.set_xlim([0.0,self.env.width])
226 |         axis.set_ylim([0.0,self.env.height])
227 |         axis.set_xticks([])
228 |         axis.set_yticks([])
229 | 
230 |         # plot start and goal state of each robot
231 |         for idx,robot in enumerate(self.env.robots):
232 |             if not self.draw_envs:
233 |                 axis.scatter(robot.start[0],robot.start[1],marker="o",color="yellow",s=200,zorder=6)
234 |                 axis.text(robot.start[0]-1,robot.start[1]+1,str(idx),color="yellow",fontsize=25,zorder=8)
235 |             axis.scatter(robot.goal[0],robot.goal[1],marker="*",color="yellow",s=650,zorder=6)
236 |             axis.text(robot.goal[0]-1,robot.goal[1]+1,str(idx),color="yellow",fontsize=25,zorder=8)
237 |             self.robots_last_pos.append([])
238 |             self.robots_traj_plot.append([])
239 | 
240 |         self.plot_robots(axis)
241 |     
242 |     def plot_robots(self,axis,traj_color=None):
243 |         if not self.draw_envs:
244 |             for robot_plot in self.robots_plot:
245 |                 robot_plot.remove()
246 |             self.robots_plot.clear()
247 | 
248 |         robot_scale = 1.5
249 |         for i,robot in enumerate(self.env.robots):
250 |             if robot.deactivated:
251 |                 continue
252 | 
253 |             d = np.matrix([[0.5*robot_scale*robot.length],[0.5*robot_scale*robot.width]])
254 |             rot = np.matrix([[np.cos(robot.theta),-np.sin(robot.theta)], \
255 |                             [np.sin(robot.theta),np.cos(robot.theta)]])
256 |             d_r = rot * d
257 |             xy = (robot.x-d_r[0,0],robot.y-d_r[1,0])
258 | 
259 |             angle_d = robot.theta / np.pi * 180
260 |             
261 |             if self.draw_traj:
262 |                 robot.check_reach_goal()
263 |                 c = "lime" if robot.reach_goal else 'r'
264 |             else:
265 |                 c = 'lime'
266 |                 # draw robot velocity (add initial length to avoid being hidden by the robot plot)
267 |                 robot_r = 0.5*np.linalg.norm(np.array([robot.length,robot.width]))
268 |                 init_len = robot_scale * robot_r + 0.1
269 |                 velocity_len = np.linalg.norm(robot.velocity)
270 |                 scaled_len = (velocity_len + init_len) / velocity_len
271 |                 self.robots_plot.append(axis.quiver(robot.x,robot.y,scaled_len*robot.velocity[0],scaled_len*robot.velocity[1], \
272 |                                                     color="r",width=0.005,headlength=5,headwidth=3,scale_units='xy',scale=1))
273 | 
274 |             # draw robot
275 |             self.robots_plot.append(axis.add_patch(mpl.patches.Rectangle(xy,robot_scale*robot.length, \
276 |                                                               robot_scale*robot.width, color=c, \
277 |                                                               angle=angle_d,zorder=7)))
278 |             # if not self.draw_envs:
279 |             #     # draw robot perception range
280 |             #     self.robots_plot.append(axis.add_patch(mpl.patches.Circle((robot.x,robot.y), \
281 |             #                                                     robot.perception.range, color=c,
282 |             #                                                     alpha=0.2)))
283 |             # robot id
284 |             self.robots_plot.append(axis.text(robot.x-1,robot.y+1,str(i),color="yellow",fontsize=25,zorder=8))
285 | 
286 |             if not self.draw_envs:
287 |                 if self.robots_last_pos[i] != []:
288 |                     h = axis.plot((self.robots_last_pos[i][0],robot.x),
289 |                                 (self.robots_last_pos[i][1],robot.y),
290 |                                 color='tab:orange' if traj_color is None else traj_color[i],
291 |                                 linewidth=3.0)
292 |                     self.robots_traj_plot[i].append(h)
293 |                 
294 |                 self.robots_last_pos[i] = [robot.x, robot.y]
295 | 
296 |     def plot_action_and_steer_state(self,action):
297 |         self.axis_action.clear()
298 | 
299 |         a,w = self.env.robots[0].actions[action]
300 | 
301 |         if self.video_plots:
302 |             self.axis_action.text(1,3,"action",fontsize=25)
303 |             self.axis_action.text(1,2,f"a: {a:.2f}",fontsize=20)
304 |             self.axis_action.text(1,1,f"w: {w:.2f}",fontsize=20)
305 |             self.axis_action.set_xlim([0,2.5])
306 |             self.axis_action.set_ylim([0,4])
307 |         else:
308 |             x_pos = 0.15
309 |             self.axis_action.text(x_pos,6,"Steer actions",fontweight="bold",fontsize=15)
310 |             self.axis_action.text(x_pos,5,f"Acceleration (m/s^2): {a:.2f}",fontsize=15)
311 |             self.axis_action.text(x_pos,4,f"Angular velocity (rad/s): {w:.2f}",fontsize=15)
312 |             
313 |             # robot steer state
314 |             self.axis_action.text(x_pos,2,"Steer states",fontweight="bold",fontsize=15)
315 |             self.axis_action.text(x_pos,1,f"Forward speed (m/s): {self.env.robot.speed:.2f}",fontsize=15)
316 |             self.axis_action.text(x_pos,0,f"Orientation (rad): {self.env.robot.theta:.2f}",fontsize=15)
317 |             self.axis_action.set_ylim([-1,7])
318 | 
319 |         self.axis_action.set_xticks([])
320 |         self.axis_action.set_yticks([])
321 |         self.axis_action.spines["left"].set_visible(False)
322 |         self.axis_action.spines["top"].set_visible(False)
323 |         self.axis_action.spines["right"].set_visible(False)
324 |         self.axis_action.spines["bottom"].set_visible(False)
325 | 
326 |     def plot_measurements(self,robot_idx,R_matrix=None):
327 |         self.axis_perception.clear()
328 |         # self.axis_observation.clear()
329 |         # self.axis_dvl.clear()
330 |         # self.axis_goal.clear()
331 | 
332 |         rob = self.env.robots[robot_idx]
333 | 
334 |         # if rob.reach_goal:
335 |         #     print(f"robot {robot_idx} reached goal, no measurements are available!")
336 |         #     return
337 | 
338 |         legend_size = 12
339 |         font_size = 15
340 |         
341 |         rob.perception_output(self.env.obstacles,self.env.robots)
342 | 
343 |         # plot detected objects in the robot frame (rotate x-axis by 90 degree (upward) in the plot)
344 |         range_c = 'g' if rob.cooperative else 'r'
345 |         self.axis_perception.add_patch(mpl.patches.Circle((0,0), \
346 |                                        rob.perception.range, color=range_c, \
347 |                                        alpha = 0.2))
348 |         
349 |         # plot self velocity (add initial length to avoid being hidden by the robot plot)
350 |         robot_scale = 1.5
351 |         robot_r = 0.5*np.linalg.norm(np.array([rob.length,rob.width]))
352 |         init_len = robot_scale * robot_r
353 |         velocity_len = np.linalg.norm(rob.velocity)
354 |         scaled_len = (velocity_len + init_len) / velocity_len
355 | 
356 |         abs_velocity_r = rob.perception.observation["self"][2:]
357 |         self.axis_perception.quiver(0.0,0.0,-scaled_len*abs_velocity_r[1],scaled_len*abs_velocity_r[0], \
358 |                                    color='r',width=0.008,headlength=5,headwidth=3,scale_units='xy',scale=1)
359 |         
360 |         robot_c = 'g'
361 |         self.axis_perception.add_patch(mpl.patches.Rectangle((-0.5*robot_scale*rob.width,-0.5*robot_scale*rob.length), \
362 |                                        robot_scale*rob.width,robot_scale*rob.length, color=robot_c))
363 | 
364 |         x_pos = 0
365 |         y_pos = 0
366 |         relation_pos = [[0.0,0.0]]
367 | 
368 |         for i,obs in enumerate(rob.perception.observation["static"]):
369 |             # rotate by 90 degree 
370 |             self.axis_perception.add_patch(mpl.patches.Circle((-obs[1],obs[0]), \
371 |                                            obs[2], color="m"))
372 |             relation_pos.append([-obs[1],obs[0]])
373 |             # include into observation info
374 |             # self.axis_observation.text(x_pos,y_pos,f"position: ({obs[0]:.2f},{obs[1]:.2f}), radius: {obs[2]:.2f}")
375 |             # y_pos += 1
376 | 
377 |         # self.axis_observation.text(x_pos,y_pos,"Static obstacles",fontweight="bold",fontsize=15)
378 |         # y_pos += 2
379 | 
380 |         if rob.cooperative:
381 |             # for i,obj_history in enumerate(rob.perception.observation["dynamic"].values()):
382 |             for i,obj in enumerate(rob.perception.observation["dynamic"]):
383 |                 # plot the current position
384 |                 # pos = obj_history[-1][:2]
385 | 
386 |                 # plot velocity (rotate by 90 degree)
387 |                 velocity_len = np.linalg.norm(rob.velocity)
388 |                 scaled_len = (velocity_len + init_len) / velocity_len
389 |                 self.axis_perception.quiver(-obj[1],obj[0],-scaled_len*obj[3],scaled_len*obj[2],color="r", \
390 |                                             width=0.008,headlength=5,headwidth=3,scale_units='xy',scale=1)
391 |                 
392 |                 # plot position (rotate by 90 degree)
393 |                 self.axis_perception.add_patch(mpl.patches.Circle((-obj[1],obj[0]), \
394 |                                             rob.detect_r, color="g"))
395 |                 relation_pos.append([-obj[1],obj[0]])
396 |                 
397 |                 # include history into observation info
398 |                 # self.axis_observation.text(x_pos,y_pos,f"position: ({obj[0]:.2f},{obj[1]:.2f}), velocity: ({obj[2]:.2f},{obj[3]:.2f})")
399 |                 # y_pos += 1
400 |             
401 |             # self.axis_observation.text(x_pos,y_pos,"Other Robots",fontweight="bold",fontsize=15)
402 |             # y_pos += 2
403 |         
404 | 
405 |         if R_matrix is not None:
406 |             # plot relation matrix
407 |             length = len(R_matrix)
408 |             assert len(relation_pos) == length, "The number of objects do not match size of the relation matrix"
409 |             for i in range(length):
410 |                 for j in range(length):
411 |                     self.axis_perception.plot([relation_pos[i][0],relation_pos[j][0]], \
412 |                                               [relation_pos[i][1],relation_pos[j][1]],
413 |                                               linewidth=2*R_matrix[i][j],color='k',zorder=0)
414 | 
415 |         type = "cooperative" if rob.cooperative else "non-cooperative"
416 |         # self.axis_observation.text(x_pos,y_pos,f"Showing the observation of robot {robot_idx} ({type})",fontweight="bold",fontsize=20)
417 | 
418 |         self.axis_perception.set_xlim([-rob.perception.range-1,rob.perception.range+1])
419 |         self.axis_perception.set_ylim([-rob.perception.range-1,rob.perception.range+1])
420 |         self.axis_perception.set_aspect('equal')
421 |         self.axis_perception.set_title(f'Robot {robot_idx}',fontsize=25)
422 | 
423 |         self.axis_perception.set_xticks([])
424 |         self.axis_perception.set_yticks([])
425 |         self.axis_perception.spines["left"].set_visible(False)
426 |         self.axis_perception.spines["top"].set_visible(False)
427 |         self.axis_perception.spines["right"].set_visible(False)
428 |         self.axis_perception.spines["bottom"].set_visible(False)
429 | 
430 |         # self.axis_observation.set_ylim([-1,y_pos+1])
431 |         # self.axis_observation.set_xticks([])
432 |         # self.axis_observation.set_yticks([])
433 |         # self.axis_observation.spines["left"].set_visible(False)
434 |         # self.axis_observation.spines["top"].set_visible(False)
435 |         # self.axis_observation.spines["right"].set_visible(False)
436 |         # self.axis_observation.spines["bottom"].set_visible(False)
437 | 
438 | 
439 |         # # plot robot velocity in the robot frame (rotate x-axis by 90 degree (upward) in the plot)
440 |         # h1 = self.axis_dvl.arrow(0.0,0.0,0.0,1.0, \
441 |         #                color='k', \
442 |         #                width = 0.02, \
443 |         #                head_width = 0.08, \
444 |         #                head_length = 0.12, \
445 |         #                length_includes_head=True, \
446 |         #                label='steering direction')
447 |         # # rotate by 90 degree
448 |         # h2 = self.axis_dvl.arrow(0.0,0.0,-abs_velocity_r[1],abs_velocity_r[0], \
449 |         #                color='r',width=0.02, head_width = 0.08, \
450 |         #                head_length = 0.12, length_includes_head=True, \
451 |         #                label='velocity wrt seafloor')
452 |         # x_range = np.max([2,np.abs(abs_velocity_r[1])])
453 |         # y_range = np.max([2,np.abs(abs_velocity_r[0])])
454 |         # mpl.rcParams["font.size"]=12
455 |         # self.axis_dvl.set_xlim([-x_range,x_range])
456 |         # self.axis_dvl.set_ylim([-1,y_range])
457 |         # self.axis_dvl.set_aspect('equal')
458 |         # self.axis_dvl.legend(handles=[h1,h2],loc='lower center',fontsize=legend_size)
459 |         # self.axis_dvl.set_title('Velocity Measurement',fontsize=font_size)
460 | 
461 |         # self.axis_dvl.set_xticks([])
462 |         # self.axis_dvl.set_yticks([])
463 |         # self.axis_dvl.spines["left"].set_visible(False)
464 |         # self.axis_dvl.spines["top"].set_visible(False)
465 |         # self.axis_dvl.spines["right"].set_visible(False)
466 |         # self.axis_dvl.spines["bottom"].set_visible(False)
467 | 
468 | 
469 |         # # give goal position info in the robot frame
470 |         # goal_r = rob.perception.observation["self"][:2]
471 |         # x1 = 0.07
472 |         # x2 = x1 + 0.13
473 |         # self.axis_goal.text(x1,0.5,"Goal Position (Relative)",fontsize=font_size)
474 |         # self.axis_goal.text(x2,0.25,f"({goal_r[0]:.2f}, {goal_r[1]:.2f})",fontsize=font_size)
475 | 
476 |         # self.axis_goal.set_xticks([])
477 |         # self.axis_goal.set_yticks([])
478 |         # self.axis_goal.spines["left"].set_visible(False)
479 |         # self.axis_goal.spines["top"].set_visible(False)
480 |         # self.axis_goal.spines["right"].set_visible(False)
481 |         # self.axis_goal.spines["bottom"].set_visible(False)
482 | 
483 |     def plot_return_dist(self,action):
484 |         for axis in self.axis_dist:
485 |             axis.clear()
486 |         
487 |         dist_interval = 1
488 |         mean_bar = 0.35
489 |         idx = 0
490 | 
491 |         xlim = [np.inf,-np.inf]
492 |         for idx, cvar in enumerate(action["cvars"]):
493 |             ylabelright=[]
494 | 
495 |             quantiles = np.array(action["quantiles"][idx])
496 | 
497 |             q_means = np.mean(quantiles,axis=0)
498 |             max_a = np.argmax(q_means)
499 |             for i, a in enumerate(self.env.robots[0].actions):
500 |                 q_mean = q_means[i]
501 |                 # q_mean = np.mean(quantiles[:,i])
502 | 
503 |                 ylabelright.append(
504 |                     "\n".join([f"a: {a[0]:.2f}",f"w: {a[1]:.2f}"])
505 |                 )
506 | 
507 |                 # ylabelright.append(f"mean: {q_mean:.2f}")
508 |                 
509 |                 self.axis_dist[idx].axhline(i*dist_interval, color="black", linewidth=2.0, zorder=0)
510 |                 self.axis_dist[idx].scatter(quantiles[:,i], i*np.ones(len(quantiles[:,i]))*dist_interval,color="g", marker="x",s=80,linewidth=3)
511 |                 self.axis_dist[idx].hlines(y=i*dist_interval, xmin=np.min(quantiles[:,i]), xmax=np.max(quantiles[:,i]),zorder=0)
512 |                 if i == max_a:
513 |                     self.axis_dist[idx].vlines(q_mean, ymin=i*dist_interval-mean_bar, ymax=i*dist_interval+mean_bar,color="red",linewidth=5)
514 |                 else:
515 |                     self.axis_dist[idx].vlines(q_mean, ymin=i*dist_interval-mean_bar, ymax=i*dist_interval+mean_bar,color="blue",linewidth=3)
516 | 
517 |             self.axis_dist[idx].tick_params(axis="x", labelsize=15)
518 |             self.axis_dist[idx].set_ylim([-1.0,i+1])
519 |             self.axis_dist[idx].set_yticks([])
520 |             if idx == len(action["cvars"])-1:
521 |                 self.axis_dist[idx].set_yticks(range(0,i+1))
522 |                 self.axis_dist[idx].yaxis.tick_right()
523 |                 self.axis_dist[idx].set_yticklabels(labels=ylabelright,fontsize=15)
524 |             
525 |             if len(action["cvars"]) > 1:
526 |                 if idx == 0:
527 |                     self.axis_dist[idx].set_title("adpative: "+r'$\phi$'+f" = {cvar:.2f}",fontsize=20)
528 |                 else:
529 |                     self.axis_dist[idx].set_title(r'$\phi$'+f" = {cvar:.2f}",fontsize=20)
530 |             else:
531 |                 self.axis_dist[idx].set_title(r'$\phi$'+f" = {cvar:.2f}",fontsize=20)
532 |             
533 |             xlim[0] = min(xlim[0],np.min(quantiles)-5)
534 |             xlim[1] = max(xlim[1],np.max(quantiles)+5)
535 | 
536 |         for idx, cvar in enumerate(action["cvars"]):
537 |             # self.axis_dist[idx].xaxis.set_ticks(np.arange(xlim[0],xlim[1]+1,(xlim[1]-xlim[0])/5))
538 |             self.axis_dist[idx].set_xlim(xlim)
539 | 
540 |     def plot_action_values(self,action):
541 |         self.axis_qvalues.clear()
542 | 
543 |         dist_interval = 1
544 |         mean_bar = 0.35
545 |         ylabelright=[]
546 | 
547 |         q_values = np.array(action["qvalues"])
548 |         max_a = np.argmax(q_values)
549 |         for i, a in enumerate(self.env.robots[0].actions):
550 |             ylabelright.append(
551 |                 "\n".join([f"a: {a[0]:.2f}",f"w: {a[1]:.2f}"])
552 |             )
553 |             self.axis_qvalues.axhline(i*dist_interval, color="black", linewidth=2.0, zorder=0)
554 |             if i == max_a:
555 |                 self.axis_qvalues.vlines(q_values[i], ymin=i*dist_interval-mean_bar, ymax=i*dist_interval+mean_bar,color="red",linewidth=8)
556 |             else:
557 |                 self.axis_qvalues.vlines(q_values[i], ymin=i*dist_interval-mean_bar, ymax=i*dist_interval+mean_bar,color="blue",linewidth=5)
558 |         
559 |         self.axis_qvalues.set_title("action values",fontsize=20)
560 |         self.axis_qvalues.tick_params(axis="x", labelsize=15)
561 |         self.axis_qvalues.set_ylim([-1.0,i+1])
562 |         self.axis_qvalues.set_yticks(range(0,i+1))
563 |         self.axis_qvalues.yaxis.tick_right()
564 |         self.axis_qvalues.set_yticklabels(labels=ylabelright,fontsize=15)
565 |         self.axis_qvalues.set_xlim([np.min(q_values)-5,np.max(q_values)+5])
566 | 
567 |     def one_step(self,actions,robot_idx=0):
568 |         assert len(actions) == len(self.env.robots), "Number of actions not equal number of robots!"
569 |         for i,action in enumerate(actions):
570 |             rob = self.env.robots[i]
571 |             current_velocity = self.env.get_velocity(rob.x, rob.y)
572 |             rob.update_state(action,current_velocity)
573 | 
574 |         self.plot_robots()
575 |         self.plot_measurements(robot_idx)
576 |         # if not self.plot_dist and not self.plot_qvalues:
577 |         #     self.plot_action_and_steer_state(action["action"])
578 |         
579 |         if self.step % self.env.robots[0].N == 0:
580 |             if self.plot_dist:
581 |                 self.plot_return_dist(action)
582 |             elif self.plot_qvalues:
583 |                 self.plot_action_values(action)
584 | 
585 |         self.step += 1
586 | 
587 |     def init_animation(self):
588 |         # plot initial robot position
589 |         self.plot_robots()
590 | 
591 |         # plot initial measurments
592 |         # self.plot_measurements() 
593 | 
594 |     def visualize_control(self,action_sequence,start_idx=0):
595 |         # update robot state and make animation when executing action sequence
596 |         actions = []
597 | 
598 |         # counter for updating distributions plot
599 |         self.step = start_idx
600 |         
601 |         for i,a in enumerate(action_sequence):
602 |             for _ in range(self.env.robots[0].N):
603 |                 # action = {}
604 |                 # action["action"] = a
605 |                 # if self.video_plots:
606 |                 #     if self.plot_dist:
607 |                 #         action["cvars"] = self.episode_actions_cvars[i]
608 |                 #         action["quantiles"] = self.episode_actions_quantiles[i]
609 |                 #         action["taus"] = self.episode_actions_taus[i]
610 |                 #     elif self.plot_qvalues:
611 |                 #         action["qvalues"] = self.episode_actions_values[i]
612 |                 
613 |                 actions.append(a)
614 | 
615 |         if self.video_plots:
616 |             for i,action in enumerate(actions):
617 |                 self.one_step(action)
618 |                 self.fig.savefig(f"{self.plots_save_dir}/step_{self.step}.png",pad_inches=0.2,dpi=self.dpi)
619 |         else:
620 |             # self.animation = animation.FuncAnimation(self.fig, self.one_step,frames=actions, \
621 |             #                                         init_func=self.init_animation,
622 |             #                                         interval=10,repeat=False)
623 |             for i,action in enumerate(actions):
624 |                 self.one_step(action)
625 |             plt.show()
626 | 
627 |     def load_env_config(self,episode_dict):
628 |         episode = copy.deepcopy(episode_dict)
629 | 
630 |         self.env.reset_with_eval_config(episode)
631 | 
632 |         if self.plot_dist:
633 |             # load action cvars, quantiles and taus
634 |             self.episode_actions_cvars = episode["robot"]["actions_cvars"]
635 |             self.episode_actions_quantiles = episode["robot"]["actions_quantiles"]
636 |             self.episode_actions_taus = episode["robot"]["actions_taus"]
637 |         elif self.plot_qvalues:
638 |             # load action values
639 |             self.episode_actions_values = episode["robot"]["actions_values"]
640 | 
641 |     def load_env_config_from_eval_files(self,config_f,eval_f,eval_id,env_id):
642 |         with open(config_f,"r") as f:
643 |             episodes = json.load(f)
644 |         episode = episodes[f"env_{env_id}"]
645 |         eval_file = np.load(eval_f,allow_pickle=True)
646 |         episode["robot"]["action_history"] = copy.deepcopy(eval_file["actions"][eval_id][env_id])
647 |         self.load_env_config(episode)
648 | 
649 |     def load_env_config_from_json_file(self,filename):
650 |         with open(filename,"r") as f:
651 |             episode = json.load(f)
652 |         self.load_env_config(episode)
653 | 
654 |     # def play_episode(self,start_idx=0):
655 |     #     for plot in self.robot_traj_plot:
656 |     #         plot[0].remove()
657 |     #     self.robot_traj_plot.clear()
658 | 
659 |     #     current_v = self.env.get_velocity(self.env.start[0],self.env.start[1])
660 |     #     self.env.robot.reset_state(self.env.start[0],self.env.start[1], current_velocity=current_v)
661 | 
662 |     #     self.init_visualize()
663 | 
664 |     #     self.visualize_control(self.episode_actions,start_idx)
665 | 
666 |     def load_eval_config_and_episode(self,config_file,eval_file):
667 |         with open(config_file,"r") as f:
668 |             self.configs = json.load(f)
669 | 
670 |         self.episodes = np.load(eval_file,allow_pickle=True)
671 | 
672 |     def play_eval_episode(self,eval_id,episode_id,colors,robot_ids=None):
673 |         self.env.reset_with_eval_config(self.configs[episode_id])
674 |         self.init_visualize()
675 |         
676 |         trajectories = self.episodes["trajectories"][eval_id][episode_id]
677 |         
678 |         self.play_episode(trajectories,colors,robot_ids)
679 | 
680 |     def play_episode(self,
681 |                      trajectories,
682 |                      colors,
683 |                      robot_ids=None,
684 |                      max_steps=None,
685 |                      start_step=0):
686 |         
687 |         # sort robots according trajectory lengths
688 |         all_robots = []
689 |         for i,traj in enumerate(trajectories):
690 |             plot_observation = False if robot_ids is None else i in robot_ids
691 |             all_robots.append({"id":i,"traj_len":len(traj),"plot_observation":plot_observation})
692 |         all_robots = sorted(all_robots, key=lambda x: x["traj_len"])
693 |         all_robots[-1]["plot_observation"] = True
694 | 
695 |         if max_steps is None:
696 |             max_steps = all_robots[-1]["traj_len"]-1
697 | 
698 |         robots = []
699 |         for robot in all_robots:
700 |             if robot["plot_observation"] is True:
701 |                 robots.append(robot)
702 | 
703 |         idx = 0
704 |         current_robot_step = 0
705 |         for i in range(max_steps):
706 |             if i >= robots[idx]["traj_len"]:
707 |                 current_robot_step = 0
708 |                 idx += 1
709 |             self.plot_robots(self.axis_graph,colors)
710 |             self.plot_measurements(robots[idx]["id"])
711 |             # action = [actions[j][i] for j in range(len(self.env.robots))]
712 |             # self.env.step(action)
713 | 
714 |             for j,rob in enumerate(self.env.robots):
715 |                 if rob.deactivated:
716 |                     continue
717 |                 rob.x = trajectories[j][i][0]
718 |                 rob.y = trajectories[j][i][1]
719 |                 rob.theta = trajectories[j][i][2]
720 |                 rob.speed = trajectories[j][i][3]
721 |                 rob.velocity = np.array(trajectories[j][i][4:])
722 | 
723 |             if self.video_plots:
724 |                 if current_robot_step % self.env.robots[0].N == 0:
725 |                     action = self.action_data(robots[idx]["id"])
726 |                 if self.agent_name == "adaptive_IQN" or self.agent_name == "IQN":
727 |                     self.plot_return_dist(action)
728 |                 elif self.agent_name == "DQN":
729 |                     self.plot_action_values(action)
730 |                 elif self.agent_name == "APF" or self.agent_name == "RVO":
731 |                     self.plot_action_and_steer_state(action)
732 |                 self.fig.savefig(f"{self.plots_save_dir}/step_{start_step+i}.png",dpi=self.dpi)
733 |             else:
734 |                 plt.pause(0.01)
735 | 
736 |             for j,rob in enumerate(self.env.robots):
737 |                 if i == len(trajectories[j])-1:
738 |                     rob.deactivated = True
739 | 
740 |             current_robot_step += 1
741 | 
742 |     def draw_dist_plot(self,
743 |                        trajectories,
744 |                        robot_id,
745 |                        step_id,
746 |                        colors
747 |                        ):
748 |         
749 |         self.init_visualize()
750 | 
751 |         for i in range(step_id+1):
752 |             self.plot_robots(self.axis_graph,traj_color=colors)
753 |             for j,rob in enumerate(self.env.robots):
754 |                 if rob.deactivated:
755 |                     continue
756 |                 rob.x = trajectories[j][i+1][0]
757 |                 rob.y = trajectories[j][i+1][1]
758 |                 rob.theta = trajectories[j][i+1][2]
759 |                 rob.speed = trajectories[j][i+1][3]
760 |                 rob.velocity = np.array(trajectories[j][i+1][4:])
761 |                 if i+1 == len(trajectories[j])-1:
762 |                     rob.deactivated = True
763 | 
764 |         # plot observation
765 |         self.plot_measurements(robot_id)
766 |         
767 |         action = self.action_data(robot_id)
768 |         self.plot_return_dist(action)
769 | 
770 |         self.fig.savefig("IQN_dist_plot.png",bbox_inches="tight")
771 | 
772 |     def action_data(self, robot_id):
773 |         rob = self.env.robots[robot_id]
774 |         observation,_,_ = rob.perception_output(self.env.obstacles,self.env.robots)
775 | 
776 |         if self.agent_name == "adaptive_IQN":
777 |             # compute total distribution and adaptive CVaR distribution
778 |             a_cvar,quantiles_cvar,_,cvar = self.agent.act_adaptive(observation)
779 |             a_greedy,quantiles_greedy,_ = self.agent.act(observation)
780 | 
781 |             action = dict(action=[a_cvar,a_greedy],
782 |                           cvars=[cvar,1.0],
783 |                           quantiles=[quantiles_cvar[0],quantiles_greedy[0]])
784 |         elif self.agent_name == "IQN":
785 |             a_greedy,quantiles_greedy,_ = self.agent.act(observation)
786 | 
787 |             action = dict(action=[a_greedy],
788 |                           cvars=[1.0],
789 |                           quantiles=[quantiles_greedy[0]])
790 |         elif self.agent_name == "DQN":
791 |             a,qvalues = self.agent.act_dqn(observation)
792 | 
793 |             action = dict(action=a,qvalues=qvalues[0])
794 |         elif self.agent_name == "APF" or self.agent_name == "RVO":
795 |             action = self.agent.act(observation)
796 |         return action
797 | 
798 | 
799 |     def draw_trajectory(self,
800 |                         trajectories,
801 |                         colors,
802 |                         name=None,
803 |                         ):
804 |         # Used in Mode 3
805 |         self.init_visualize()
806 | 
807 |         # select a robot that is active utill the end of the episode
808 |         robot_id = 0
809 |         max_length = 0
810 |         for i,traj in enumerate(trajectories):
811 |             print("rob: ",i," len: ",len(traj))
812 |             if len(traj) > max_length:
813 |                 robot_id = i
814 |                 max_length = len(traj)
815 |         
816 |         print("\n")
817 | 
818 |         for i in range(len(trajectories[robot_id])-1):
819 |             self.plot_robots(self.axis_graph,traj_color=colors)
820 |             for j,rob in enumerate(self.env.robots):
821 |                 if rob.deactivated:
822 |                     continue
823 |                 rob.x = trajectories[j][i+1][0]
824 |                 rob.y = trajectories[j][i+1][1]
825 |                 rob.theta = trajectories[j][i+1][2]
826 |                 rob.speed = trajectories[j][i+1][3]
827 |                 rob.velocity = np.array(trajectories[j][i+1][4:])
828 |                 if i+1 == len(trajectories[j])-1:
829 |                     rob.deactivated = True
830 | 
831 |         # for robot_plot in self.robots_plot:
832 |         #     robot_plot.remove()
833 |         # self.robots_plot.clear()
834 | 
835 |         fig_name = "trajectory_test.png" if name is None else f"trajectory_{name}.png"
836 |         self.fig.savefig(fig_name,bbox_inches="tight")
837 | 
838 |     def draw_video_plots(self,episode,save_dir,start_idx,agent):
839 |         # Used in Mode 4
840 |         self.agent = agent
841 |         self.load_env_config(episode)
842 |         self.plots_save_dir = save_dir
843 |         self.play_episode(start_idx)
844 |         return self.step
845 | 
846 | 


--------------------------------------------------------------------------------
/marinenav_env/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.registration import register
2 | 
3 | register(
4 |     id='marinenav_env-v0',
5 |     entry_point='marinenav_env.envs:MarineNavEnv2',
6 | )


--------------------------------------------------------------------------------
/marinenav_env/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from marinenav_env.envs.marinenav_env import MarineNavEnv2 


--------------------------------------------------------------------------------
/marinenav_env/envs/marinenav_env.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import scipy.spatial
  3 | import marinenav_env.envs.utils.robot as robot
  4 | import gym
  5 | import json
  6 | import copy
  7 | 
  8 | class Core:
  9 | 
 10 |     def __init__(self, x:float, y:float, clockwise:bool, Gamma:float):
 11 | 
 12 |         self.x = x  # x coordinate of the vortex core  
 13 |         self.y = y  # y coordinate of the vortex core
 14 |         self.clockwise = clockwise # if the vortex direction is clockwise
 15 |         self.Gamma = Gamma  # circulation strength of the vortex core
 16 | 
 17 | class Obstacle:
 18 | 
 19 |     def __init__(self, x:float, y:float, r:float):
 20 | 
 21 |         self.x = x # x coordinate of the obstacle center
 22 |         self.y = y # y coordinate of the obstacle center
 23 |         self.r = r # radius of the obstacle    
 24 | 
 25 | class MarineNavEnv2(gym.Env):
 26 | 
 27 |     def __init__(self, seed:int=0, schedule:dict=None):
 28 | 
 29 |         self.sd = seed
 30 |         self.rd = np.random.RandomState(seed) # PRNG 
 31 |         
 32 |         # parameter initialization
 33 |         self.width = 50 # x coordinate dimension of the map
 34 |         self.height = 50 # y coordinate dimension of the map
 35 |         self.r = 0.5  # radius of vortex core
 36 |         self.v_rel_max = 1.0 # max allowable speed when two currents flowing towards each other
 37 |         self.p = 0.8 # max allowable relative speed at another vortex core
 38 |         self.v_range = [5,10] # speed range of the vortex (at the edge of core)
 39 |         self.obs_r_range = [1,1] # radius range of the obstacle
 40 |         # self.min_obs_core_dis = 3.0 # minimum distance between a vortex core and an obstacle 
 41 |         self.clear_r = 5.0 # radius of area centered at the start(goal) of each robot,
 42 |                            # where no vortex cores, static obstacles, or the start(goal) of other robots exist
 43 |         self.timestep_penalty = -1.0
 44 |         # self.distance_penalty = -2.0
 45 |         self.collision_penalty = -50.0
 46 |         self.goal_reward = 100.0
 47 |         self.num_cores = 8 # number of vortices
 48 |         self.num_obs = 8 # number of static obstacles
 49 |         self.min_start_goal_dis = 30.0 # minimum distance between start and goal
 50 |         self.num_cooperative = 3 # number of cooperative robots
 51 |         self.num_non_cooperative = 3 # number of non-cooperative robots
 52 | 
 53 |         self.robots = [] # list of robots
 54 |         for _ in range(self.num_cooperative):
 55 |             self.robots.append(robot.Robot(cooperative=True))
 56 |         for _ in range(self.num_non_cooperative):
 57 |             self.robots.append(robot.Robot(cooperative=False))
 58 |         
 59 |         self.cores = [] # list of vortex cores
 60 |         self.obstacles = [] # list of static obstacles
 61 | 
 62 |         self.schedule = schedule # schedule for curriculum learning
 63 |         self.episode_timesteps = 0 # current episode timesteps
 64 |         self.total_timesteps = 0 # learning timesteps
 65 | 
 66 |         # self.set_boundary = False # set boundary of environment
 67 | 
 68 |         self.observation_in_robot_frame = True # return observation in robot frame
 69 |     
 70 |     def get_action_space_dimension(self):
 71 |         return self.robot.compute_actions_dimension()
 72 | 
 73 |     def reset(self):
 74 |         # reset the environment
 75 | 
 76 |         if self.schedule is not None:
 77 |             steps = self.schedule["timesteps"]
 78 |             diffs = np.array(steps) - self.total_timesteps
 79 |             
 80 |             # find the interval the current timestep falls into
 81 |             idx = len(diffs[diffs<=0])-1
 82 | 
 83 |             self.num_cooperative = self.schedule["num_cooperative"][idx]
 84 |             self.num_non_cooperative = self.schedule["num_non_cooperative"][idx]
 85 |             self.num_cores = self.schedule["num_cores"][idx]
 86 |             self.num_obs = self.schedule["num_obstacles"][idx]
 87 |             self.min_start_goal_dis = self.schedule["min_start_goal_dis"][idx]
 88 | 
 89 |             print("\n======== training schedule ========")
 90 |             print("num of cooperative agents: ",self.num_cooperative)
 91 |             print("num of non-cooperative agents: ",self.num_non_cooperative)
 92 |             print("num of cores: ",self.num_cores)
 93 |             print("num of obstacles: ",self.num_obs)
 94 |             print("min start goal dis: ",self.min_start_goal_dis)
 95 |             print("======== training schedule ========\n") 
 96 |         
 97 |         self.episode_timesteps = 0
 98 | 
 99 |         self.cores.clear()
100 |         self.obstacles.clear()
101 |         self.robots.clear()
102 | 
103 |         num_cores = self.num_cores
104 |         num_obs = self.num_obs
105 |         robot_types = [True]*self.num_cooperative + [False]*self.num_non_cooperative
106 |         assert len(robot_types) > 0, "Number of robots is 0!"
107 | 
108 |         ##### generate robots with randomly generated start and goal 
109 |         num_robots = 0
110 |         iteration = 500
111 |         while True:
112 |             start = self.rd.uniform(low = 2.0*np.ones(2), high = np.array([self.width-2.0,self.height-2.0]))
113 |             goal = self.rd.uniform(low = 2.0*np.ones(2), high = np.array([self.width-2.0,self.height-2.0]))
114 |             iteration -= 1
115 |             if self.check_start_and_goal(start,goal):
116 |                 rob = robot.Robot(robot_types[num_robots])
117 |                 rob.start = start
118 |                 rob.goal = goal
119 |                 self.reset_robot(rob)
120 |                 self.robots.append(rob)
121 |                 num_robots += 1
122 |             if iteration == 0 or num_robots == len(robot_types):
123 |                 break
124 | 
125 |         ##### generate vortex with random position, spinning direction and strength
126 |         if num_cores > 0:
127 |             iteration = 500
128 |             while True:
129 |                 center = self.rd.uniform(low = np.zeros(2), high = np.array([self.width,self.height]))
130 |                 direction = self.rd.binomial(1,0.5)
131 |                 v_edge = self.rd.uniform(low = self.v_range[0], high = self.v_range[1])
132 |                 Gamma = 2 * np.pi * self.r * v_edge
133 |                 core = Core(center[0],center[1],direction,Gamma)
134 |                 iteration -= 1
135 |                 if self.check_core(core):
136 |                     self.cores.append(core)
137 |                     num_cores -= 1
138 |                 if iteration == 0 or num_cores == 0:
139 |                     break
140 |         
141 |         centers = None
142 |         for core in self.cores:
143 |             if centers is None:
144 |                 centers = np.array([[core.x,core.y]])
145 |             else:
146 |                 c = np.array([[core.x,core.y]])
147 |                 centers = np.vstack((centers,c))
148 |         
149 |         # KDTree storing vortex core center positions
150 |         if centers is not None:
151 |             self.core_centers = scipy.spatial.KDTree(centers)
152 | 
153 |         ##### generate static obstacles with random position and size
154 |         if num_obs > 0:
155 |             iteration = 500
156 |             while True:
157 |                 center = self.rd.uniform(low = 5.0*np.ones(2), high = np.array([self.width-5.0,self.height-5.0]))
158 |                 r = self.rd.uniform(low = self.obs_r_range[0], high = self.obs_r_range[1])
159 |                 obs = Obstacle(center[0],center[1],r)
160 |                 iteration -= 1
161 |                 if self.check_obstacle(obs):
162 |                     self.obstacles.append(obs)
163 |                     num_obs -= 1
164 |                 if iteration == 0 or num_obs == 0:
165 |                     break
166 |         
167 |         return self.get_observations()
168 | 
169 |     def reset_robot(self,rob):
170 |         # reset robot state
171 |         rob.reach_goal = False
172 |         rob.collision = False
173 |         rob.deactivated = False
174 |         rob.init_theta = self.rd.uniform(low = 0.0, high = 2*np.pi)
175 |         rob.init_speed = self.rd.uniform(low = 0.0, high = rob.max_speed)
176 |         current_v = self.get_velocity(rob.start[0],rob.start[1])
177 |         rob.reset_state(current_velocity=current_v)
178 | 
179 |     def check_all_deactivated(self):
180 |         res = True
181 |         for rob in self.robots:
182 |             if not rob.deactivated:
183 |                 res = False
184 |                 break
185 |         return res
186 | 
187 |     def check_all_reach_goal(self):
188 |         res = True
189 |         for rob in self.robots:
190 |             if not rob.reach_goal:
191 |                 res = False
192 |                 break
193 |         return res
194 |     
195 |     def check_any_collision(self):
196 |         res = False
197 |         for rob in self.robots:
198 |             if rob.collision:
199 |                 res = True
200 |                 break
201 |         return res
202 | 
203 |     def step(self, actions):
204 | 
205 |         rewards = [0]*len(self.robots)
206 | 
207 |         assert len(actions) == len(self.robots), "Number of actions not equal number of robots!"
208 |         assert self.check_all_reach_goal() is not True, "All robots reach goals, not actions are available!"
209 |         # Execute actions for all robots
210 |         for i,action in enumerate(actions):
211 |             rob = self.robots[i]
212 | 
213 |             if rob.deactivated:
214 |                 # This robot is in the deactivate state
215 |                 continue
216 | 
217 |             # save action to history
218 |             rob.action_history.append(action)
219 | 
220 |             dis_before = rob.dist_to_goal()
221 | 
222 |             # update robot state after executing the action    
223 |             for _ in range(rob.N):
224 |                 current_velocity = self.get_velocity(rob.x, rob.y)
225 |                 rob.update_state(action,current_velocity)
226 |             
227 |             # save robot state
228 |             rob.trajectory.append([rob.x,rob.y,rob.theta,rob.speed,rob.velocity[0],rob.velocity[1]])
229 | 
230 |             dis_after = rob.dist_to_goal()
231 | 
232 |             # constant penalty applied at every time step
233 |             rewards[i] += self.timestep_penalty
234 | 
235 |             # reward agent for getting closer to the goal
236 |             rewards[i] += dis_before - dis_after
237 |         
238 | 
239 |         # Get observation for all robots
240 |         observations, collisions, reach_goals = self.get_observations()
241 | 
242 |         dones = [False]*len(self.robots)
243 |         infos = [{"state":"normal"}]*len(self.robots)
244 | 
245 |         # (1) end the current episode if it's too long
246 |         # (2) if any collision happens, end the current episode
247 |         # (3) if all robots reach goals (robot list is empty), end the current episode
248 |         
249 |         # end_episode = False
250 |         for idx,rob in enumerate(self.robots):
251 |             if rob.deactivated:
252 |                 # This robot is in the deactivate state
253 |                 dones[idx] = True
254 |                 if rob.collision:
255 |                     infos[idx] = {"state":"deactivated after collision"}
256 |                 elif rob.reach_goal:
257 |                     infos[idx] = {"state":"deactivated after reaching goal"}
258 |                 else:
259 |                     raise RuntimeError("Robot being deactived can only be caused by collsion or reaching goal!")
260 |                 continue
261 |             
262 |             # min_dis = min_distances[idx]
263 |             # if min_dis <= rob.obs_dis:
264 |             #     # penalize agent for being close to other objects
265 |             #     rewards[i] += self.distance_penalty + min_dis * (-self.distance_penalty/rob.obs_dis)
266 | 
267 |             if self.episode_timesteps >= 1000:
268 |                 # end_episode = True
269 |                 dones[idx] = True
270 |                 infos[idx] = {"state":"too long episode"}
271 |             elif collisions[idx]:
272 |                 rewards[idx] += self.collision_penalty
273 |                 # end_episode = True
274 |                 dones[idx] = True
275 |                 infos[idx] = {"state":"collision"}
276 |             elif reach_goals[idx]:
277 |                 rewards[idx] += self.goal_reward
278 |                 dones[idx] = True
279 |                 infos[idx] = {"state":"reach goal"}
280 |             else:
281 |                 dones[idx] = False
282 |                 infos[idx] = {"state":"normal"}
283 |         
284 |         # if self.check_all_reach_goal():
285 |         #     end_episode = True
286 | 
287 |         # if self.set_boundary and self.out_of_boundary():
288 |         #     # No used in training 
289 |         #     done = True
290 |         #     info = {"state":"out of boundary"}
291 | 
292 |         self.episode_timesteps += 1
293 |         self.total_timesteps += 1
294 | 
295 |         return observations, rewards, dones, infos
296 | 
297 |     def out_of_boundary(self):
298 |         # only used when boundary is set
299 |         x_out = self.robot.x < 0.0 or self.robot.x > self.width
300 |         y_out = self.robot.y < 0.0 or self.robot.y > self.height
301 |         return x_out or y_out
302 | 
303 |     def get_observations(self):
304 |         observations = []
305 |         collisions = []
306 |         reach_goals = []
307 |         # min_distances = []
308 |         for robot in self.robots:
309 |             observation, collision, reach_goal = robot.perception_output(self.obstacles,self.robots,self.observation_in_robot_frame)
310 |             observations.append(observation)
311 |             collisions.append(collision)
312 |             reach_goals.append(reach_goal)
313 |             # min_distances.append(min_distance)
314 |         return observations, collisions, reach_goals
315 |     
316 |     def check_start_and_goal(self,start,goal):
317 |         
318 |         # The start and goal point is far enough
319 |         if np.linalg.norm(goal-start) < self.min_start_goal_dis:
320 |             return False
321 |         
322 |         for robot in self.robots:
323 |             
324 |             dis_s = robot.start - start
325 |             # Start point not too close to that of existing robots
326 |             if np.linalg.norm(dis_s) <= self.clear_r:
327 |                 return False
328 |             
329 |             dis_g = robot.goal - goal
330 |             # Goal point not too close to that of existing robots
331 |             if np.linalg.norm(dis_g) <= self.clear_r:
332 |                 return False
333 |         
334 |         return True
335 |     
336 |     def check_core(self,core_j):
337 | 
338 |         # Within the range of the map
339 |         if core_j.x - self.r < 0.0 or core_j.x + self.r > self.width:
340 |             return False
341 |         if core_j.y - self.r < 0.0 or core_j.y + self.r > self.width:
342 |             return False
343 | 
344 |         for robot in self.robots:
345 |             # Not too close to start and goal point of each robot
346 |             core_pos = np.array([core_j.x,core_j.y])
347 |             dis_s = core_pos - robot.start
348 |             if np.linalg.norm(dis_s) < self.r + self.clear_r:
349 |                 return False
350 |             dis_g = core_pos - robot.goal
351 |             if np.linalg.norm(dis_g) < self.r + self.clear_r:
352 |                 return False
353 | 
354 |         for core_i in self.cores:
355 |             dx = core_i.x - core_j.x
356 |             dy = core_i.y - core_j.y
357 |             dis = np.sqrt(dx*dx+dy*dy)
358 | 
359 |             if core_i.clockwise == core_j.clockwise:
360 |                 # i and j rotate in the same direction, their currents run towards each other at boundary
361 |                 # The currents speed at boundary need to be lower than threshold  
362 |                 boundary_i = core_i.Gamma / (2*np.pi*self.v_rel_max)
363 |                 boundary_j = core_j.Gamma / (2*np.pi*self.v_rel_max)
364 |                 if dis < boundary_i + boundary_j:
365 |                     return False
366 |             else:
367 |                 # i and j rotate in the opposite direction, their currents join at boundary
368 |                 # The relative current speed of the stronger vortex at boundary need to be lower than threshold 
369 |                 Gamma_l = max(core_i.Gamma, core_j.Gamma)
370 |                 Gamma_s = min(core_i.Gamma, core_j.Gamma)
371 |                 v_1 = Gamma_l / (2*np.pi*(dis-2*self.r))
372 |                 v_2 = Gamma_s / (2*np.pi*self.r)
373 |                 if v_1 > self.p * v_2:
374 |                     return False
375 | 
376 |         return True
377 | 
378 |     def check_obstacle(self,obs):
379 | 
380 |         # Within the range of the map
381 |         if obs.x - obs.r < 0.0 or obs.x + obs.r > self.width:
382 |             return False
383 |         if obs.y - obs.r < 0.0 or obs.y + obs.r > self.height:
384 |             return False
385 | 
386 |         for robot in self.robots:
387 |             # Not too close to start and goal point
388 |             obs_pos = np.array([obs.x,obs.y])
389 |             dis_s = obs_pos - robot.start
390 |             if np.linalg.norm(dis_s) < obs.r + self.clear_r:
391 |                 return False
392 |             dis_g = obs_pos - robot.goal
393 |             if np.linalg.norm(dis_g) < obs.r + self.clear_r:
394 |                 return False
395 | 
396 |         # Not too close to vortex cores
397 |         for core in self.cores:
398 |             dx = core.x - obs.x
399 |             dy = core.y - obs.y
400 |             dis = np.sqrt(dx*dx + dy*dy)
401 | 
402 |             if dis <= self.r + obs.r:
403 |                 return False
404 |         
405 |         # Not collide with other obstacles
406 |         for obstacle in self.obstacles:
407 |             dx = obstacle.x - obs.x
408 |             dy = obstacle.y - obs.y
409 |             dis = np.sqrt(dx*dx + dy*dy)
410 | 
411 |             if dis <= obstacle.r + obs.r:
412 |                 return False
413 |         
414 |         return True
415 | 
416 |     def get_velocity(self,x:float, y:float):
417 |         if len(self.cores) == 0:
418 |             return np.zeros(2)
419 |         
420 |         # sort the vortices according to their distance to the query point
421 |         d, idx = self.core_centers.query(np.array([x,y]),k=len(self.cores))
422 |         if isinstance(idx,np.int64):
423 |             idx = [idx]
424 | 
425 |         v_radial_set = []
426 |         v_velocity = np.zeros((2,1))
427 |         for i in list(idx): 
428 |             core = self.cores[i]
429 |             v_radial = np.matrix([[core.x-x],[core.y-y]])
430 | 
431 |             for v in v_radial_set:
432 |                 project = np.transpose(v) * v_radial
433 |                 if project[0,0] > 0:
434 |                     # if the core is in the outter area of a checked core (wrt the query position),
435 |                     # assume that it has no influence the velocity of the query position
436 |                     continue
437 |             
438 |             v_radial_set.append(v_radial)
439 |             dis = np.linalg.norm(v_radial)
440 |             v_radial /= dis
441 |             if core.clockwise:
442 |                 rotation = np.matrix([[0., -1.],[1., 0]])
443 |             else:
444 |                 rotation = np.matrix([[0., 1.],[-1., 0]])
445 |             v_tangent = rotation * v_radial
446 |             speed = self.compute_speed(core.Gamma,dis)
447 |             v_velocity += v_tangent * speed
448 |         
449 |         return np.array([v_velocity[0,0], v_velocity[1,0]])
450 | 
451 |     def get_velocity_test(self,x:float, y:float):
452 |         v = np.ones(2)
453 |         return v / np.linalg.norm(v)
454 | 
455 |     def compute_speed(self, Gamma:float, d:float):
456 |         if d <= self.r:
457 |             return Gamma / (2*np.pi*self.r*self.r) * d
458 |         else:
459 |             return Gamma / (2*np.pi*d)
460 | 
461 |     def reset_with_eval_config(self,eval_config):
462 |         self.episode_timesteps = 0
463 |         
464 |         # load env config
465 |         self.sd = eval_config["env"]["seed"]
466 |         self.width = eval_config["env"]["width"]
467 |         self.height = eval_config["env"]["height"]
468 |         self.r = eval_config["env"]["r"]
469 |         self.v_rel_max = eval_config["env"]["v_rel_max"]
470 |         self.p = eval_config["env"]["p"]
471 |         self.v_range = copy.deepcopy(eval_config["env"]["v_range"])
472 |         self.obs_r_range = copy.deepcopy(eval_config["env"]["obs_r_range"])
473 |         # self.min_obs_core_dis = eval_config["env"]["min_obs_core_dis"]
474 |         self.clear_r = eval_config["env"]["clear_r"]
475 |         self.timestep_penalty = eval_config["env"]["timestep_penalty"]
476 |         # self.distance_penalty = eval_config["env"]["distance_penalty"]
477 |         self.collision_penalty = eval_config["env"]["collision_penalty"]
478 |         self.goal_reward = eval_config["env"]["goal_reward"]
479 | 
480 |         # load vortex cores
481 |         self.cores.clear()
482 |         centers = None
483 |         for i in range(len(eval_config["env"]["cores"]["positions"])):
484 |             center = eval_config["env"]["cores"]["positions"][i]
485 |             clockwise = eval_config["env"]["cores"]["clockwise"][i]
486 |             Gamma = eval_config["env"]["cores"]["Gamma"][i]
487 |             core = Core(center[0],center[1],clockwise,Gamma)
488 |             self.cores.append(core)
489 |             if centers is None:
490 |                 centers = np.array([[core.x,core.y]])
491 |             else:
492 |                 c = np.array([[core.x,core.y]])
493 |                 centers = np.vstack((centers,c))
494 |         
495 |         if centers is not None:
496 |             self.core_centers = scipy.spatial.KDTree(centers)
497 | 
498 |         # load obstacles
499 |         self.obstacles.clear()
500 |         for i in range(len(eval_config["env"]["obstacles"]["positions"])):
501 |             center = eval_config["env"]["obstacles"]["positions"][i]
502 |             r = eval_config["env"]["obstacles"]["r"][i]
503 |             obs = Obstacle(center[0],center[1],r)
504 |             self.obstacles.append(obs)
505 | 
506 |         # load robot config
507 |         self.robots.clear()
508 |         for i in range(len(eval_config["robots"]["cooperative"])):
509 |             rob = robot.Robot(eval_config["robots"]["cooperative"][i])
510 |             rob.dt = eval_config["robots"]["dt"][i]
511 |             rob.N = eval_config["robots"]["N"][i]
512 |             rob.length = eval_config["robots"]["length"][i]
513 |             rob.width = eval_config["robots"]["width"][i]
514 |             rob.r = eval_config["robots"]["r"][i]
515 |             rob.detect_r = eval_config["robots"]["detect_r"][i]
516 |             rob.goal_dis = eval_config["robots"]["goal_dis"][i]
517 |             rob.obs_dis = eval_config["robots"]["obs_dis"][i]
518 |             rob.max_speed = eval_config["robots"]["max_speed"][i]
519 |             rob.a = np.array(eval_config["robots"]["a"][i])
520 |             rob.w = np.array(eval_config["robots"]["w"][i])
521 |             rob.start = np.array(eval_config["robots"]["start"][i])
522 |             rob.goal = np.array(eval_config["robots"]["goal"][i])
523 |             rob.compute_k()
524 |             rob.compute_actions()
525 |             rob.init_theta = eval_config["robots"]["init_theta"][i]
526 |             rob.init_speed = eval_config["robots"]["init_speed"][i]
527 |             
528 |             rob.perception.range = eval_config["robots"]["perception"]["range"][i]
529 |             rob.perception.angle = eval_config["robots"]["perception"]["angle"][i]
530 |             # rob.perception.num_beams = eval_config["robots"]["perception"]["num_beams"][i]
531 |             # rob.perception.len_obs_history = eval_config["robots"]["perception"]["len_obs_history"][i]
532 | 
533 |             current_v = self.get_velocity(rob.start[0],rob.start[1])
534 |             rob.reset_state(current_velocity=current_v)
535 |             
536 |             self.robots.append(rob)
537 | 
538 |         return self.get_observations()          
539 | 
540 |     def episode_data(self):
541 |         episode = {}
542 | 
543 |         # save environment config
544 |         episode["env"] = {}
545 |         episode["env"]["seed"] = self.sd
546 |         episode["env"]["width"] = self.width
547 |         episode["env"]["height"] = self.height
548 |         episode["env"]["r"] = self.r
549 |         episode["env"]["v_rel_max"] = self.v_rel_max
550 |         episode["env"]["p"] = self.p
551 |         episode["env"]["v_range"] = copy.deepcopy(self.v_range) 
552 |         episode["env"]["obs_r_range"] = copy.deepcopy(self.obs_r_range)
553 |         # episode["env"]["min_obs_core_dis"] = self.min_obs_core_dis
554 |         episode["env"]["clear_r"] = self.clear_r
555 |         episode["env"]["timestep_penalty"] = self.timestep_penalty
556 |         # episode["env"]["distance_penalty"] = self.distance_penalty
557 |         episode["env"]["collision_penalty"] = self.collision_penalty
558 |         episode["env"]["goal_reward"] = self.goal_reward
559 | 
560 |         # save vortex cores information
561 |         episode["env"]["cores"] = {}
562 |         episode["env"]["cores"]["positions"] = []
563 |         episode["env"]["cores"]["clockwise"] = []
564 |         episode["env"]["cores"]["Gamma"] = []
565 |         for core in self.cores:
566 |             episode["env"]["cores"]["positions"].append([core.x,core.y])
567 |             episode["env"]["cores"]["clockwise"].append(core.clockwise)
568 |             episode["env"]["cores"]["Gamma"].append(core.Gamma)
569 | 
570 |         # save obstacles information
571 |         episode["env"]["obstacles"] = {}
572 |         episode["env"]["obstacles"]["positions"] = []
573 |         episode["env"]["obstacles"]["r"] = []
574 |         for obs in self.obstacles:
575 |             episode["env"]["obstacles"]["positions"].append([obs.x,obs.y])
576 |             episode["env"]["obstacles"]["r"].append(obs.r)
577 | 
578 |         # save robots information
579 |         episode["robots"] = {}
580 |         episode["robots"]["cooperative"] = []
581 |         episode["robots"]["dt"] = []
582 |         episode["robots"]["N"] = []
583 |         episode["robots"]["length"] = []
584 |         episode["robots"]["width"] = []
585 |         episode["robots"]["r"] = []
586 |         episode["robots"]["detect_r"] = []
587 |         episode["robots"]["goal_dis"] = []
588 |         episode["robots"]["obs_dis"] = []
589 |         episode["robots"]["max_speed"] = []
590 |         episode["robots"]["a"] = []
591 |         episode["robots"]["w"] = []
592 |         episode["robots"]["start"] = []
593 |         episode["robots"]["goal"] = []
594 |         episode["robots"]["init_theta"] = []
595 |         episode["robots"]["init_speed"] = []
596 | 
597 |         episode["robots"]["perception"] = {}
598 |         episode["robots"]["perception"]["range"] = []
599 |         episode["robots"]["perception"]["angle"] = []
600 |         # episode["robots"]["perception"]["num_beams"] = []
601 |         # episode["robots"]["perception"]["len_obs_history"] = []
602 | 
603 |         episode["robots"]["action_history"] = []
604 |         episode["robots"]["trajectory"] = []
605 | 
606 |         for rob in self.robots:
607 |             episode["robots"]["cooperative"].append(rob.cooperative)
608 |             episode["robots"]["dt"].append(rob.dt)
609 |             episode["robots"]["N"].append(rob.N)
610 |             episode["robots"]["length"].append(rob.length)
611 |             episode["robots"]["width"].append(rob.width)
612 |             episode["robots"]["r"].append(rob.r)
613 |             episode["robots"]["detect_r"].append(rob.detect_r)
614 |             episode["robots"]["goal_dis"].append(rob.goal_dis)
615 |             episode["robots"]["obs_dis"].append(rob.obs_dis)
616 |             episode["robots"]["max_speed"].append(rob.max_speed)
617 |             episode["robots"]["a"].append(list(rob.a))
618 |             episode["robots"]["w"].append(list(rob.w))
619 |             episode["robots"]["start"].append(list(rob.start))
620 |             episode["robots"]["goal"].append(list(rob.goal))
621 |             episode["robots"]["init_theta"].append(rob.init_theta)
622 |             episode["robots"]["init_speed"].append(rob.init_speed)
623 | 
624 |             episode["robots"]["perception"]["range"].append(rob.perception.range)
625 |             episode["robots"]["perception"]["angle"].append(rob.perception.angle)
626 |             # episode["robots"]["perception"]["num_beams"].append(rob.perception.num_beams)
627 |             # episode["robots"]["perception"]["len_obs_history"].append(rob.perception.len_obs_history)
628 | 
629 |             episode["robots"]["action_history"].append(copy.deepcopy(rob.action_history))
630 |             episode["robots"]["trajectory"].append(copy.deepcopy(rob.trajectory))
631 | 
632 |         return episode
633 | 
634 |     def save_episode(self,filename):
635 |         episode = self.episode_data()
636 |         with open(filename,"w") as file:
637 |             json.dump(episode,file)
638 | 


--------------------------------------------------------------------------------
/marinenav_env/envs/utils/robot.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import copy
  3 | import heapq
  4 | 
  5 | class Perception:
  6 | 
  7 |     def __init__(self,cooperative:bool=False):
  8 |         # 2D LiDAR model with detection area as a sector
  9 |         self.range = 15.0 # range of beams (meter)
 10 |         self.angle = 2 * np.pi # detection angle range
 11 |         # self.len_obs_history = 5 # the window size of observation history
 12 |         self.max_obs_num = 5 # the maximum number of obstacles to be considered
 13 |         self.max_obj_num = 5 # the maximum number of dyanmic objects to be considered
 14 |         self.observation_format(cooperative)
 15 |         self.observed_obs = [] # indices of observed static obstacles
 16 |         self.observed_objs = [] # indiced of observed dynamic objects
 17 | 
 18 |     def observation_format(self,cooperative:bool=False):
 19 |         # format: {"self": [goal,velocity], 
 20 |         #          "static":[[obs_1.x,obs_1.y,obs_1.r],...,[obs_n.x,obs_n.y,obs_n.r]],
 21 |         #          "dynamic":[[robot_1.x,robot_1.y,robot_1.vx,robot_1.vy],...,[robot_n.x,robot_n.y,robot_n.vx,robot_n.vy]]
 22 |         if cooperative:
 23 |             self.observation = dict(self=[],static=[],dynamic=[])
 24 |         else:
 25 |             self.observation = dict(self=[],static=[])
 26 | 
 27 | 
 28 | class Robot:
 29 | 
 30 |     def __init__(self,cooperative:bool=False):
 31 |         
 32 |         # parameter initialization
 33 |         self.cooperative = cooperative # if the robot is cooperative or not
 34 |         self.dt = 0.05 # discretized time step (second)
 35 |         self.N = 10 # number of time step per action
 36 |         self.perception = Perception(cooperative)
 37 |         self.length = 1.0 
 38 |         self.width = 0.5
 39 |         self.r = 0.8 # collision range
 40 |         self.detect_r = 0.5*np.sqrt(self.length**2+self.width**2) # detection range
 41 |         self.goal_dis = 2.0 # max distance to goal considered as reached
 42 |         self.obs_dis = 5.0 # min distance to other objects that is considered safe     
 43 |         self.max_speed = 2.0
 44 |         self.a = np.array([-0.4,0.0,0.4]) # linear accelerations (m/s^2)
 45 |         self.w = np.array([-np.pi/6,0.0,np.pi/6]) # angular velocities (rad/s)
 46 |         self.compute_k() # cofficient of water resistance
 47 |         self.compute_actions() # list of actions
 48 | 
 49 |         self.x = None # x coordinate
 50 |         self.y = None # y coordinate
 51 |         self.theta = None # steering heading angle
 52 |         self.speed = None # steering foward speed
 53 |         self.velocity = None # velocity wrt sea floor
 54 |         
 55 |         self.start = None # start position
 56 |         self.goal = None # goal position
 57 |         self.collision = False
 58 |         self.reach_goal = False
 59 |         self.deactivated = False # deactivate the robot if it collides with any objects or reaches the goal 
 60 | 
 61 |         self.init_theta = 0.0 # theta at initial position
 62 |         self.init_speed = 0.0 # speed at initial position
 63 | 
 64 |         self.action_history = [] # history of action commands in one episode
 65 |         self.trajectory = [] # trajectory in one episode
 66 | 
 67 |     def compute_k(self):
 68 |         self.k = np.max(self.a)/self.max_speed
 69 |     
 70 |     def compute_actions(self):
 71 |         self.actions = [(acc,ang_v) for acc in self.a for ang_v in self.w]
 72 | 
 73 |     def compute_actions_dimension(self):
 74 |         return len(self.actions)
 75 | 
 76 |     def compute_dist_reward_scale(self):
 77 |         # scale the distance reward
 78 |         return 1 / (self.max_speed * self.N * self.dt)
 79 |     
 80 |     def compute_penalty_matrix(self):
 81 |         # scale the penalty value to [-1,0]
 82 |         scale_a = 1 / (np.max(self.a)*np.max(self.a))
 83 |         scale_w = 1 / (np.max(self.w)*np.max(self.w))
 84 |         p = -0.5 * np.matrix([[scale_a,0.0],[0.0,scale_w]])
 85 |         return p
 86 | 
 87 |     def compute_action_energy_cost(self,action):
 88 |         # scale the a and w to [0,1]
 89 |         a,w = self.actions[action]
 90 |         a /= np.max(self.a)
 91 |         w /= np.max(self.w)
 92 |         return np.abs(a) + np.abs(w)
 93 |     
 94 |     def dist_to_goal(self):
 95 |         return np.linalg.norm(self.goal - np.array([self.x,self.y]))
 96 | 
 97 |     def check_reach_goal(self):
 98 |         if self.dist_to_goal() <= self.goal_dis:
 99 |             self.reach_goal = True
100 | 
101 |     def reset_state(self,current_velocity=np.zeros(2)):
102 |         # only called when resetting the environment
103 |         self.action_history.clear()
104 |         self.trajectory.clear()
105 |         self.x = self.start[0]
106 |         self.y = self.start[1]
107 |         self.theta = self.init_theta 
108 |         self.speed = self.init_speed
109 |         self.update_velocity(current_velocity)
110 |         self.trajectory.append([self.x,self.y,self.theta,self.speed,self.velocity[0],self.velocity[1]]) 
111 | 
112 |     def get_robot_transform(self):
113 |         # compute transformation from world frame to robot frame
114 |         R_wr = np.matrix([[np.cos(self.theta),-np.sin(self.theta)],[np.sin(self.theta),np.cos(self.theta)]])
115 |         t_wr = np.matrix([[self.x],[self.y]])
116 |         return R_wr, t_wr
117 | 
118 |     def get_steer_velocity(self):
119 |         return self.speed * np.array([np.cos(self.theta), np.sin(self.theta)])
120 | 
121 |     def update_velocity(self,current_velocity=np.zeros(2)):
122 |         steer_velocity = self.get_steer_velocity()
123 |         self.velocity = steer_velocity + current_velocity
124 | 
125 |     def update_state(self,action,current_velocity):
126 |         # update robot position in one time step
127 |         self.update_velocity(current_velocity)
128 |         dis = self.velocity * self.dt
129 |         self.x += dis[0]
130 |         self.y += dis[1]
131 |         
132 |         # update robot speed in one time step
133 |         a,w = self.actions[action]
134 |         
135 |         # assume that water resistance force is proportion to the speed
136 |         self.speed += (a-self.k*self.speed) * self.dt
137 |         self.speed = np.clip(self.speed,0.0,self.max_speed)
138 |         
139 |         # update robot heading angle in one time step
140 |         self.theta += w * self.dt
141 | 
142 |         # warp theta to [0,2*pi)
143 |         while self.theta < 0.0:
144 |             self.theta += 2 * np.pi
145 |         while self.theta >= 2 * np.pi:
146 |             self.theta -= 2 * np.pi
147 | 
148 |     def check_collision(self,obj_x,obj_y,obj_r):
149 |         d = self.compute_distance(obj_x,obj_y,obj_r)
150 |         if d <= 0.0:
151 |             self.collision = True
152 | 
153 |     def compute_distance(self,x,y,r,in_robot_frame=False):
154 |         if in_robot_frame:
155 |             d = np.sqrt(x**2+y**2) - r - self.r
156 |         else:
157 |             d = np.sqrt((self.x-x)**2+(self.y-y)**2) - r - self.r
158 |         return d
159 | 
160 |     def check_detection(self,obj_x,obj_y,obj_r):
161 |         proj_pos = self.project_to_robot_frame(np.array([obj_x,obj_y]),False)
162 |         
163 |         if np.linalg.norm(proj_pos) > self.perception.range + obj_r:
164 |             return False
165 |         
166 |         angle = np.arctan2(proj_pos[1],proj_pos[0])
167 |         if angle < -0.5*self.perception.angle or angle > 0.5*self.perception.angle:
168 |             return False
169 |         
170 |         return True
171 |     
172 |     def project_to_robot_frame(self,x,is_vector=True):
173 |         assert isinstance(x,np.ndarray), "the input needs to be an numpy array"
174 |         assert np.shape(x) == (2,)
175 | 
176 |         x_r = np.reshape(x,(2,1))
177 | 
178 |         R_wr, t_wr = self.get_robot_transform()
179 | 
180 |         R_rw = np.transpose(R_wr)
181 |         t_rw = -R_rw * t_wr 
182 | 
183 |         if is_vector:
184 |             x_r = R_rw * x_r
185 |         else:
186 |             x_r = R_rw * x_r + t_rw
187 | 
188 |         x_r.resize((2,))
189 |         return np.array(x_r)            
190 | 
191 | 
192 |     def perception_output(self,obstacles,robots,in_robot_frame=True):
193 |         if self.deactivated:
194 |             return None, self.collision, self.reach_goal
195 |         
196 |         self.perception.observation["static"].clear()
197 |         if self.cooperative:
198 |             self.perception.observation["dynamic"].clear()
199 | 
200 |         ##### self observation #####
201 |         if in_robot_frame:
202 |             # vehicle velocity wrt seafloor in self frame
203 |             abs_velocity_r = self.project_to_robot_frame(self.velocity)
204 | 
205 |             # goal position in self frame
206 |             goal_r = self.project_to_robot_frame(self.goal,False)
207 | 
208 |             self.perception.observation["self"] = list(np.concatenate((goal_r,abs_velocity_r)))
209 |         else:
210 |             self.perception.observation["self"] = [self.x,self.y,self.theta,self.speed,self.velocity[0],self.velocity[1],self.goal[0],self.goal[1]]
211 | 
212 | 
213 |         self.perception.observed_obs.clear()
214 |         if self.cooperative:
215 |             self.perception.observed_objs.clear()
216 | 
217 |         self.check_reach_goal()
218 | 
219 |         # min_distance = np.inf
220 | 
221 |         ##### static obstacles observation #####
222 |         for i,obs in enumerate(obstacles):
223 |             if not self.check_detection(obs.x,obs.y,obs.r):
224 |                 continue
225 | 
226 |             self.perception.observed_obs.append(i)
227 | 
228 |             if not self.collision:
229 |                 self.check_collision(obs.x,obs.y,obs.r)
230 | 
231 |             if in_robot_frame:
232 |                 pos_r = self.project_to_robot_frame(np.array([obs.x,obs.y]),False)
233 |                 self.perception.observation["static"].append([pos_r[0],pos_r[1],obs.r])
234 |             else:
235 |                 self.perception.observation["static"].append([obs.x,obs.y,obs.r])
236 |             # min_distance = min(min_distance,np.linalg.norm(pos_r)-obs.r)
237 | 
238 |         if self.cooperative:
239 |             ##### dynamic objects observation #####
240 |             for j,robot in enumerate(robots):
241 |                 if robot is self:
242 |                     continue
243 |                 if robot.deactivated:
244 |                     # This robot is in the deactivate state, and abscent from the current map
245 |                     continue
246 |                 if not self.check_detection(robot.x,robot.y,robot.detect_r):
247 |                     continue
248 | 
249 |                 self.perception.observed_objs.append(j)
250 |                 
251 |                 if not self.collision:
252 |                     self.check_collision(robot.x,robot.y,robot.r)
253 | 
254 |                 if in_robot_frame:
255 |                     pos_r = self.project_to_robot_frame(np.array([robot.x,robot.y]),False)
256 |                     v_r = self.project_to_robot_frame(robot.velocity)
257 |                     new_obs = list(np.concatenate((pos_r,v_r)))
258 |                     # if j in self.perception.observation["dynamic"].copy().keys():
259 |                     #     self.perception.observation["dynamic"][j].append(new_obs)
260 |                     #     while len(self.perception.observation["dynamic"][j]) > self.perception.len_obs_history:
261 |                     #         del self.perception.observation["dynamic"][j][0]
262 |                     # else:
263 |                     #     self.perception.observation["dynamic"][j] = [new_obs]
264 |                     self.perception.observation["dynamic"].append(new_obs)
265 |                     # min_distance = min(min_distance,np.linalg.norm(pos_r)-robot.r)
266 |                 else:
267 |                     self.perception.observation["dynamic"].append([robot.x,robot.y,robot.velocity[0],robot.velocity[1]])
268 | 
269 |         # if self.cooperative:
270 |         #     for idx in self.perception.observation["dynamic"].copy().keys():
271 |         #         if idx not in self.perception.observed_objs:
272 |         #             # remove the observation history if the object is not observed in the current step
273 |         #             del self.perception.observation["dynamic"][idx]
274 | 
275 |         self_state = copy.deepcopy(self.perception.observation["self"])
276 |         static_observations = copy.deepcopy(heapq.nsmallest(self.perception.max_obs_num,
277 |                                                       self.perception.observation["static"],
278 |                                                       key=lambda obs:self.compute_distance(obs[0],obs[1],obs[2],True)))
279 |         
280 |         static_states = []
281 |         for static in static_observations:
282 |             static_states += static
283 |         static_states += [0.0,0.0,0.0]*(self.perception.max_obs_num-len(static_observations))
284 | 
285 |         if self.cooperative:
286 |             # # remove object indices 
287 |             # dynamic_states = copy.deepcopy(list(self.perception.observation["dynamic"].values()))
288 |             # if fixed_dynamic_size:
289 |             #     dynamic_states_v = []
290 |             #     for obs_history in dynamic_states:
291 |             #         # convert the observation history into a vector [s_{t-k},s_{t-k+1},...,s_t]
292 |             #         pad_len = max(self.perception.len_obs_history-len(obs_history),0)
293 |             #         dynamic_states_vectors = [0.,0.,0.,0.] * pad_len
294 |             #         for obs in obs_history:
295 |             #             dynamic_states_vectors += obs
296 |             #         dynamic_states_v.append(dynamic_states_vectors)
297 |             #     return (self_state,static_states,dynamic_states_v), collision, self.reach_goal
298 |             # else:
299 |             #     idx_array = []
300 |             #     for idx,obs_history in enumerate(dynamic_states):
301 |             #         # pad the dynamic observation and save the indices of exact lastest element
302 |             #         idx_array.append([idx,len(obs_history)-1])
303 |             #         while len(obs_history) < self.perception.len_obs_history:
304 |             #             obs_history.append([0.,0.,0.,0.])
305 |             
306 |             #     return (self_state,static_states,dynamic_states,idx_array), collision, self.reach_goal
307 |             dynamic_observations = copy.deepcopy(heapq.nsmallest(self.perception.max_obj_num, 
308 |                                                            self.perception.observation["dynamic"],
309 |                                                            key=lambda obj:self.compute_distance(obj[0],obj[1],self.r,True)))
310 |         else:
311 |             dynamic_observations = [[0.0,0.0,0.0,0.0]]*self.perception.max_obj_num
312 |             
313 |         dynamic_states = []
314 |         for dynamic in dynamic_observations:
315 |             dynamic_states += dynamic
316 |         dynamic_states += [0.0,0.0,0.0,0.0]*(self.perception.max_obj_num-len(dynamic_observations))
317 | 
318 |         return self_state+static_states+dynamic_states, self.collision, self.reach_goal
319 | 
320 | 
321 |     # def perception_prediction(self,obs_states,robots_states=[]):
322 |     #     # perception of predicted future state (robot frame)
323 | 
324 |     #     ##### self observation ##### 
325 |     #     # vehicle velocity wrt seafloor in self frame
326 |     #     abs_velocity_r = self.project_to_robot_frame(self.velocity)
327 | 
328 |     #     # goal position in self frame
329 |     #     goal_r = self.project_to_robot_frame(self.goal,False)
330 | 
331 |     #     self_state = list(np.concatenate((goal_r,abs_velocity_r)))
332 | 
333 |     #     collision = False
334 |     #     self.check_reach_goal()
335 | 
336 |     #     ##### static obstatcles observation #####
337 |     #     static_states = []
338 |     #     for obs in obs_states:
339 |     #         if not self.check_detection(obs[0],obs[1],obs[2]):
340 |     #             continue
341 | 
342 |     #         if not collision:
343 |     #             collision = self.check_collision(obs[0],obs[1],obs[2])
344 | 
345 |     #         pos_r = self.project_to_robot_frame(np.array([obs[0],obs[1]]),False)
346 |     #         static_states.append([pos_r[0],pos_r[1],obs[2]])
347 | 
348 |     #     if self.cooperative:
349 |     #         # dynamic objects observation in self frame
350 |     #         dynamic_states = []
351 |     #         for obj in robots_states:
352 |     #             if not self.check_detection(obj[0],obj[1],self.detect_r):
353 |     #                 continue
354 | 
355 |     #             if not collision:
356 |     #                 collision = self.check_collision(obj[0],obj[1],self.r)
357 | 
358 |     #             pos_r = self.project_to_robot_frame(np.array([obj[0],obj[1]]),False)
359 |     #             v_r = self.project_to_robot_frame(np.array([obj[2],obj[3]]))
360 |     #             dynamic_states.append(list(np.concatenate((pos_r,v_r))))
361 |             
362 |     #         return (self_state,static_states,dynamic_states), collision, self.reach_goal
363 |     #     else:
364 |     #         return (self_state,static_states), collision, self.reach_goal
365 | 
366 |             
367 | 
368 | 
369 | 
370 | 
371 |             
372 | 
373 |             
374 | 
375 |                      
376 | 
377 | 
378 | 
379 | 


--------------------------------------------------------------------------------
/marinenav_env/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(name='marinenav_env',
4 |       version='0.0.1',
5 |       install_requires=['gym']
6 | )


--------------------------------------------------------------------------------
/policy/DQN_model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import numpy as np
  4 | import os
  5 | import json
  6 | from torch.nn.functional import softmax,relu
  7 | 
  8 | def encoder(input_dimension,output_dimension):
  9 |     l1 = nn.Linear(input_dimension,output_dimension)
 10 |     l2 = nn.ReLU()
 11 |     model = nn.Sequential(l1, l2)
 12 |     return model
 13 | 
 14 | class DQN_Policy(nn.Module):
 15 |     def __init__(self,
 16 |                  self_dimension,
 17 |                  static_dimension,
 18 |                  dynamic_dimension,
 19 |                  self_feature_dimension,
 20 |                  static_feature_dimension,
 21 |                  dynamic_feature_dimension,
 22 |                  hidden_dimension,
 23 |                  action_size,
 24 |                  device='cpu',
 25 |                  seed=0
 26 |                  ):
 27 |         super().__init__()
 28 | 
 29 |         self.self_dimension = self_dimension
 30 |         self.static_dimension = static_dimension
 31 |         self.dynamic_dimension = dynamic_dimension
 32 |         self.self_feature_dimension = self_feature_dimension
 33 |         self.static_feature_dimension = static_feature_dimension
 34 |         self.dynamic_feature_dimension = dynamic_feature_dimension
 35 |         self.concat_feature_dimension = self_feature_dimension + static_feature_dimension + dynamic_feature_dimension
 36 |         self.hidden_dimension = hidden_dimension
 37 |         self.action_size = action_size
 38 |         self.device = device
 39 |         self.seed_id = seed
 40 |         self.seed = torch.manual_seed(seed)
 41 | 
 42 |         self.self_encoder = encoder(self_dimension,self_feature_dimension)
 43 |         self.static_encoder = encoder(static_dimension,static_feature_dimension)
 44 |         self.dynamic_encoder = encoder(dynamic_dimension,dynamic_feature_dimension)
 45 | 
 46 |         # hidden layers
 47 |         self.hidden_layer = nn.Linear(self.concat_feature_dimension, hidden_dimension)
 48 |         self.hidden_layer_2 = nn.Linear(hidden_dimension, hidden_dimension)
 49 |         self.output_layer = nn.Linear(hidden_dimension, action_size)
 50 | 
 51 |     def forward(self, x):
 52 | 
 53 |         self_states = x[:,:self.self_dimension]
 54 |         static_states = x[:,self.self_dimension:self.self_dimension+self.static_dimension]
 55 |         dynamic_states = x[:,self.self_dimension+self.static_dimension:]
 56 | 
 57 |         # encode observations as features
 58 |         self_features = self.self_encoder(self_states)
 59 |         static_features = self.static_encoder(static_states)
 60 |         dynamic_features = self.dynamic_encoder(dynamic_states)
 61 |         features = torch.cat((self_features,static_features,dynamic_features),1)
 62 |         
 63 |         features = relu(self.hidden_layer(features))
 64 |         features = relu(self.hidden_layer_2(features))
 65 |         output = self.output_layer(features)
 66 |         
 67 |         return output
 68 |     
 69 |     def get_constructor_parameters(self):       
 70 |         return dict(self_dimension = self.self_dimension,
 71 |                     static_dimension = self.static_dimension,
 72 |                     dynamic_dimension = self.dynamic_dimension,
 73 |                     self_feature_dimension = self.self_feature_dimension,
 74 |                     static_feature_dimension = self.static_feature_dimension,
 75 |                     dynamic_feature_dimension = self.dynamic_feature_dimension,
 76 |                     hidden_dimension = self.hidden_dimension,
 77 |                     action_size = self.action_size,
 78 |                     seed = self.seed_id)
 79 |     
 80 |     def save(self,directory):
 81 |         # save network parameters
 82 |         torch.save(self.state_dict(),os.path.join(directory,f"dqn_network_params.pth"))
 83 |         
 84 |         # save constructor parameters
 85 |         with open(os.path.join(directory,f"dqn_constructor_params.json"),mode="w") as constructor_f:
 86 |             json.dump(self.get_constructor_parameters(),constructor_f)
 87 | 
 88 |     @classmethod
 89 |     def load(cls,directory,device="cpu"):
 90 |         # load network parameters
 91 |         model_params = torch.load(os.path.join(directory,"dqn_network_params.pth"),
 92 |                                   map_location=device)
 93 | 
 94 |         # load constructor parameters
 95 |         with open(os.path.join(directory,"dqn_constructor_params.json"), mode="r") as constructor_f:
 96 |             constructor_params = json.load(constructor_f)
 97 |             constructor_params["device"] = device
 98 | 
 99 |         model = cls(**constructor_params)
100 |         model.load_state_dict(model_params)
101 |         model.to(device)
102 | 
103 |         return model


--------------------------------------------------------------------------------
/policy/IQN_model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import numpy as np
  4 | import os
  5 | import json
  6 | from torch.nn.functional import softmax,relu
  7 | 
  8 | def encoder(input_dimension,output_dimension):
  9 |     l1 = nn.Linear(input_dimension,output_dimension)
 10 |     l2 = nn.ReLU()
 11 |     model = nn.Sequential(l1, l2)
 12 |     return model
 13 | 
 14 | class IQN_Policy(nn.Module):
 15 |     def __init__(self,
 16 |                  self_dimension,
 17 |                  static_dimension,
 18 |                  dynamic_dimension,
 19 |                  self_feature_dimension,
 20 |                  static_feature_dimension,
 21 |                  dynamic_feature_dimension,
 22 |                  hidden_dimension,
 23 |                  action_size,
 24 |                  device='cpu',
 25 |                  seed=0
 26 |                  ):
 27 |         super().__init__()
 28 | 
 29 |         self.self_dimension = self_dimension
 30 |         self.static_dimension = static_dimension
 31 |         self.dynamic_dimension = dynamic_dimension
 32 |         self.self_feature_dimension = self_feature_dimension
 33 |         self.static_feature_dimension = static_feature_dimension
 34 |         self.dynamic_feature_dimension = dynamic_feature_dimension
 35 |         self.concat_feature_dimension = self_feature_dimension + static_feature_dimension + dynamic_feature_dimension
 36 |         self.hidden_dimension = hidden_dimension
 37 |         self.action_size = action_size
 38 |         self.device = device
 39 |         self.seed_id = seed
 40 |         self.seed = torch.manual_seed(seed)
 41 | 
 42 |         self.self_encoder = encoder(self_dimension,self_feature_dimension)
 43 |         self.static_encoder = encoder(static_dimension,static_feature_dimension)
 44 |         self.dynamic_encoder = encoder(dynamic_dimension,dynamic_feature_dimension)
 45 | 
 46 |         self.K = 32 # number of quantiles in output
 47 |         self.n = 64 # number of cosine features
 48 | 
 49 |         # quantile encoder
 50 |         self.pis = torch.FloatTensor([np.pi * i for i in range(self.n)]).view(1,1,self.n).to(device)
 51 |         self.cos_embedding = nn.Linear(self.n,self.concat_feature_dimension)
 52 | 
 53 |         # hidden layers
 54 |         self.hidden_layer = nn.Linear(self.concat_feature_dimension, hidden_dimension)
 55 |         self.hidden_layer_2 = nn.Linear(hidden_dimension, hidden_dimension)
 56 |         self.output_layer = nn.Linear(hidden_dimension, action_size)
 57 | 
 58 |         # temp for reproducibility
 59 |         # self.taus = None
 60 | 
 61 |     def calc_cos(self, batch_size, num_tau=8, cvar=1.0):
 62 |         """
 63 |         Calculating the cosinus values depending on the number of tau samples
 64 |         """
 65 |         # temp for reproducibility
 66 |         # if self.taus is None:
 67 |         #     t = np.arange(0.05,1,0.03).astype(np.float32)
 68 |         #     self.taus = torch.from_numpy(t).to(self.device).unsqueeze(-1) # (batch_size, n_tau, 1) for broadcast
 69 |         # taus = self.taus * cvar
 70 | 
 71 |         taus = torch.rand(batch_size,num_tau).to(self.device).unsqueeze(-1)
 72 | 
 73 |         # distorted quantile sampling
 74 |         taus = taus * cvar
 75 | 
 76 |         cos = torch.cos(taus * self.pis)
 77 |         assert cos.shape == (batch_size, num_tau, self.n), "cos shape is incorrect"
 78 |         return cos, taus
 79 |     
 80 |     def forward(self, x, num_tau=8, cvar=1.0):
 81 |         batch_size = x.shape[0]
 82 | 
 83 |         self_states = x[:,:self.self_dimension]
 84 |         static_states = x[:,self.self_dimension:self.self_dimension+self.static_dimension]
 85 |         dynamic_states = x[:,self.self_dimension+self.static_dimension:]
 86 | 
 87 |         # encode observations as features
 88 |         self_features = self.self_encoder(self_states)
 89 |         static_features = self.static_encoder(static_states)
 90 |         dynamic_features = self.dynamic_encoder(dynamic_states)
 91 |         features = torch.cat((self_features,static_features,dynamic_features),1)
 92 | 
 93 |         # encode quantiles as features
 94 |         cos, taus = self.calc_cos(batch_size, num_tau, cvar)
 95 |         cos = cos.view(batch_size*num_tau, self.n)
 96 |         cos_features = relu(self.cos_embedding(cos)).view(batch_size,num_tau,self.concat_feature_dimension)
 97 | 
 98 |         # pairwise product of the input feature and cosine features
 99 |         features = (features.unsqueeze(1) * cos_features).view(batch_size*num_tau,self.concat_feature_dimension)
100 |         
101 |         features = relu(self.hidden_layer(features))
102 |         features = relu(self.hidden_layer_2(features))
103 |         quantiles = self.output_layer(features)
104 |         
105 |         return quantiles.view(batch_size,num_tau,self.action_size), taus
106 |     
107 |     def get_constructor_parameters(self):       
108 |         return dict(self_dimension = self.self_dimension,
109 |                     static_dimension = self.static_dimension,
110 |                     dynamic_dimension = self.dynamic_dimension,
111 |                     self_feature_dimension = self.self_feature_dimension,
112 |                     static_feature_dimension = self.static_feature_dimension,
113 |                     dynamic_feature_dimension = self.dynamic_feature_dimension,
114 |                     hidden_dimension = self.hidden_dimension,
115 |                     action_size = self.action_size,
116 |                     seed = self.seed_id)
117 |     
118 |     def save(self,directory):
119 |         # save network parameters
120 |         torch.save(self.state_dict(),os.path.join(directory,f"network_params.pth"))
121 |         
122 |         # save constructor parameters
123 |         with open(os.path.join(directory,f"constructor_params.json"),mode="w") as constructor_f:
124 |             json.dump(self.get_constructor_parameters(),constructor_f)
125 | 
126 |     @classmethod
127 |     def load(cls,directory,device="cpu"):
128 |         # load network parameters
129 |         model_params = torch.load(os.path.join(directory,"network_params.pth"),
130 |                                   map_location=device)
131 | 
132 |         # load constructor parameters
133 |         with open(os.path.join(directory,"constructor_params.json"), mode="r") as constructor_f:
134 |             constructor_params = json.load(constructor_f)
135 |             constructor_params["device"] = device
136 | 
137 |         model = cls(**constructor_params)
138 |         model.load_state_dict(model_params)
139 |         model.to(device)
140 | 
141 |         return model


--------------------------------------------------------------------------------
/policy/agent.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | from policy.IQN_model import IQN_Policy
  4 | from policy.DQN_model import DQN_Policy
  5 | from policy.replay_buffer import ReplayBuffer
  6 | from marinenav_env.envs.utils.robot import Robot
  7 | import numpy as np
  8 | import random
  9 | import time
 10 | from torch.nn import functional as F 
 11 | 
 12 | class Agent():
 13 |     def __init__(self, 
 14 |                  self_dimension=4,
 15 |                  static_dimension=15,
 16 |                  self_feature_dimension=32,
 17 |                  static_feature_dimension=120,
 18 |                  hidden_dimension=156,
 19 |                  action_size=9,
 20 |                  dynamic_dimension=20,
 21 |                  dynamic_feature_dimension=160,
 22 |                  cooperative=True, 
 23 |                  BATCH_SIZE=32, 
 24 |                  BUFFER_SIZE=1_000_000,
 25 |                  LR=1e-4, 
 26 |                  TAU=1.0, 
 27 |                  GAMMA=0.99,  
 28 |                  device="cpu", 
 29 |                  seed=0,
 30 |                  training=True,
 31 |                  use_iqn=True
 32 |                  ):
 33 |         
 34 |         self.cooperative = cooperative
 35 |         self.device = device
 36 |         self.LR = LR
 37 |         self.TAU = TAU
 38 |         self.GAMMA = GAMMA
 39 |         self.BUFFER_SIZE = BUFFER_SIZE
 40 |         self.BATCH_SIZE = BATCH_SIZE
 41 |         self.training = training
 42 |         self.action_size = action_size
 43 |         self.use_iqn = use_iqn
 44 | 
 45 |         if training:
 46 |             if use_iqn:
 47 |                 self.policy_local = IQN_Policy(self_dimension,
 48 |                                             static_dimension,
 49 |                                             dynamic_dimension,
 50 |                                             self_feature_dimension,
 51 |                                             static_feature_dimension,
 52 |                                             dynamic_feature_dimension,
 53 |                                             hidden_dimension,
 54 |                                             action_size,
 55 |                                             device,
 56 |                                             seed).to(device)
 57 |                 self.policy_target = IQN_Policy(self_dimension,
 58 |                                             static_dimension,
 59 |                                             dynamic_dimension,
 60 |                                             self_feature_dimension,
 61 |                                             static_feature_dimension,
 62 |                                             dynamic_feature_dimension,
 63 |                                             hidden_dimension,
 64 |                                             action_size,
 65 |                                             device,
 66 |                                             seed).to(device)
 67 |             else:
 68 |                 self.policy_local = DQN_Policy(self_dimension,
 69 |                                             static_dimension,
 70 |                                             dynamic_dimension,
 71 |                                             self_feature_dimension,
 72 |                                             static_feature_dimension,
 73 |                                             dynamic_feature_dimension,
 74 |                                             hidden_dimension,
 75 |                                             action_size,
 76 |                                             device,
 77 |                                             seed).to(device)
 78 |                 self.policy_target = DQN_Policy(self_dimension,
 79 |                                             static_dimension,
 80 |                                             dynamic_dimension,
 81 |                                             self_feature_dimension,
 82 |                                             static_feature_dimension,
 83 |                                             dynamic_feature_dimension,
 84 |                                             hidden_dimension,
 85 |                                             action_size,
 86 |                                             device,
 87 |                                             seed).to(device)
 88 |         
 89 |             self.optimizer = optim.Adam(self.policy_local.parameters(), lr=self.LR)
 90 | 
 91 |             self.memory = ReplayBuffer(BUFFER_SIZE,BATCH_SIZE)
 92 | 
 93 |     def act_dqn(self, state, eps=0.0, use_eval=True):
 94 |         state = torch.tensor([state]).float().to(self.device)
 95 |         if use_eval:
 96 |             self.policy_local.eval()
 97 |         else:
 98 |             self.policy_local.train()
 99 |         with torch.no_grad():
100 |             action_values = self.policy_local(state)
101 |         self.policy_local.train()
102 | 
103 |         # epsilon-greedy action selection
104 |         if random.random() > eps:
105 |             action = np.argmax(action_values.cpu().data.numpy())
106 |         else:
107 |             action = random.choice(np.arange(self.action_size))
108 | 
109 |         return action, action_values.cpu().data.numpy()
110 | 
111 |     def act(self, state, eps=0.0, cvar=1.0, use_eval=True):
112 |         """Returns action index and quantiles 
113 |         Params
114 |         ======
115 |             frame: to adjust epsilon
116 |             state (array_like): current state
117 |         """
118 |         # state = self.memory.state_batch([state])
119 |         # state = self.state_to_tensor(state) 
120 |         state = torch.tensor([state]).float().to(self.device)
121 |         if use_eval:
122 |             self.policy_local.eval()
123 |         else:
124 |             self.policy_local.train()
125 |         with torch.no_grad():
126 |             quantiles, taus = self.policy_local(state, self.policy_local.K, cvar)
127 |             action_values = quantiles.mean(dim=1)
128 |         self.policy_local.train()
129 | 
130 |         # epsilon-greedy action selection
131 |         if random.random() > eps:
132 |             action = np.argmax(action_values.cpu().data.numpy())
133 |         else:
134 |             action = random.choice(np.arange(self.action_size))
135 |         
136 |         return action, quantiles.cpu().data.numpy(), taus.cpu().data.numpy()
137 | 
138 |     def act_adaptive(self, state, eps=0.0):
139 |         """adptively tune the CVaR value, compute action index and quantiles
140 |         Params
141 |         ======
142 |             frame: to adjust epsilon
143 |             state (array_like): current state
144 |         """
145 |         cvar = self.adjust_cvar(state)
146 |         action, quantiles, taus = self.act(state, eps, cvar)
147 |         return action, quantiles, taus, cvar
148 | 
149 |     def adjust_cvar(self,state):
150 |         # scale CVaR value according to the closest distance to obstacles
151 |         
152 |         assert len(state) == 39, "The state size does not equal 39"
153 | 
154 |         static = state[4:19]
155 |         dynamic = state[19:] 
156 |         
157 |         closest_d = np.inf
158 | 
159 |         for i in range(0,len(static),3):
160 |             if np.abs(static[i]) < 1e-3 and np.abs(static[i+1]) < 1e-3:
161 |                 # padding
162 |                 continue
163 |             dist = np.linalg.norm(static[i:i+2]) - static[i+2] - 0.8
164 |             closest_d = min(closest_d,dist)
165 | 
166 |         for i in range(0,len(dynamic),4):
167 |             if np.abs(dynamic[i]) < 1e-3 and np.abs(dynamic[i+1]) < 1e-3:
168 |                 # padding
169 |                 continue
170 |             dist = np.linalg.norm(dynamic[i:i+2]) - 1.6
171 |             closest_d = min(closest_d,dist)
172 |         
173 |         cvar = 1.0
174 |         if closest_d < 10.0:
175 |             cvar = closest_d / 10.0
176 | 
177 |         return cvar
178 | 
179 |     def train(self):
180 |         if self.use_iqn:
181 |             return self.train_IQN()
182 |         else:
183 |             return self.train_DQN()
184 |     
185 |     def train_IQN(self):
186 |         """Update value parameters using given batch of experience tuples
187 |         Params
188 |         ======
189 |             experiences (Tuple[torch.Tensor]): tuple of (s, a, r, s', done) tuples 
190 |             gamma (float): discount factor
191 |         """
192 | 
193 |         states, actions, rewards, next_states, dones = self.memory.sample()
194 |         states = torch.tensor(states).float().to(self.device)
195 |         actions = torch.tensor(actions).unsqueeze(-1).to(self.device)
196 |         rewards = torch.tensor(rewards).unsqueeze(-1).float().to(self.device)
197 |         next_states = torch.tensor(next_states).float().to(self.device)
198 |         dones = torch.tensor(dones).unsqueeze(-1).float().to(self.device)
199 | 
200 |         self.optimizer.zero_grad()
201 |         # Get max predicted Q values (for next states) from target model
202 |         Q_targets_next,_ = self.policy_target(next_states)
203 |         Q_targets_next = Q_targets_next.detach().max(2)[0].unsqueeze(1) # (batch_size, 1, N)
204 |         
205 |         # Compute Q targets for current states 
206 |         Q_targets = rewards.unsqueeze(-1) + (self.GAMMA * Q_targets_next * (1. - dones.unsqueeze(-1)))
207 |         # Get expected Q values from local model
208 |         Q_expected,taus = self.policy_local(states)
209 |         Q_expected = Q_expected.gather(2, actions.unsqueeze(-1).expand(self.BATCH_SIZE, 8, 1))
210 | 
211 |         # Quantile Huber loss
212 |         td_error = Q_targets - Q_expected
213 |         assert td_error.shape == (self.BATCH_SIZE, 8, 8), "wrong td error shape"
214 |         huber_l = calculate_huber_loss(td_error, 1.0)
215 |         quantil_l = abs(taus -(td_error.detach() < 0).float()) * huber_l / 1.0
216 |         
217 |         loss = quantil_l.sum(dim=1).mean(dim=1) # keepdim=True if per weights get multiple
218 |         loss = loss.mean()
219 | 
220 |         # minimize the loss
221 |         loss.backward()
222 |         torch.nn.utils.clip_grad_norm_(self.policy_local.parameters(), 0.5)
223 |         self.optimizer.step()
224 | 
225 |         return loss.detach().cpu().numpy()
226 |     
227 |     def train_DQN(self):
228 |         states, actions, rewards, next_states, dones = self.memory.sample()
229 |         states = torch.tensor(states).float().to(self.device)
230 |         actions = torch.tensor(actions).unsqueeze(-1).to(self.device)
231 |         rewards = torch.tensor(rewards).unsqueeze(-1).float().to(self.device)
232 |         next_states = torch.tensor(next_states).float().to(self.device)
233 |         dones = torch.tensor(dones).unsqueeze(-1).float().to(self.device)
234 | 
235 |         self.optimizer.zero_grad()
236 | 
237 |         # compute target values
238 |         Q_targets_next = self.policy_target(next_states)
239 |         Q_targets_next,_ = Q_targets_next.max(dim=1,keepdim=True)
240 |         Q_targets = rewards + (1-dones) * self.GAMMA * Q_targets_next
241 | 
242 |         # compute expected values
243 |         Q_expected = self.policy_local(states)
244 |         Q_expected = Q_expected.gather(1,actions)
245 | 
246 |         # compute huber loss
247 |         loss = F.smooth_l1_loss(Q_expected,Q_targets)
248 | 
249 |         # minimize the loss
250 |         loss.backward()
251 |         torch.nn.utils.clip_grad_norm_(self.policy_local.parameters(), 0.5)
252 |         self.optimizer.step()
253 | 
254 |         return loss.detach().cpu().numpy()
255 | 
256 | 
257 | 
258 |     def soft_update(self):
259 |         """Soft update model parameters.
260 |         θ_target = τ * θ_local + (1 - τ) * θ_target
261 |         Params
262 |         ======
263 |             local_model (PyTorch model): weights will be copied from
264 |             target_model (PyTorch model): weights will be copied to
265 |             tau (float): interpolation parameter
266 |         """
267 |         for target_param, local_param in zip(self.policy_target.parameters(), self.policy_local.parameters()):
268 |             target_param.data.copy_(self.TAU * local_param.data + (1.0 - self.TAU) * target_param.data)
269 | 
270 |     def save_latest_model(self,directory):
271 |         self.policy_local.save(directory)
272 | 
273 |     def load_model(self,path,agent_type="cooperative",device="cpu"):
274 |         if self.use_iqn:
275 |             self.policy_local = IQN_Policy.load(path,device)
276 |         else:
277 |             self.policy_local = DQN_Policy.load(path,device)
278 | 
279 | 
280 | def calculate_huber_loss(td_errors, k=1.0):
281 |     """
282 |     Calculate huber loss element-wisely depending on kappa k.
283 |     """
284 |     loss = torch.where(td_errors.abs() <= k, 0.5 * td_errors.pow(2), k * (td_errors.abs() - 0.5 * k))
285 |     assert loss.shape == (td_errors.shape[0], 8, 8), "huber loss has wrong shape"
286 |     return loss


--------------------------------------------------------------------------------
/policy/replay_buffer.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | from collections import deque
 3 | import torch
 4 | import copy
 5 | 
 6 | class ReplayBuffer:
 7 |     """Fixed-size buffer to store experience tuples."""
 8 | 
 9 |     def __init__(self, buffer_size, batch_size):
10 |         """
11 |         Params
12 |         ======
13 |             buffer_size (int): maximum size of buffer
14 |             batch_size (int): size of each training batch
15 |         """
16 |         self.memory = deque(maxlen=buffer_size)  
17 |         self.batch_size = batch_size
18 |     
19 |     def add(self,item):
20 |         """Add a new experience to memory."""
21 |         self.memory.append(item)
22 |     
23 |     def sample(self):
24 |         """Randomly sample a batch of experiences from memory."""
25 |         samples = random.sample(self.memory, k=self.batch_size)
26 |         states = []
27 |         actions = []
28 |         rewards = []
29 |         next_states = []
30 |         dones = []
31 |         for sample in samples:
32 |             state, action, reward, next_state, done = sample 
33 |             states.append(state)
34 |             actions.append(action)
35 |             rewards.append(reward)
36 |             next_states.append(next_state)
37 |             dones.append(done)
38 | 
39 |         return states, actions, rewards, next_states, dones
40 | 
41 |     def size(self):
42 |         """Return the current size of internal memory."""
43 |         return len(self.memory)
44 | 
45 | 
46 |     
47 | 
48 | 


--------------------------------------------------------------------------------
/policy/trainer.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import numpy as np
  3 | import os
  4 | import copy
  5 | import time
  6 | 
  7 | class Trainer():
  8 |     def __init__(self,
  9 |                  train_env,
 10 |                  eval_env,
 11 |                  eval_schedule,
 12 |                  non_cooperative_agent=None,
 13 |                  cooperative_agent=None,
 14 |                  UPDATE_EVERY=4,
 15 |                  learning_starts=2000,
 16 |                  target_update_interval=10000,
 17 |                  exploration_fraction=0.25,
 18 |                  initial_eps=0.6,
 19 |                  final_eps=0.05
 20 |                  ):
 21 |         
 22 |         self.train_env = train_env
 23 |         self.eval_env = eval_env
 24 |         self.cooperative_agent = cooperative_agent
 25 |         self.noncooperative_agent = non_cooperative_agent
 26 |         self.eval_config = []
 27 |         self.create_eval_configs(eval_schedule)
 28 | 
 29 |         self.UPDATE_EVERY = UPDATE_EVERY
 30 |         self.learning_starts = learning_starts
 31 |         self.target_update_interval = target_update_interval
 32 |         self.exploration_fraction = exploration_fraction
 33 |         self.initial_eps = initial_eps
 34 |         self.final_eps = final_eps
 35 | 
 36 |         # Current time step
 37 |         self.current_timestep = 0
 38 | 
 39 |         # Learning time step (start counting after learning_starts time step)
 40 |         self.learning_timestep = 0
 41 | 
 42 |         # Evaluation data
 43 |         self.eval_timesteps = []
 44 |         self.eval_actions = []
 45 |         self.eval_trajectories = []
 46 |         self.eval_rewards = []
 47 |         self.eval_successes = []
 48 |         self.eval_times = []
 49 |         self.eval_energies = []
 50 |         self.eval_obs = []
 51 |         self.eval_objs = []
 52 | 
 53 |     def create_eval_configs(self,eval_schedule):
 54 |         self.eval_config.clear()
 55 | 
 56 |         count = 0
 57 |         for i,num_episode in enumerate(eval_schedule["num_episodes"]):
 58 |             for _ in range(num_episode): 
 59 |                 self.eval_env.num_cooperative = eval_schedule["num_cooperative"][i]
 60 |                 self.eval_env.num_non_cooperative = eval_schedule["num_non_cooperative"][i]
 61 |                 self.eval_env.num_cores = eval_schedule["num_cores"][i]
 62 |                 self.eval_env.num_obs = eval_schedule["num_obstacles"][i]
 63 |                 self.eval_env.min_start_goal_dis = eval_schedule["min_start_goal_dis"][i]
 64 | 
 65 |                 self.eval_env.reset()
 66 | 
 67 |                 # save eval config
 68 |                 self.eval_config.append(self.eval_env.episode_data())
 69 |                 count += 1
 70 | 
 71 |     def save_eval_config(self,directory):
 72 |         file = os.path.join(directory,"eval_configs.json")
 73 |         with open(file, "w+") as f:
 74 |             json.dump(self.eval_config, f)
 75 | 
 76 |     def learn(self,
 77 |               total_timesteps,
 78 |               eval_freq,
 79 |               eval_log_path,
 80 |               verbose=True):
 81 |         
 82 |         states,_,_ = self.train_env.reset()
 83 | 
 84 |         # # Sample CVaR value from (0.0,1.0)
 85 |         # cvar = 1 - np.random.uniform(0.0, 1.0)
 86 | 
 87 |         # current episode 
 88 |         ep_rewards = np.zeros(len(self.train_env.robots))
 89 |         ep_deactivated_t = [-1]*len(self.train_env.robots)
 90 |         ep_length = 0
 91 |         ep_num = 0
 92 |         
 93 |         while self.current_timestep <= total_timesteps:
 94 |             
 95 |             # start_all = time.time()
 96 |             eps = self.linear_eps(total_timesteps)
 97 |             
 98 |             # gather actions for robots from agents 
 99 |             # start_1 = time.time()
100 |             actions = []
101 |             for i,rob in enumerate(self.train_env.robots):
102 |                 if rob.deactivated:
103 |                     actions.append(None)
104 |                     continue
105 | 
106 |                 if rob.cooperative:
107 |                     if self.cooperative_agent.use_iqn:
108 |                         action,_,_ = self.cooperative_agent.act(states[i],eps)
109 |                     else:
110 |                         action,_ = self.cooperative_agent.act_dqn(states[i],eps)
111 |                 else:
112 |                     if self.noncooperative_agent.use_iqn:
113 |                         action,_,_ = self.noncooperative_agent.act(states[i],eps)
114 |                     else:
115 |                         action,_ = self.noncooperative_agent.act_dqn(states[i],eps)
116 |                 actions.append(action)
117 |             # end_1 = time.time()
118 |             # elapsed_time_1 = end_1 - start_1
119 |             # if self.current_timestep % 100 == 0:
120 |             #     print("Elapsed time 1: {:.6f} seconds".format(elapsed_time_1))
121 | 
122 |             # start_2 = time.time()
123 |             # execute actions in the training environment
124 |             next_states, rewards, dones, infos = self.train_env.step(actions)
125 |             # end_2 = time.time()
126 |             # elapsed_time_2 = end_2 - start_2
127 |             # if self.current_timestep % 100 == 0:
128 |             #     print("Elapsed time 2: {:.6f} seconds".format(elapsed_time_2))
129 | 
130 |             # save experience in replay memory
131 |             for i,rob in enumerate(self.train_env.robots):
132 |                 if rob.deactivated:
133 |                     continue
134 | 
135 |                 if rob.cooperative:
136 |                     ep_rewards[i] += self.cooperative_agent.GAMMA ** ep_length * rewards[i]
137 |                     if self.cooperative_agent.training:
138 |                         self.cooperative_agent.memory.add((states[i], actions[i], rewards[i], next_states[i], dones[i]))
139 |                 else:
140 |                     ep_rewards[i] += self.noncooperative_agent.GAMMA ** ep_length * rewards[i]
141 |                     if self.noncooperative_agent.training:
142 |                         self.noncooperative_agent.memory.add((states[i], actions[i], rewards[i], next_states[i], dones[i]))
143 |                 
144 |                 if rob.collision or rob.reach_goal:
145 |                     rob.deactivated = True
146 |                     ep_deactivated_t[i] = ep_length
147 | 
148 |             end_episode = (ep_length >= 1000) or self.train_env.check_all_deactivated()
149 |             
150 |             # Learn, update and evaluate models after learning_starts time step 
151 |             if self.current_timestep >= self.learning_starts:
152 |                 # start_3 = time.time()
153 | 
154 |                 for agent in [self.cooperative_agent,self.noncooperative_agent]:
155 |                     if agent is None:
156 |                         continue
157 | 
158 |                     if not agent.training:
159 |                         continue
160 | 
161 |                     # Learn every UPDATE_EVERY time steps.
162 |                     if self.current_timestep % self.UPDATE_EVERY == 0:
163 |                         # If enough samples are available in memory, get random subset and learn
164 |                         if agent.memory.size() > agent.BATCH_SIZE:
165 |                             agent.train()
166 | 
167 |                     # Update the target model every target_update_interval time steps
168 |                     if self.current_timestep % self.target_update_interval == 0:
169 |                         agent.soft_update()
170 | 
171 |                 # end_3 = time.time()
172 |                 # elapsed_time_3 = end_3 - start_3
173 |                 # if self.current_timestep % 100 == 0:
174 |                 #     print("Elapsed time 3: {:.6f} seconds".format(elapsed_time_3))
175 | 
176 |                 # Evaluate learning agents every eval_freq time steps
177 |                 if self.current_timestep == self.learning_starts or self.current_timestep % eval_freq == 0: 
178 |                     self.evaluation()
179 |                     self.save_evaluation(eval_log_path)
180 | 
181 |                     for agent in [self.cooperative_agent,self.noncooperative_agent]:
182 |                         if agent is None:
183 |                             continue
184 |                         if not agent.training:
185 |                             continue
186 |                         # save the latest models
187 |                         agent.save_latest_model(eval_log_path)
188 | 
189 |                 # self.learning_timestep += 1
190 | 
191 |             if end_episode:
192 |                 ep_num += 1
193 |                 
194 |                 if verbose:
195 |                     # print abstract info of learning process
196 |                     print("======== Episode Info ========")
197 |                     print("current ep_length: ",ep_length)
198 |                     print("current ep_num: ",ep_num)
199 |                     print("current exploration rate: ",eps)
200 |                     print("current timesteps: ",self.current_timestep)
201 |                     print("total timesteps: ",total_timesteps)
202 |                     print("======== Episode Info ========\n")
203 |                     print("======== Robots Info ========")
204 |                     for i,rob in enumerate(self.train_env.robots):
205 |                         info = infos[i]["state"]
206 |                         if info == "deactivated after collision" or info == "deactivated after reaching goal":
207 |                             print(f"Robot {i} ep reward: {ep_rewards[i]:.2f}, {info} at step {ep_deactivated_t[i]}")
208 |                         else:
209 |                             print(f"Robot {i} ep reward: {ep_rewards[i]:.2f}, {info}")
210 |                     print("======== Robots Info ========\n") 
211 | 
212 |                 states,_,_ = self.train_env.reset()
213 |                 # cvar = 1 - np.random.uniform(0.0, 1.0)
214 | 
215 |                 ep_rewards = np.zeros(len(self.train_env.robots))
216 |                 ep_deactivated_t = [-1]*len(self.train_env.robots)
217 |                 ep_length = 0
218 |             else:
219 |                 states = next_states
220 |                 ep_length += 1
221 | 
222 |             # end_all = time.time()
223 |             # elapsed_time_all = end_all - start_all
224 |             # if self.current_timestep % 100 == 0:
225 |             #     print("one step elapsed time: {:.6f} seconds".format(elapsed_time_all))
226 |             
227 |             self.current_timestep += 1
228 | 
229 |     def linear_eps(self,total_timesteps):
230 |         
231 |         progress = self.current_timestep / total_timesteps
232 |         if progress < self.exploration_fraction:
233 |             r = progress / self.exploration_fraction
234 |             return self.initial_eps + r * (self.final_eps - self.initial_eps)
235 |         else:
236 |             return self.final_eps
237 | 
238 |     def evaluation(self):
239 |         """Evaluate performance of the agent
240 |         Params
241 |         ======
242 |             eval_env (gym compatible env): evaluation environment
243 |             eval_config: eval envs config file
244 |         """
245 |         actions_data = []
246 |         trajectories_data = []
247 |         rewards_data = []
248 |         successes_data = []
249 |         times_data = []
250 |         energies_data = []
251 |         obs_data = []
252 |         objs_data = []
253 |         
254 |         for idx, config in enumerate(self.eval_config):
255 |             print(f"Evaluating episode {idx}")
256 |             state,_,_ = self.eval_env.reset_with_eval_config(config)
257 |             obs = [[copy.deepcopy(rob.perception.observed_obs)] for rob in self.eval_env.robots]
258 |             objs = [[copy.deepcopy(rob.perception.observed_objs)] for rob in self.eval_env.robots]
259 |             
260 |             rob_num = len(self.eval_env.robots)
261 | 
262 |             rewards = [0.0]*rob_num
263 |             times = [0.0]*rob_num
264 |             energies = [0.0]*rob_num
265 |             end_episode = False
266 |             length = 0
267 |             
268 |             while not end_episode:
269 |                 # gather actions for robots from agents 
270 |                 action = []
271 |                 for i,rob in enumerate(self.eval_env.robots):
272 |                     if rob.deactivated:
273 |                         action.append(None)
274 |                         continue
275 | 
276 |                     if rob.cooperative:
277 |                         if self.cooperative_agent.use_iqn:
278 |                             a,_,_ = self.cooperative_agent.act(state[i])
279 |                         else:
280 |                             a,_ = self.cooperative_agent.act_dqn(state[i])
281 |                     else:
282 |                         if self.noncooperative_agent.use_iqn:
283 |                             a,_,_ = self.noncooperative_agent.act(state[i])
284 |                         else:
285 |                             a,_ = self.noncooperative_agent.act_dqn(state[i])
286 | 
287 |                     action.append(a)
288 | 
289 |                 # execute actions in the training environment
290 |                 state, reward, done, info = self.eval_env.step(action)
291 |                 
292 |                 for i,rob in enumerate(self.eval_env.robots):
293 |                     if rob.deactivated:
294 |                         continue
295 |                     
296 |                     if rob.cooperative:
297 |                         rewards[i] += self.cooperative_agent.GAMMA ** length * reward[i]
298 |                     else:
299 |                         rewards[i] += self.noncooperative_agent.GAMMA ** length * reward[i]
300 |                     times[i] += rob.dt * rob.N
301 |                     energies[i] += rob.compute_action_energy_cost(action[i])
302 |                     obs[i].append(copy.deepcopy(rob.perception.observed_obs))
303 |                     objs[i].append(copy.deepcopy(rob.perception.observed_objs))
304 | 
305 |                     if rob.collision or rob.reach_goal:
306 |                         rob.deactivated = True
307 | 
308 |                 end_episode = (length >= 1000) or self.eval_env.check_any_collision() or self.eval_env.check_all_deactivated()
309 |                 length += 1
310 | 
311 |             actions = []
312 |             trajectories = []
313 |             for rob in self.eval_env.robots:
314 |                 actions.append(copy.deepcopy(rob.action_history))
315 |                 trajectories.append(copy.deepcopy(rob.trajectory))
316 | 
317 |             success = True if self.eval_env.check_all_reach_goal() else False
318 | 
319 |             actions_data.append(actions)
320 |             trajectories_data.append(trajectories)
321 |             rewards_data.append(np.mean(rewards))
322 |             successes_data.append(success)
323 |             times_data.append(np.mean(times))
324 |             energies_data.append(np.mean(energies))
325 |             obs_data.append(obs)
326 |             objs_data.append(objs)
327 |         
328 |         avg_r = np.mean(rewards_data)
329 |         success_rate = np.sum(successes_data)/len(successes_data)
330 |         idx = np.where(np.array(successes_data) == 1)[0]
331 |         avg_t = None if np.shape(idx)[0] == 0 else np.mean(np.array(times_data)[idx])
332 |         avg_e = None if np.shape(idx)[0] == 0 else np.mean(np.array(energies_data)[idx])
333 | 
334 |         print(f"++++++++ Evaluation Info ++++++++")
335 |         print(f"Avg cumulative reward: {avg_r:.2f}")
336 |         print(f"Success rate: {success_rate:.2f}")
337 |         if avg_t is not None:
338 |             print(f"Avg time: {avg_t:.2f}")
339 |             print(f"Avg energy: {avg_e:.2f}")
340 |         print(f"++++++++ Evaluation Info ++++++++\n")
341 | 
342 |         self.eval_timesteps.append(self.current_timestep)
343 |         self.eval_actions.append(actions_data)
344 |         self.eval_trajectories.append(trajectories_data)
345 |         self.eval_rewards.append(rewards_data)
346 |         self.eval_successes.append(successes_data)
347 |         self.eval_times.append(times_data)
348 |         self.eval_energies.append(energies_data)
349 |         self.eval_obs.append(obs_data)
350 |         self.eval_objs.append(objs_data)
351 | 
352 |     def save_evaluation(self,eval_log_path):
353 |         filename = "evaluations.npz"
354 |         
355 |         np.savez(
356 |             os.path.join(eval_log_path,filename),
357 |             timesteps=np.array(self.eval_timesteps,dtype=object),
358 |             actions=np.array(self.eval_actions,dtype=object),
359 |             trajectories=np.array(self.eval_trajectories,dtype=object),
360 |             rewards=np.array(self.eval_rewards,dtype=object),
361 |             successes=np.array(self.eval_successes,dtype=object),
362 |             times=np.array(self.eval_times,dtype=object),
363 |             energies=np.array(self.eval_energies,dtype=object),
364 |             obs=np.array(self.eval_obs,dtype=object),
365 |             objs=np.array(self.eval_objs,dtype=object)
366 |         )


--------------------------------------------------------------------------------
/pretrained_models/DQN/seed_9/dqn_constructor_params.json:
--------------------------------------------------------------------------------
1 | {"self_dimension": 4, "static_dimension": 15, "dynamic_dimension": 20, "self_feature_dimension": 32, "static_feature_dimension": 120, "dynamic_feature_dimension": 160, "hidden_dimension": 156, "action_size": 9, "seed": 109}


--------------------------------------------------------------------------------
/pretrained_models/DQN/seed_9/dqn_network_params.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobustFieldAutonomyLab/Multi_Robot_Distributional_RL_Navigation/889bfed3aded1fc2edc1967090517f22e14de7a0/pretrained_models/DQN/seed_9/dqn_network_params.pth


--------------------------------------------------------------------------------
/pretrained_models/IQN/seed_9/constructor_params.json:
--------------------------------------------------------------------------------
1 | {"self_dimension": 4, "static_dimension": 15, "dynamic_dimension": 20, "self_feature_dimension": 32, "static_feature_dimension": 120, "dynamic_feature_dimension": 160, "hidden_dimension": 156, "action_size": 9, "seed": 109}


--------------------------------------------------------------------------------
/pretrained_models/IQN/seed_9/network_params.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RobustFieldAutonomyLab/Multi_Robot_Distributional_RL_Navigation/889bfed3aded1fc2edc1967090517f22e14de7a0/pretrained_models/IQN/seed_9/network_params.pth


--------------------------------------------------------------------------------
/run_experiments.py:
--------------------------------------------------------------------------------
  1 | import marinenav_env.envs.marinenav_env as marinenav_env
  2 | from policy.agent import Agent
  3 | import numpy as np
  4 | import copy
  5 | import scipy.spatial
  6 | import json
  7 | from datetime import datetime
  8 | import time as t_module
  9 | import os
 10 | import matplotlib.pyplot as plt
 11 | import APF
 12 | import sys
 13 | sys.path.insert(0,"./thirdparty")
 14 | import RVO
 15 | 
 16 | def evaluation(state, agent, eval_env, use_rl=True, use_iqn=True, act_adaptive=True, save_episode=False):
 17 |     """Evaluate performance of the agent
 18 |     """
 19 |     
 20 |     rob_num = len(eval_env.robots)
 21 | 
 22 | 
 23 |     rewards = [0.0]*rob_num
 24 |     times = [0.0]*rob_num
 25 |     energies = [0.0]*rob_num
 26 |     computation_times = []
 27 | 
 28 |     end_episode = False
 29 |     length = 0
 30 |     
 31 |     while not end_episode:
 32 |         # gather actions for robots from agents 
 33 |         action = []
 34 |         for i,rob in enumerate(eval_env.robots):
 35 |             if rob.deactivated:
 36 |                 action.append(None)
 37 |                 continue
 38 | 
 39 |             assert rob.cooperative, "Every robot must be cooperative!"
 40 |         
 41 |             start = t_module.time()
 42 |             if use_rl:
 43 |                 if use_iqn:
 44 |                     if act_adaptive:
 45 |                         a,_,_,_ = agent.act_adaptive(state[i])
 46 |                     else:
 47 |                         a,_,_ = agent.act(state[i])
 48 |                 else:
 49 |                     a,_ = agent.act_dqn(state[i])
 50 |             else:
 51 |                 a = agent.act(state[i])
 52 |             end = t_module.time()
 53 |             computation_times.append(end-start)
 54 | 
 55 |             action.append(a)
 56 | 
 57 |         # execute actions in the training environment
 58 |         state, reward, done, info = eval_env.step(action)
 59 |         
 60 |         for i,rob in enumerate(eval_env.robots):
 61 |             if rob.deactivated:
 62 |                 continue
 63 |             
 64 |             assert rob.cooperative, "Every robot must be cooperative!"
 65 |             
 66 |             rewards[i] += agent.GAMMA ** length * reward[i]
 67 | 
 68 |             times[i] += rob.dt * rob.N
 69 |             energies[i] += rob.compute_action_energy_cost(action[i])
 70 | 
 71 |             if rob.collision or rob.reach_goal:
 72 |                 rob.deactivated = True
 73 | 
 74 |         end_episode = (length >= 360) or eval_env.check_all_deactivated()
 75 |         length += 1
 76 | 
 77 |     success = True if eval_env.check_all_reach_goal() else False
 78 |     # success = 0
 79 |     # for rob in eval_env.robots:
 80 |     #     if rob.reach_goal:
 81 |     #         success += 1
 82 | 
 83 |     # save time and energy data of robots that reach goal
 84 |     success_times = []
 85 |     success_energies = []
 86 |     for i,rob in enumerate(eval_env.robots):
 87 |         if rob.reach_goal:
 88 |             success_times.append(times[i])
 89 |             success_energies.append(energies[i])
 90 | 
 91 |     if save_episode:
 92 |         trajectories = []
 93 |         for rob in eval_env.robots:
 94 |             trajectories.append(copy.deepcopy(rob.trajectory))
 95 |         return success, rewards, computation_times, success_times, success_energies, trajectories
 96 |     else:
 97 |         return success, rewards, computation_times, success_times, success_energies
 98 | 
 99 | def exp_setup(envs,eval_schedule,i):
100 |     observations = []
101 | 
102 |     for test_env in envs:
103 |         test_env.num_cooperative = eval_schedule["num_cooperative"][i]
104 |         test_env.num_non_cooperative = eval_schedule["num_non_cooperative"][i]
105 |         test_env.num_cores = eval_schedule["num_cores"][i]
106 |         test_env.num_obs = eval_schedule["num_obstacles"][i]
107 |         test_env.min_start_goal_dis = eval_schedule["min_start_goal_dis"][i]
108 | 
109 |         # save eval config
110 |         state,_,_ = test_env.reset()
111 |         observations.append(state)
112 | 
113 |     return observations
114 | 
115 | def dashboard(eval_schedule,i):
116 |     print("\n======== eval schedule ========")
117 |     print("num of cooperative agents: ",eval_schedule["num_cooperative"][i])
118 |     print("num of non-cooperative agents: ",eval_schedule["num_non_cooperative"][i])
119 |     print("num of cores: ",eval_schedule["num_cores"][i])
120 |     print("num of obstacles: ",eval_schedule["num_obstacles"][i])
121 |     print("min start goal dis: ",eval_schedule["min_start_goal_dis"][i])
122 |     print("======== eval schedule ========\n") 
123 | 
124 | def run_experiment(eval_schedules):
125 |     agents = [adaptive_IQN_agent,IQN_agent,DQN_agent,APF_agent,RVO_agent]
126 |     names = ["adaptive_IQN","IQN","DQN","APF","RVO"]
127 |     envs = [test_env_0,test_env_1,test_env_2,test_env_3,test_env_4]
128 |     evaluations = [evaluation,evaluation,evaluation,evaluation,evaluation]
129 | 
130 |     colors = ["b","g","r","tab:orange","m"]
131 | 
132 |     save_trajectory = True
133 | 
134 |     dt = datetime.now()
135 |     timestamp = dt.strftime("%Y-%m-%d-%H-%M-%S")
136 | 
137 |     robot_nums = []
138 |     # all_test_rob_exp = []
139 |     all_successes_exp = []
140 |     all_rewards_exp = []
141 |     all_success_times_exp = []
142 |     all_success_energies_exp =[]
143 |     if save_trajectory:
144 |         all_trajectories_exp = []
145 |         all_eval_configs_exp = []
146 | 
147 |     for idx,count in enumerate(eval_schedules["num_episodes"]):
148 |         dashboard(eval_schedules,idx)
149 | 
150 |         robot_nums.append(eval_schedules["num_cooperative"][idx])
151 |         # all_test_rob = [0]*len(agents)
152 |         all_successes = [[] for _ in agents]
153 |         all_rewards = [[] for _ in agents]
154 |         all_computation_times = [[] for _ in agents]
155 |         all_success_times = [[] for _ in agents]
156 |         all_success_energies = [[] for _ in agents]
157 |         if save_trajectory:
158 |             all_trajectories = [[] for _ in agents]
159 |             all_eval_configs = [[] for _ in agents]
160 | 
161 |         for i in range(count):
162 |             print("Evaluating all agents on episode ",i)
163 |             observations = exp_setup(envs,eval_schedules,idx)
164 |             for j in range(len(agents)):
165 |                 agent = agents[j]
166 |                 env = envs[j]
167 |                 eval_func = evaluations[j]
168 |                 name = names[j]
169 |                 
170 |                 if save_trajectory:
171 |                     all_eval_configs[j].append(env.episode_data())
172 | 
173 |                 # obs = env.reset()
174 |                 obs = observations[j]
175 |                 if save_trajectory:
176 |                     if name == "adaptive_IQN":
177 |                         success, rewards, computation_times, success_times, success_energies, trajectories = eval_func(obs,agent,env,save_episode=True)
178 |                     elif name == "IQN":
179 |                         success, rewards, computation_times, success_times, success_energies, trajectories = eval_func(obs,agent,env,act_adaptive=False,save_episode=True)
180 |                     elif name == "DQN":
181 |                         success, rewards, computation_times, success_times, success_energies, trajectories = eval_func(obs,agent,env,use_iqn=False,save_episode=True)
182 |                     elif name == "APF":
183 |                         success, rewards, computation_times, success_times, success_energies, trajectories = eval_func(obs,agent,env,use_rl=False,save_episode=True)
184 |                     elif name == "RVO":
185 |                         success, rewards, computation_times, success_times, success_energies, trajectories = eval_func(obs,agent,env,use_rl=False,save_episode=True)
186 |                     else:
187 |                         raise RuntimeError("Agent not implemented!")
188 |                 else:
189 |                     if name == "adaptive_IQN":
190 |                         success, rewards, computation_times, success_times, success_energies = eval_func(obs,agent,env)
191 |                     elif name == "IQN":
192 |                         success, rewards, computation_times, success_times, success_energies = eval_func(obs,agent,env,act_adaptive=False)
193 |                     elif name == "DQN":
194 |                         success, rewards, computation_times, success_times, success_energies = eval_func(obs,agent,env,use_iqn=False)
195 |                     elif name == "APF":
196 |                         success, rewards, computation_times, success_times, success_energies = eval_func(obs,agent,env,use_rl=False)
197 |                     elif name == "RVO":
198 |                         success, rewards, computation_times, success_times, success_energies = eval_func(obs,agent,env,use_rl=False)
199 |                     else:
200 |                         raise RuntimeError("Agent not implemented!")
201 | 
202 |                 all_successes[j].append(success)
203 |                 # all_test_rob[j] += eval_schedules["num_cooperative"][idx]
204 |                 # all_successes[j] += success
205 |                 all_rewards[j] += rewards
206 |                 all_computation_times[j] += computation_times
207 |                 all_success_times[j] += success_times
208 |                 all_success_energies[j] += success_energies
209 |                 if save_trajectory:
210 |                     all_trajectories[j].append(copy.deepcopy(trajectories))
211 |         
212 |         for k,name in enumerate(names):
213 |             s_rate = np.sum(all_successes[k])/len(all_successes[k])
214 |             # s_rate = all_successes[k]/all_test_rob[k]
215 |             avg_r = np.mean(all_rewards[k])
216 |             avg_compute_t = np.mean(all_computation_times[k])
217 |             avg_t = np.mean(all_success_times[k])
218 |             avg_e = np.mean(all_success_energies[k])
219 |             print(f"{name} | success rate: {s_rate:.2f} | avg_reward: {avg_r:.2f} | avg_compute_t: {avg_compute_t} | \
220 |                   avg_t: {avg_t:.2f} | avg_e: {avg_e:.2f}")
221 |         
222 |         print("\n")
223 | 
224 |         # all_test_rob_exp.append(all_test_rob)
225 |         all_successes_exp.append(all_successes)
226 |         all_rewards_exp.append(all_rewards)
227 |         all_success_times_exp.append(all_success_times)
228 |         all_success_energies_exp.append(all_success_energies)
229 |         if save_trajectory:
230 |             all_trajectories_exp.append(copy.deepcopy(all_trajectories))
231 |             all_eval_configs_exp.append(copy.deepcopy(all_eval_configs))
232 | 
233 |     # save data
234 |     if save_trajectory:
235 |         exp_data = dict(eval_schedules=eval_schedules,
236 |                         names=names,
237 |                         all_successes_exp=all_successes_exp,
238 |                         all_rewards_exp=all_rewards_exp,
239 |                         all_success_times_exp=all_success_times_exp,
240 |                         all_success_energies_exp=all_success_energies_exp,
241 |                         all_trajectories_exp=all_trajectories_exp,
242 |                         all_eval_configs_exp=all_eval_configs_exp 
243 |                     )
244 |     else:
245 |         exp_data = dict(eval_schedules=eval_schedules,
246 |                         names=names,
247 |                         all_successes_exp=all_successes_exp,
248 |                         all_rewards_exp=all_rewards_exp,
249 |                         all_success_times_exp=all_success_times_exp,
250 |                         all_success_energies_exp=all_success_energies_exp,
251 |                     )
252 | 
253 | 
254 |     exp_dir = f"experiment_data/exp_data_{timestamp}"
255 |     os.makedirs(exp_dir)
256 | 
257 |     filename = os.path.join(exp_dir,"exp_results.json")
258 |     with open(filename,"w") as file:
259 |         json.dump(exp_data,file)
260 | 
261 |     fig1, ax1 = plt.subplots()
262 |     fig2, ax2 = plt.subplots()
263 |     fig3, ax3 = plt.subplots()
264 |     bar_width = 0.25
265 |     interval_scale = 1.5
266 |     set_label = [True]*len(names)
267 |     for i,robot_num in enumerate(robot_nums):
268 |          
269 |         all_successes = all_successes_exp[i]
270 |         all_success_times = all_success_times_exp[i]
271 |         all_success_energies = all_success_energies_exp[i] 
272 |         for j,pos in enumerate([-2*bar_width,-bar_width,0.0,bar_width,2*bar_width]):
273 |             # bar plot for success rate
274 |             s_rate = np.sum(all_successes[j])/len(all_successes[j])
275 |             if set_label[j]:
276 |                 ax1.bar(interval_scale*i+pos,s_rate,0.8*bar_width,color=colors[j],label=names[j])
277 |                 set_label[j] = False
278 |             else:
279 |                 ax1.bar(interval_scale*i+pos,s_rate,0.8*bar_width,color=colors[j])
280 |             
281 |             # box plot for time
282 |             box = ax2.boxplot(all_success_times[j],positions=[interval_scale*i+pos],flierprops={'marker': '.','markersize': 1},patch_artist=True)
283 |             for patch in box["boxes"]:
284 |                 patch.set_facecolor(colors[j])
285 |             for line in box["medians"]:
286 |                 line.set_color("black")
287 | 
288 |             # box plot for energy
289 |             box = ax3.boxplot(all_success_energies[j],positions=[interval_scale*i+pos],flierprops={'marker': '.','markersize': 1},patch_artist=True)
290 |             for patch in box["boxes"]:
291 |                 patch.set_facecolor(colors[j])
292 |             for line in box["medians"]:
293 |                 line.set_color("black")
294 |     
295 |     ax1.set_xticks(interval_scale*np.arange(len(robot_nums)))
296 |     ax1.set_xticklabels(robot_nums)
297 |     ax1.set_title("Success Rate")
298 |     ax1.legend()
299 | 
300 |     ax2.set_xticks(interval_scale*np.arange(len(robot_nums)))
301 |     ax2.set_xticklabels([str(robot_num) for robot_num in eval_schedules["num_cooperative"]])
302 |     ax2.set_title("Time")
303 | 
304 |     ax3.set_xticks(interval_scale*np.arange(len(robot_nums)))
305 |     ax3.set_xticklabels([str(robot_num) for robot_num in eval_schedules["num_cooperative"]])
306 |     ax3.set_title("Energy")
307 | 
308 |     fig1.savefig(os.path.join(exp_dir,"success_rate.png"))
309 |     fig2.savefig(os.path.join(exp_dir,"time.png"))
310 |     fig3.savefig(os.path.join(exp_dir,"energy.png"))
311 | 
312 | 
313 | if __name__ == "__main__":
314 |     seed = 3 # PRNG seed for all testing envs
315 | 
316 |     ##### adaptive IQN #####
317 |     test_env_0 = marinenav_env.MarineNavEnv2(seed)
318 | 
319 |     save_dir = "pretrained_models/IQN/seed_9"
320 |     device = "cpu"
321 | 
322 |     adaptive_IQN_agent = Agent(cooperative=True,device=device)
323 |     adaptive_IQN_agent.load_model(save_dir,"cooperative",device)
324 |     ##### adaptive IQN #####
325 | 
326 |     ##### IQN #####
327 |     test_env_1 = marinenav_env.MarineNavEnv2(seed)
328 | 
329 |     save_dir = "pretrained_models/IQN/seed_9"
330 |     device = "cpu"
331 | 
332 |     IQN_agent = Agent(cooperative=True,device=device)
333 |     IQN_agent.load_model(save_dir,"cooperative",device)
334 |     ##### IQN #####
335 | 
336 |     ##### DQN #####
337 |     test_env_2 = marinenav_env.MarineNavEnv2(seed)
338 | 
339 |     save_dir = "pretrained_models/DQN/seed_9"
340 |     device = "cpu"
341 | 
342 |     DQN_agent = Agent(cooperative=True,device=device,use_iqn=False)
343 |     DQN_agent.load_model(save_dir,"cooperative",device)
344 |     ##### DQN #####
345 | 
346 |     ##### APF #####
347 |     test_env_3 = marinenav_env.MarineNavEnv2(seed)
348 | 
349 |     APF_agent = APF.APF_agent(test_env_3.robots[0].a,test_env_3.robots[0].w)
350 |     ##### APF #####
351 | 
352 |     ##### RVO #####
353 |     test_env_4 = marinenav_env.MarineNavEnv2(seed)
354 | 
355 |     RVO_agent = RVO.RVO_agent(test_env_4.robots[0].a,test_env_4.robots[0].w,test_env_4.robots[0].max_speed)
356 |     ##### RVO #####
357 | 
358 |     eval_schedules = dict(num_episodes=[100,100,100,100,100],
359 |                           num_cooperative=[3,4,5,6,7],
360 |                           num_non_cooperative=[0,0,0,0,0],
361 |                           num_cores=[4,5,6,7,8],
362 |                           num_obstacles=[4,5,6,7,8],
363 |                           min_start_goal_dis=[40.0,40.0,40.0,40.0,40.0]
364 |                          )
365 | 
366 |     run_experiment(eval_schedules)
367 | 
368 | 


--------------------------------------------------------------------------------
/scripts/plot_eval_returns.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import matplotlib as mpl
 4 | import matplotlib.ticker as ticker
 5 | import os
 6 | 
 7 | if __name__ == "__main__":
 8 |     first = True
 9 |     seeds = [9]
10 |     eval_agents = ["IQN","DQN"]
11 |     colors = ["b","tab:orange"]
12 |     data_dirs = ["your/IQN/training/evaluation/file/directory",
13 |                  "your/DQN/training/evaluation/file/directory"]
14 | 
15 |     fig, (ax_rewards,ax_success_rate) = plt.subplots(1,2,figsize=(16,6))
16 |     
17 |     IQN_final = []
18 |     DQN_final = []
19 | 
20 |     for idx, eval_agent in enumerate(eval_agents):
21 |         all_rewards = []
22 |         all_success_rates = []
23 |         print(f"===== Plotting {eval_agent} results =====")
24 |         for seed in seeds:
25 |             seed_dir = "seed_"+str(seed)
26 |             eval_data = np.load(os.path.join(data_dirs[idx],seed_dir,"evaluations.npz"),allow_pickle=True)
27 | 
28 |             timesteps = np.array(eval_data['timesteps'],dtype=np.float64)
29 |             rewards = np.mean(eval_data['rewards'],axis=1)
30 |             success_rates = []
31 |             for i in range(len(eval_data['timesteps'])):
32 |                 successes = eval_data['successes'][i]
33 |                 success_rates.append(np.sum(successes)/len(successes))
34 | 
35 |             if eval_agent == "IQN":
36 |                 IQN_final.append(success_rates[-1])
37 |             else:
38 |                 DQN_final.append(success_rates[-1])
39 | 
40 |             all_rewards.append(rewards.tolist())
41 |             all_success_rates.append(success_rates)
42 | 
43 |         all_rewards_mean = np.mean(all_rewards,axis=0)
44 |         all_rewards_std = np.std(all_rewards,axis=0)/np.sqrt(np.shape(all_rewards)[0])
45 |         all_success_rates_mean = np.mean(all_success_rates,axis=0)
46 |         all_success_rates_std = np.std(all_success_rates,axis=0)
47 | 
48 |         mpl.rcParams["font.size"]=25
49 |         [x.set_linewidth(1.5) for x in ax_rewards.spines.values()]
50 |         ax_rewards.tick_params(axis="x", labelsize=22)
51 |         ax_rewards.tick_params(axis="y", labelsize=22)
52 |         ax_rewards.plot(timesteps,all_rewards_mean,linewidth=3,label=eval_agent,c=colors[idx],zorder=5-idx)
53 |         ax_rewards.fill_between(timesteps,all_rewards_mean+all_rewards_std,all_rewards_mean-all_rewards_std,alpha=0.2,color=colors[idx],zorder=5-idx)
54 |         ax_rewards.xaxis.set_ticks(np.arange(0,eval_data['timesteps'][-1]+1,1000000))
55 |         scale_x = 1e-5
56 |         ticks_x = ticker.FuncFormatter(lambda x, pos:'{0:g}'.format(x*scale_x))
57 |         ax_rewards.xaxis.set_major_formatter(ticks_x)
58 |         ax_rewards.set_xlabel("Timestep(x10^5)",fontsize=25)
59 |         ax_rewards.set_title("Cumulative Reward",fontsize=25,fontweight='bold')
60 | 
61 |         [x.set_linewidth(1.5) for x in ax_success_rate.spines.values()]
62 |         ax_success_rate.tick_params(axis="x", labelsize=22)
63 |         ax_success_rate.tick_params(axis="y", labelsize=22)
64 |         ax_success_rate.plot(timesteps,all_success_rates_mean,linewidth=3,label=eval_agent,c=colors[idx],zorder=5-idx)
65 |         ax_success_rate.fill_between(timesteps,all_success_rates_mean+all_success_rates_std,all_success_rates_mean-all_success_rates_std,alpha=0.2,color=colors[idx],zorder=5-idx)
66 |         ax_success_rate.xaxis.set_ticks(np.arange(0,eval_data['timesteps'][-1]+1,1000000))
67 |         scale_x = 1e-5
68 |         ticks_x = ticker.FuncFormatter(lambda x, pos:'{0:g}'.format(x*scale_x))
69 |         ax_success_rate.xaxis.set_major_formatter(ticks_x)
70 |         ax_success_rate.set_xlabel("Timestep(x10^5)",fontsize=25)
71 |         ax_success_rate.yaxis.set_ticks(np.arange(0,1.1,0.2))
72 |         ax_success_rate.set_title("Success Rate",fontsize=25,fontweight='bold')
73 |         ax_success_rate.legend(loc="lower right",bbox_to_anchor=(1, 0.35))
74 | 
75 |     print(f"IQN final success rates of all models: {IQN_final}")
76 |     print(f"DQN final success rates of all models: {DQN_final}")
77 | 
78 |     fig.tight_layout()
79 |     fig.savefig("learning_curves.png")
80 | 


--------------------------------------------------------------------------------
/scripts/visualize_eval_episode.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0,"../")
 3 | import env_visualizer
 4 | import os
 5 | 
 6 | if __name__ == "__main__":
 7 |     dir = "your/training/evaluation/file/directory"
 8 | 
 9 |     eval_configs = os.path.join(dir,"eval_configs.json")
10 |     evaluations = os.path.join(dir,"evaluations.npz")
11 | 
12 |     ev = env_visualizer.EnvVisualizer(seed=231)
13 | 
14 |     colors = ["r","lime","cyan","orange","tab:olive","white","chocolate"]
15 | 
16 |     eval_id = -1
17 |     eval_episode = 0
18 | 
19 |     ev.load_eval_config_and_episode(eval_configs,evaluations)
20 |     ev.play_eval_episode(eval_id,eval_episode,colors)


--------------------------------------------------------------------------------
/scripts/visualize_exp_episode.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0,"../")
 3 | import env_visualizer
 4 | import json
 5 | import numpy as np
 6 | from policy.agent import Agent
 7 | 
 8 | if __name__ == "__main__":
 9 |     filename = "your/experiment/result/file"
10 | 
11 |     with open(filename,"r") as f:
12 |         exp_data = json.load(f)
13 | 
14 | 
15 |     schedule_id = -1
16 |     agent_id = 0
17 |     ep_id = 0
18 |     
19 |     colors = ["r","lime","cyan","orange","tab:olive","white","chocolate"]
20 | 
21 | 
22 |     ev = env_visualizer.EnvVisualizer()
23 | 
24 |     ev.env.reset_with_eval_config(exp_data["all_eval_configs_exp"][schedule_id][agent_id][ep_id])
25 |     ev.init_visualize()
26 | 
27 |     ev.play_episode(trajectories=exp_data["all_trajectories_exp"][schedule_id][agent_id][ep_id],
28 |                     colors=colors)
29 |     


--------------------------------------------------------------------------------
/system_requirements:
--------------------------------------------------------------------------------
1 | ubuntu 20.04
2 | gym 0.19.0


--------------------------------------------------------------------------------
/thirdparty/RVO.py:
--------------------------------------------------------------------------------
  1 | # from math import ceil, floor, sqrt
  2 | import copy
  3 | import numpy as np
  4 | 
  5 | # from math import cos, sin, tan, atan2, asin
  6 | 
  7 | # from math import pi as PI
  8 | 
  9 | class RVO_agent:
 10 | 
 11 |     def __init__(self, a, w, max_vel):
 12 |         self.min_vel = 1.0 # if velocity is lower than the threshold, mandate acceleration
 13 |         self.max_vel = max_vel
 14 | 
 15 |         self.a = a # available linear acceleration (action 1)
 16 |         self.w = w # available angular velocity (action 2)
 17 | 
 18 |         self.rob_r = 2*0.8
 19 |         self.GAMMA = 0.99
 20 | 
 21 |     def distance(self,pose1,pose2):
 22 |         """ compute Euclidean distance for 2D """
 23 |         return np.sqrt((pose1[0]-pose2[0])**2+(pose1[1]-pose2[1])**2)+0.001
 24 | 
 25 |     def act(self, observation):
 26 |         assert len(observation) == 39, "The state size does not equal 39"
 27 | 
 28 |         obs_array = np.array(observation)
 29 |             
 30 |         ego = obs_array[:4]
 31 |         static = obs_array[4:19]
 32 |         dynamic = obs_array[19:]
 33 | 
 34 |         # desired velocity (towards goal)
 35 |         goal = ego[:2]
 36 |         v_desire = self.max_vel * goal / np.linalg.norm(goal)
 37 | 
 38 |         vA = [ego[2],ego[3]]
 39 |         pA = [0.0,0.0]
 40 | 
 41 |         RVO_BA_all = []
 42 | 
 43 |         # velocity obstacle from dynamic obstacles
 44 |         for i in range(0,len(dynamic),4):
 45 |             if np.abs(dynamic[i]) < 1e-3 and np.abs(dynamic[i+1]) < 1e-3:
 46 |                 # padding
 47 |                 continue
 48 | 
 49 |             vB = [dynamic[i+2],dynamic[i+3]]
 50 |             pB = [dynamic[i],dynamic[i+1]]
 51 | 
 52 |             # use RVO
 53 |             transl_vB_vA = [pA[0]+0.5*(vB[0]+vA[0]), pA[1]+0.5*(vB[1]+vA[1])]
 54 | 
 55 |             dist_BA = self.distance(pA, pB)
 56 |             theta_BA = np.arctan2(pB[1]-pA[1], pB[0]-pA[0])
 57 | 
 58 |             if 2*self.rob_r > dist_BA:
 59 |                 dist_BA = 2*self.rob_r
 60 |             
 61 |             theta_BAort = np.arcsin(2*self.rob_r/dist_BA)
 62 |             theta_ort_left = theta_BA+theta_BAort
 63 |             bound_left = [np.cos(theta_ort_left), np.sin(theta_ort_left)]
 64 |             theta_ort_right = theta_BA-theta_BAort
 65 |             bound_right = [np.cos(theta_ort_right), np.sin(theta_ort_right)]
 66 | 
 67 |             RVO_BA = [transl_vB_vA, bound_left, bound_right, dist_BA, 2*self.rob_r]
 68 |             RVO_BA_all.append(RVO_BA)
 69 | 
 70 |         # velocity obstacle from static obstacles
 71 |         for i in range(0,len(static),3):
 72 |             if np.abs(static[i]) < 1e-3 and np.abs(static[i+1]) < 1e-3:
 73 |                 # padding
 74 |                 continue
 75 | 
 76 |             vB = [0.0, 0.0]
 77 |             pB = [static[i],static[i+1]]
 78 | 
 79 |             transl_vB_vA = [pA[0]+vB[0], pA[1]+vB[1]]
 80 |             dist_BA = self.distance(pA, pB)
 81 |             theta_BA = np.arctan2(pB[1]-pA[1], pB[0]-pA[0])
 82 | 
 83 |             # over-approximation of square to circular
 84 |             OVER_APPROX_C2S = 1.5
 85 |             rad = 2*static[i+2]*OVER_APPROX_C2S
 86 |             if (rad+self.rob_r) > dist_BA:
 87 |                 dist_BA = rad+self.rob_r
 88 | 
 89 |             theta_BAort = np.arcsin((rad+self.rob_r)/dist_BA)
 90 |             theta_ort_left = theta_BA+theta_BAort
 91 |             bound_left = [np.cos(theta_ort_left), np.sin(theta_ort_left)]
 92 |             theta_ort_right = theta_BA-theta_BAort
 93 |             bound_right = [np.cos(theta_ort_right), np.sin(theta_ort_right)]
 94 |             RVO_BA = [transl_vB_vA, bound_left, bound_right, dist_BA, rad+self.rob_r]
 95 |             RVO_BA_all.append(RVO_BA)
 96 | 
 97 |         vA_post = self.intersect(pA, v_desire, RVO_BA_all)
 98 | 
 99 |         # select angular velocity action 
100 |         vA_angle = 0.0
101 |         vA_post_angle = 0.0
102 |         if np.linalg.norm(np.array(vA)) > 1e-03:
103 |             vA_angle = np.arctan2(vA[1],vA[0])
104 |         if np.linalg.norm(np.array(vA_post)) > 1e-03:
105 |             vA_post_angle = np.arctan2(vA_post[1],vA_post[0])
106 | 
107 |         diff_angle = vA_post_angle - vA_angle
108 |         while diff_angle < -np.pi:
109 |             diff_angle += 2 * np.pi
110 |         while diff_angle >= np.pi:
111 |             diff_angle -= 2 * np.pi
112 | 
113 |         w_idx = np.argmin(np.abs(self.w-diff_angle))
114 | 
115 |         # select linear acceleration action
116 |         vA_dir = np.array([1.0,0.0])
117 |         if np.linalg.norm(np.array(vA)) > 1e-03:
118 |             vA_dir = np.array(vA) / np.linalg.norm(np.array(vA))
119 |         vA_post_proj = np.dot(vA_dir,np.array(vA_post))
120 | 
121 |         a_proj = (vA_post_proj - np.linalg.norm(np.array(vA))) / 0.5
122 |         a = copy.deepcopy(self.a)
123 |         if np.linalg.norm(np.array(vA)) < self.min_vel:
124 |             # if the velocity is small, mandate acceleration
125 |             a[a<=0.0] = -np.inf
126 |         a_diff = a-a_proj
127 |         a_idx = np.argmin(np.abs(a_diff))
128 | 
129 |         return a_idx * len(self.w) + w_idx
130 | 
131 | 
132 |     def intersect(self,pA, vA, RVO_BA_all):
133 |         # print '----------------------------------------'
134 |         # print 'Start intersection test'
135 |         norm_v = self.distance(vA, [0, 0])
136 |         suitable_V = []
137 |         unsuitable_V = []
138 |         for theta in np.arange(0, 2*np.pi, 0.1):
139 |             for rad in np.arange(0.02, norm_v+0.02, norm_v/5.0):
140 |                 new_v = [rad*np.cos(theta), rad*np.sin(theta)]
141 |                 suit = True
142 |                 for RVO_BA in RVO_BA_all:
143 |                     p_0 = RVO_BA[0]
144 |                     left = RVO_BA[1]
145 |                     right = RVO_BA[2]
146 |                     dif = [new_v[0]+pA[0]-p_0[0], new_v[1]+pA[1]-p_0[1]]
147 |                     theta_dif = np.arctan2(dif[1], dif[0])
148 |                     theta_right = np.arctan2(right[1], right[0])
149 |                     theta_left = np.arctan2(left[1], left[0])
150 |                     if self.in_between(theta_right, theta_dif, theta_left):
151 |                         suit = False
152 |                         break
153 |                 if suit:
154 |                     suitable_V.append(new_v)
155 |                 else:
156 |                     unsuitable_V.append(new_v)                
157 |         new_v = vA[:]
158 |         suit = True
159 |         for RVO_BA in RVO_BA_all:                
160 |             p_0 = RVO_BA[0]
161 |             left = RVO_BA[1]
162 |             right = RVO_BA[2]
163 |             dif = [new_v[0]+pA[0]-p_0[0], new_v[1]+pA[1]-p_0[1]]
164 |             theta_dif = np.arctan2(dif[1], dif[0])
165 |             theta_right = np.arctan2(right[1], right[0])
166 |             theta_left = np.arctan2(left[1], left[0])
167 |             if self.in_between(theta_right, theta_dif, theta_left):
168 |                 suit = False
169 |                 break
170 |         if suit:
171 |             suitable_V.append(new_v)
172 |         else:
173 |             unsuitable_V.append(new_v)
174 |         #----------------------        
175 |         if suitable_V:
176 |             # print 'Suitable found'
177 |             vA_post = min(suitable_V, key = lambda v: self.distance(v, vA))
178 |             new_v = vA_post[:]
179 |             for RVO_BA in RVO_BA_all:
180 |                 p_0 = RVO_BA[0]
181 |                 left = RVO_BA[1]
182 |                 right = RVO_BA[2]
183 |                 dif = [new_v[0]+pA[0]-p_0[0], new_v[1]+pA[1]-p_0[1]]
184 |                 theta_dif = np.arctan2(dif[1], dif[0])
185 |                 theta_right = np.arctan2(right[1], right[0])
186 |                 theta_left = np.arctan2(left[1], left[0])
187 |         else:
188 |             # print 'Suitable not found'
189 |             tc_V = dict()
190 |             for unsuit_v in unsuitable_V:
191 |                 tc_V[tuple(unsuit_v)] = 0
192 |                 tc = []
193 |                 for RVO_BA in RVO_BA_all:
194 |                     p_0 = RVO_BA[0]
195 |                     left = RVO_BA[1]
196 |                     right = RVO_BA[2]
197 |                     dist = RVO_BA[3]
198 |                     rad = RVO_BA[4]
199 |                     dif = [unsuit_v[0]+pA[0]-p_0[0], unsuit_v[1]+pA[1]-p_0[1]]
200 |                     theta_dif = np.arctan2(dif[1], dif[0])
201 |                     theta_right = np.arctan2(right[1], right[0])
202 |                     theta_left = np.arctan2(left[1], left[0])
203 |                     if self.in_between(theta_right, theta_dif, theta_left):
204 |                         small_theta = np.abs(theta_dif-0.5*(theta_left+theta_right))
205 |                         if np.abs(dist*np.sin(small_theta)) >= rad:
206 |                             rad = np.abs(dist*np.sin(small_theta))
207 |                         big_theta = np.arcsin(np.abs(dist*np.sin(small_theta))/rad)
208 |                         dist_tg = np.abs(dist*np.cos(small_theta))-np.abs(rad*np.cos(big_theta))
209 |                         if dist_tg < 0:
210 |                             dist_tg = 0                    
211 |                         tc_v = dist_tg/self.distance(dif, [0,0])
212 |                         tc.append(tc_v)
213 |                 tc_V[tuple(unsuit_v)] = min(tc)+0.001
214 |             WT = 0.2
215 |             vA_post = min(unsuitable_V, key = lambda v: ((WT/tc_V[tuple(v)])+self.distance(v, vA)))
216 |         return vA_post 
217 | 
218 |     def in_between(self, theta_right, theta_dif, theta_left):
219 |         if abs(theta_right - theta_left) <= np.pi:
220 |             if theta_right <= theta_dif <= theta_left:
221 |                 return True
222 |             else:
223 |                 return False
224 |         else:
225 |             if (theta_left <0) and (theta_right >0):
226 |                 theta_left += 2*np.pi
227 |                 if theta_dif < 0:
228 |                     theta_dif += 2*np.pi
229 |                 if theta_right <= theta_dif <= theta_left:
230 |                     return True
231 |                 else:
232 |                     return False
233 |             if (theta_left >0) and (theta_right <0):
234 |                 theta_right += 2*np.pi
235 |                 if theta_dif < 0:
236 |                     theta_dif += 2*np.pi
237 |                 if theta_left <= theta_dif <= theta_right:
238 |                     return True
239 |                 else:
240 |                     return False
241 |     
242 |     


--------------------------------------------------------------------------------
/train_rl_agents.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | import itertools
  4 | from multiprocessing import Pool
  5 | import json
  6 | from datetime import datetime
  7 | import numpy as np
  8 | from marinenav_env.envs.marinenav_env import MarineNavEnv2
  9 | from policy.agent import Agent
 10 | from policy.trainer import Trainer
 11 | 
 12 | parser = argparse.ArgumentParser(description="Train IQN model")
 13 | 
 14 | parser.add_argument(
 15 |     "-C",
 16 |     "--config-file",
 17 |     dest="config_file",
 18 |     type=open,
 19 |     required=True,
 20 |     help="configuration file for training parameters",
 21 | )
 22 | parser.add_argument(
 23 |     "-P",
 24 |     "--num-procs",
 25 |     dest="num_procs",
 26 |     type=int,
 27 |     default=1,
 28 |     help="number of subprocess workers to use for trial parallelization",
 29 | )
 30 | parser.add_argument(
 31 |     "-D",
 32 |     "--device",
 33 |     dest="device",
 34 |     type=str,
 35 |     default="cpu",
 36 |     help="device to run all subprocesses, could only specify 1 device in each run"
 37 | )
 38 | 
 39 | def product(*args, repeat=1):
 40 |     # This function is a modified version of 
 41 |     # https://docs.python.org/3/library/itertools.html#itertools.product
 42 |     pools = [tuple(pool) for pool in args] * repeat
 43 |     result = [[]]
 44 |     for pool in pools:
 45 |         result = [x+[y] for x in result for y in pool]
 46 |     for prod in result:
 47 |         yield tuple(prod)
 48 | 
 49 | def trial_params(params):
 50 |     if isinstance(params,(str,int,float)):
 51 |         return [params]
 52 |     elif isinstance(params,list):
 53 |         return params
 54 |     elif isinstance(params, dict):
 55 |         keys, vals = zip(*params.items())
 56 |         mix_vals = []
 57 |         for val in vals:
 58 |             val = trial_params(val)
 59 |             mix_vals.append(val)
 60 |         return [dict(zip(keys, mix_val)) for mix_val in itertools.product(*mix_vals)]
 61 |     else:
 62 |         raise TypeError("Parameter type is incorrect.")
 63 | 
 64 | def params_dashboard(params):
 65 |     print("\n====== Training Setup ======\n")
 66 |     print("seed: ",params["seed"])
 67 |     print("total_timesteps: ",params["total_timesteps"])
 68 |     print("eval_freq: ",params["eval_freq"])
 69 |     print("use_iqn: ",params["use_iqn"])
 70 |     print("\n")
 71 | 
 72 | def run_trial(device,params):
 73 |     exp_dir = os.path.join(params["save_dir"],
 74 |                            "training_"+params["training_time"],
 75 |                            "seed_"+str(params["seed"]))
 76 |     os.makedirs(exp_dir)
 77 | 
 78 |     param_file = os.path.join(exp_dir,"trial_config.json")
 79 |     with open(param_file, 'w+') as outfile:
 80 |         json.dump(params, outfile)
 81 |     
 82 |     train_env = MarineNavEnv2(seed=params["seed"],schedule=params["training_schedule"])
 83 | 
 84 |     eval_env = MarineNavEnv2(seed=253)
 85 | 
 86 |     cooperative_agent = Agent(cooperative=True,device=device,use_iqn=params["use_iqn"],seed=params["seed"]+100)
 87 | 
 88 |     if "load_model" in params:
 89 |         cooperative_agent.load_model(params["load_model"],"cooperative",device)
 90 | 
 91 |     trainer = Trainer(train_env=train_env,
 92 |                       eval_env=eval_env,
 93 |                       eval_schedule=params["eval_schedule"],
 94 |                       cooperative_agent=cooperative_agent,
 95 |                       non_cooperative_agent=None,
 96 |                      )
 97 |     
 98 |     trainer.save_eval_config(exp_dir)
 99 | 
100 |     trainer.learn(total_timesteps=params["total_timesteps"],
101 |                   eval_freq=params["eval_freq"],
102 |                   eval_log_path=exp_dir)
103 | 
104 | if __name__ == "__main__":
105 |     args = parser.parse_args()
106 |     params = json.load(args.config_file)
107 |     params_dashboard(params)
108 |     training_schedule = params.pop("training_schedule")
109 |     eval_schedule = params.pop("eval_schedule")
110 |     
111 |     trial_param_list = trial_params(params)
112 | 
113 |     dt = datetime.now()
114 |     timestamp = dt.strftime("%Y-%m-%d-%H-%M-%S")
115 | 
116 |     if args.num_procs == 1:
117 |         for param in trial_param_list:
118 |             param["training_time"]=timestamp
119 |             param["training_schedule"]=training_schedule
120 |             param["eval_schedule"]=eval_schedule
121 |             run_trial(args.device,param)
122 |     else:
123 |         with Pool(processes=args.num_procs) as pool:
124 |             for param in trial_param_list:
125 |                 param["training_time"]=timestamp
126 |                 param["training_schedule"]=training_schedule
127 |                 param["eval_schedule"]=eval_schedule
128 |                 pool.apply_async(run_trial,(args.device,param))
129 |             
130 |             pool.close()
131 |             pool.join()    
132 | 
133 | 


--------------------------------------------------------------------------------