- The simulator models robots with Ackermann, Mecanum, or differential steering, equipped with lidar, and can be applied to common tasks such as obstacle avoidance, navigation, environment exploration, etc.
31 | 32 | - In `src/MultiVehicleEnv/scenarios`, we combine the above elements to form complete scenarios. Under the folder `src/MultiVehicleEnv/test`, for each scenario we have defined, we train the RL strategy to accomplish a specific task. 33 | 34 | 35 | 36 | 37 | ## Install 38 | ### Prerequisites 39 | Python (3.8.10), OpenAI gym (0.18.3), pyglet(1.5.15), numpy (1.20.3) 40 | ### Instructions 41 | Just install this repo by: 42 | ```shell 43 | git clone https://github.com/efc-robot/MultiVehicleEnv.git 44 | cd MultiVehicleEnv/src 45 | pip install -e . 46 | ``` 47 | 48 | ## Getting Started 49 | The API of simulator is Gym-like, which is friendly and easy to understand for researchers. 50 | 51 | You can take `src/MultiVehicleEnv/test/test_eval_auto_with_gui.py` as an example to get started. In this script, we instantiate a scenario for cooperative navigation (defined in `src/MultiVehicleEnv/scenarios/multi_reach.py`) and obtain the simulation environment to train and evaluate RL strategy. 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efc-robot/MultiVehicleEnv/4f0d139cdff8b5b16ac2f0ad4fe13f4ff25455fe/docs/demo.gif -------------------------------------------------------------------------------- /src/MultiVehicleEnv/GUI.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pickle 3 | from typing import Union 4 | import pyglet 5 | import os 6 | 7 | class GUI(object): 8 | def __init__(self,port_type:str = 'file',gui_port = '/dev/shm/gui_port' , fps:float = 24): 9 | self.field_range = None 10 | self.cam_bound = None 11 | self.viewer = None 12 | self.file_handle = None 13 | self.port_type = port_type 14 | self.gui_port = gui_port 15 | self.fps = fps 16 | self.mode = 'human' 17 | 18 | 19 | def _read_data(self): 20 | if self.port_type == 
'file': 21 | while(True): 22 | if os.path.exists(self.gui_port): 23 | self.file_handle = open(self.gui_port, "rb") 24 | self.file_handle.seek(0) 25 | gui_data = pickle.load(self.file_handle) 26 | self.file_handle.close 27 | return gui_data 28 | else: 29 | print('%s does not exist, wait for Simulator dump GUI data'%self.gui_port) 30 | time.sleep(1) 31 | elif self.port_type == 'direct': 32 | gui_data = self.gui_port.dumpGUI(port_type='direct') 33 | return gui_data 34 | 35 | 36 | def init_viewer(self): 37 | from . import rendering 38 | gui_data = self._read_data() 39 | self.field_range = gui_data['field_range'] 40 | if self.cam_bound is None: 41 | center_x = (self.field_range[0]+self.field_range[2])/2.0 42 | center_y = (self.field_range[1]+self.field_range[3])/2.0 43 | length_x = (self.field_range[2]-self.field_range[0]) 44 | length_y = (self.field_range[3]-self.field_range[1]) 45 | # 1.4 times of field range for camera range 46 | gap = 0.1 47 | self.cam_bound = [center_x - (0.5+gap)*length_x, (1+gap*2)*length_x, center_y - (0.5+gap)*length_y, (1+gap*2)*length_y ] 48 | 49 | screen = pyglet.canvas.get_display().get_default_screen() 50 | max_width = int(screen.width * 0.9) 51 | max_height = int(screen.height * 0.9) 52 | 53 | if self.cam_bound[1]/self.cam_bound[3]>max_width/max_height: 54 | screen_width = max_width 55 | screen_height = max_width/(self.cam_bound[1]/self.cam_bound[3]) 56 | else: 57 | screen_height = max_height 58 | screen_width = max_height*(self.cam_bound[1]/self.cam_bound[3]) 59 | 60 | self.viewer = rendering.Viewer(int(screen_width),int(screen_height)) 61 | self.viewer.set_bounds(self.cam_bound[0], 62 | self.cam_bound[0]+self.cam_bound[1], 63 | self.cam_bound[2], 64 | self.cam_bound[2]+self.cam_bound[3]) 65 | 66 | def init_object(self): 67 | from . 
    def init_object(self):
        """Build one render geom (with transforms) per obstacle, landmark and
        vehicle from a fresh simulator snapshot, and register them with the
        viewer. Must be called after init_viewer()."""
        from . import rendering
        # drop any geoms from a previous episode/window
        self.viewer.geoms = []
        gui_data = self._read_data()

        self.vehicle_list = gui_data['vehicle_list']
        self.landmark_list = gui_data['landmark_list']
        self.obstacle_list = gui_data['obstacle_list']

        # obstacles: filled circles; 'total_xform' carries the world pose,
        # the per-geom 'xform' is kept for local offsets
        self.obstacle_geom_list = []
        for obstacle in self.obstacle_list:
            obstacle_geom = {}
            total_xform = rendering.Transform()
            obstacle_geom['total_xform'] = total_xform
            geom = rendering.make_circle(radius=obstacle.radius, filled=True)
            geom.set_color(*obstacle.color[0],alpha = obstacle.color[1])
            xform = rendering.Transform()
            geom.add_attr(xform)
            geom.add_attr(total_xform)
            obstacle_geom['base']=(geom,xform)
            self.obstacle_geom_list.append(obstacle_geom)
        for obstacle_geom in self.obstacle_geom_list:
            self.viewer.add_geom(obstacle_geom['base'][0])

        # landmarks: same construction as obstacles
        self.landmark_geom_list = []
        for landmark in self.landmark_list:
            landmark_geom = {}
            total_xform = rendering.Transform()
            landmark_geom['total_xform'] = total_xform
            geom = rendering.make_circle(radius=landmark.radius, filled=True)
            geom.set_color(*landmark.color[0],alpha = landmark.color[1])
            xform = rendering.Transform()
            geom.add_attr(xform)
            geom.add_attr(total_xform)
            landmark_geom['base']=(geom,xform)
            self.landmark_geom_list.append(landmark_geom)
        for landmark_geom in self.landmark_geom_list:
            self.viewer.add_geom(landmark_geom['base'][0])

        # vehicles: car body rectangle plus a red front line (steering
        # direction) and a black back line (body axis)
        self.vehicle_geom_list = []
        for vehicle in self.vehicle_list:
            vehicle_geom = {}
            total_xform = rendering.Transform()
            vehicle_geom['total_xform'] = total_xform

            half_l = vehicle.L_car/2.0
            half_w = vehicle.W_car/2.0
            geom = rendering.make_polygon([[half_l,half_w],[-half_l,half_w],[-half_l,-half_w],[half_l,-half_w]])
            geom.set_color(*vehicle.color[0],alpha = vehicle.color[1])
            xform = rendering.Transform()
            geom.add_attr(xform)
            geom.add_attr(total_xform)
            vehicle_geom['base']=(geom,xform)

            # red line from center to front bumper; its own xform is rotated
            # by the steering angle phi in _render()
            geom = rendering.make_line((0,0),(half_l,0))
            geom.set_color(1.0,0.0,0.0,alpha = 1)
            xform = rendering.Transform()
            geom.add_attr(xform)
            geom.add_attr(total_xform)
            vehicle_geom['front_line']=(geom,xform)

            # black line from center to rear bumper
            geom = rendering.make_line((0,0),(-half_l,0))
            geom.set_color(0.0,0.0,0.0,alpha = 1)
            xform = rendering.Transform()
            geom.add_attr(xform)
            geom.add_attr(total_xform)
            vehicle_geom['back_line']=(geom,xform)

            self.vehicle_geom_list.append(vehicle_geom)

        for vehicle_geom in self.vehicle_geom_list:
            self.viewer.add_geom(vehicle_geom['base'][0])
            self.viewer.add_geom(vehicle_geom['front_line'][0])
            self.viewer.add_geom(vehicle_geom['back_line'][0])
    def _render(self):
        """Pull a fresh snapshot and redraw one frame.

        Geom lists built by init_object() are assumed to be index-aligned
        with the snapshot's entity lists (no entities created/destroyed at
        runtime)."""
        gui_data = self._read_data()
        self.vehicle_list = gui_data['vehicle_list']
        self.landmark_list = gui_data['landmark_list']
        self.obstacle_list = gui_data['obstacle_list']
        self.total_time = gui_data['total_time']
        self.info = gui_data['info']
        # move each entity's root transform to its current world pose
        for obstacle, obstacle_geom in zip(self.obstacle_list, self.obstacle_geom_list):
            obstacle_geom['total_xform'].set_translation(obstacle.state.coordinate[0],obstacle.state.coordinate[1])
        for landmark, landmark_geom in zip(self.landmark_list, self.landmark_geom_list):
            landmark_geom['total_xform'].set_translation(landmark.state.coordinate[0],landmark.state.coordinate[1])
        for vehicle, vehicle_geom in zip(self.vehicle_list,self.vehicle_geom_list):
            # front line rotates by the steering angle phi relative to the body
            vehicle_geom['front_line'][1].set_rotation(vehicle.state.phi)
            vehicle_geom['total_xform'].set_rotation(vehicle.state.theta)
            vehicle_geom['total_xform'].set_translation(vehicle.state.coordinate[0],vehicle.state.coordinate[1])

        # flatten the info dict to one "key:value" line per entry for the HUD
        info = ''
        for key in self.info.keys():
            info = info + str(key)+':'+str(self.info[key])+'\n'

        self.viewer.render(time = '%.1f'%(self.total_time),info = info,return_rgb_array = self.mode=='rgb_array')
self.viewer.render(time = '%.1f'%(self.total_time),info = info,return_rgb_array = self.mode=='rgb_array') 162 | 163 | def _render_target(self): 164 | self.init_viewer() 165 | self.init_object() 166 | 167 | while True: 168 | if self.viewer.closed: 169 | self.init_viewer() 170 | self.init_object() 171 | self._render() 172 | time.sleep(1.0/self.fps) 173 | 174 | #def getspin(self): 175 | # t= threading.Thread(target=self._render_target) 176 | # t.setDaemon(True) 177 | # t.start() 178 | #t.join() -------------------------------------------------------------------------------- /src/MultiVehicleEnv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/efc-robot/MultiVehicleEnv/4f0d139cdff8b5b16ac2f0ad4fe13f4ff25455fe/src/MultiVehicleEnv/__init__.py -------------------------------------------------------------------------------- /src/MultiVehicleEnv/basic.py: -------------------------------------------------------------------------------- 1 | from typing import * 2 | import numpy as np 3 | from .geometry import laser_circle_dist 4 | 5 | 6 | class Sensor(object): 7 | def __init__(self): 8 | pass 9 | 10 | 11 | class Lidar(Sensor): 12 | def __init__(self): 13 | self.angle_min = -np.pi 14 | self.angle_max = np.pi 15 | self.N_laser = 720 16 | self.range_min = 0.15 17 | self.range_max = 12.0 18 | 19 | 20 | class VehicleState(object): 21 | def __init__(self): 22 | self.reset() 23 | 24 | def reset(self): 25 | # center point position in x,y axis 26 | self.coordinate:List[float]= [0.0,0.0] 27 | # direction of car axis 28 | self.theta:float = 0.0 29 | # linear velocity of back point 30 | self.vel_b:float = 0.0 31 | # deflection angle of front wheel 32 | self.phi:float = 0.0 33 | # Control signal of linear velocity of back point 34 | self.ctrl_vel_b:float = 0.0 35 | # Control signal of deflection angle of front wheel 36 | self.ctrl_phi:float = 0.0 37 | # cannot move if movable is False. 
class Vehicle(object):
    """An Ackermann-steered car with physical parameters, an optional lidar
    and a kinematic state."""
    def __init__(self):
        self.vehicle_id:str = 'none'
        # safe radius of the vehicle, used for collision checking [m]
        self.r_safe:float = 0.17
        # length of the vehicle body [m]
        self.L_car:float = 0.250
        # width of the vehicle body [m]
        self.W_car:float = 0.185
        # distance between front and back wheel axles [m]
        self.L_axis:float = 0.20
        # gain mapping ctrl_vel_b in [-1,1] to back-wheel velocity
        self.K_vel:float = 0.361
        # gain mapping ctrl_phi in [-1,1] to front-wheel deflection
        self.K_phi:float = 0.561
        # acceleration limit of the back-wheel velocity
        self.dv_dt:float = 2.17
        # angular rate limit of the front-wheel deflection
        self.dphi_dt:float = 2.10
        # render color as [[r, g, b], alpha]; typing.List (not builtin
        # list[...]) because attribute annotations are evaluated at runtime
        # and builtin generics need Python >= 3.9
        self.color:List[Union[List[float] , float]] = [[0.0,0.0,0.0],0.0]
        # the discrete action table of the vehicle, action_code -> (ctrl_vel,ctrl_phi)
        self.discrete_table:Dict[int,Tuple[float,float]] = {0:( 0.0, 0.0),
                          1:( 1.0, 0.0), 2:( 1.0, 1.0), 3:( 1.0, -1.0),
                          4:(-1.0, 0.0), 5:(-1.0, 1.0), 6:(-1.0, -1.0)}
        # scratch space for scenario-defined per-vehicle data
        self.data_slot:Dict[str,Any]= {}

        # optional lidar sensor; None means the vehicle has no lidar
        self.lidar:Optional[Lidar] = None
        # the kinematic state of the vehicle
        self.state:VehicleState = VehicleState()
class Landmark(object):
    """A static circular goal marker placed in the world.

    Landmarks default to non-collideable: vehicles drive over them rather
    than crash into them.
    """
    def __init__(self):
        # radius of the landmark's circular footprint
        self.radius:float = 1.0
        # true if the entity can crash into other collideable objects
        self.collideable:bool = False
        # render color as [[r, g, b], alpha].
        # BUG FIX: was `list[Union[list[float], float]]` — annotations on
        # attribute targets are evaluated at runtime, and subscripting the
        # builtin `list` raises TypeError on the Python 3.8 this project
        # targets; typing.List works on 3.8+.
        self.color:List[Union[List[float], float]] = [[0.0,0.0,0.0],0.0]
        # pose (position + heading) of the landmark
        self.state:EntityState = EntityState()
self.field_range[3])/2 150 | return (center_x, center_y) 151 | 152 | # width and height of the main field center 153 | @property 154 | def field_half_size(self)->Tuple[float,float]: 155 | width = (self.field_range[2] - self.field_range[0])/2 156 | height = (self.field_range[3] - self.field_range[1])/2 157 | return (width, height) 158 | 159 | # return all entities in the world. May not be used because Vehicle and Entit have no common parent class 160 | @property 161 | def entities(self)->List[Union[Vehicle,Obstacle,Landmark]]: 162 | result_list:List[Union[Vehicle,Obstacle,Landmark]] = [] 163 | for vehicle in self.vehicle_list: 164 | result_list.append(vehicle) 165 | for landmark in self.landmark_list: 166 | result_list.append(landmark) 167 | for obstacle in self.obstacle_list: 168 | result_list.append(obstacle) 169 | return result_list 170 | 171 | # return all vehicles controllable by external policies 172 | @property 173 | def policy_vehicles(self): 174 | raise NotImplementedError() 175 | 176 | # return all vehicles controlled by world scripts 177 | @property 178 | def scripted_vehicles(self): 179 | raise NotImplementedError() 180 | 181 | def assign_data_step(self): 182 | for vehicle in self.vehicle_list: 183 | state = vehicle.state 184 | data_interface = self.data_interface[vehicle.vehicle_id] 185 | state.coordinate[0] = data_interface['x'] 186 | state.coordinate[1] = data_interface['y'] 187 | state.theta = data_interface['theta'] 188 | 189 | # update the physical state for one sim_step. 
    # update the physical state for one sim_step: publish each vehicle's pose
    # to the data interface, then advance movable vehicles by sim_t using
    # either the exact kinematic model or the identified linear model.
    def _update_one_sim_step(self):
        for vehicle in self.vehicle_list:
            state = vehicle.state
            data_interface = self.data_interface[vehicle.vehicle_id]
            # write the current pose out first so frozen (crashed) vehicles
            # still have valid entries in the interface
            data_interface['x'] = state.coordinate[0]
            data_interface['y'] = state.coordinate[1]
            data_interface['theta'] = state.theta

            # if the vehicle is not movable, skip update its physical state
            if state.movable:
                if self.kinetic_mode is None:
                    # exact bicycle-model integration
                    new_state_data = _update_one_sim_step_warp(state, vehicle, self.sim_t)
                else:
                    # linearized model using the pre-identified matrix
                    new_state_data = _update_one_sim_step_linear_warp(state, vehicle, self.sim_t, self.linear_matrix)
                data_interface['x'], data_interface['y'], data_interface['theta'] = new_state_data
# warp one sim step into a function with pure math calculation.
# Applies rate-limited first-order actuator dynamics to vel_b and phi, then
# integrates the bicycle model for dt seconds; returns (x, y, theta).
def _update_one_sim_step_warp(state:VehicleState, vehicle:Vehicle, dt:float):
    def linear_update(x:float, dx_dt:float, dt:float, target:float)->float:
        # move x toward target at rate dx_dt, saturating exactly at target
        if x < target:
            return min(x + dx_dt*dt, target)
        elif x > target:
            return max(x - dx_dt*dt, target)
        return x
    # actuator targets are the control signals scaled by the vehicle gains
    target_vel_b = state.ctrl_vel_b * vehicle.K_vel
    state.vel_b = linear_update(state.vel_b, vehicle.dv_dt, dt, target_vel_b)
    target_phi = state.ctrl_phi * vehicle.K_phi
    state.phi = linear_update(state.phi, vehicle.dphi_dt, dt, target_phi)

    # pure-math core, separated so it can be numba-jitted (see below)
    update_data = _update_one_sim_step_njit(state.phi,
                                 state.vel_b,
                                 state.theta,
                                 vehicle.L_axis,
                                 state.coordinate[0],
                                 state.coordinate[1],
                                 dt)
    return update_data
def _update_one_sim_step_linear_warp(state:VehicleState, vehicle:Vehicle, dt:float, A):
    """Advance one sim step with a pre-identified linear kinematic model.

    ``A`` is the coefficient matrix of the identified model; body-frame
    displacement and actuator increments are linear in the current state and
    control signals. Mutates ``state.vel_b`` / ``state.phi`` in place and
    returns the new world-frame pose ``(nx, ny, ntheta)``.
    (``vehicle`` and ``dt`` are unused here: the model is identified for a
    fixed step length.)
    """
    # body-frame increments predicted by the linear model
    delta_x = A[1][0] * state.vel_b + A[3][0] * state.ctrl_vel_b
    delta_y = A[2][1] * state.phi + A[4][1] * state.ctrl_phi
    delta_theta = (A[6][2] * state.vel_b + A[10][2] * state.ctrl_vel_b) * state.phi
    delta_vb = A[3][3] * state.ctrl_vel_b + A[1][3] * state.vel_b
    delta_phi = A[4][4] * state.ctrl_phi + A[2][4] * state.phi

    # integrate the actuator states in place
    state.vel_b += delta_vb
    state.phi += delta_phi

    # rotate the body-frame displacement into the world frame
    heading = state.theta
    cos_h = np.cos(heading)
    sin_h = np.sin(heading)
    nx = state.coordinate[0] + cos_h * delta_x - sin_h * delta_y
    ny = state.coordinate[1] + sin_h * delta_x + cos_h * delta_y

    return nx, ny, heading + delta_theta
def lidar_scan_table(lidar_c, lidar_angle, lidar: Lidar, obstacle_list: List[Obstacle],table:dict,size_vec,r_vec_vec ):
    """Fast lidar scan via a precomputed look-up table.

    Instead of ray-casting every beam against every obstacle, precomputed
    scans of single circles (indexed by radius via ``size_vec`` and by
    distance via ``r_vec_vec``, stored in ``table['r']``/``table['i']``) are
    looked up, rotated by circular shifting, and merged. Returns
    ``(ranges, intensities)`` arrays of length ``lidar.N_laser``.
    """
    lidar1=lidar
    lidar_Angle=lidar_angle
    lidar_Centroid=lidar_c
    obs_vec=obstacle_list
    PI=3.1415926535
    # no obstacles: every beam reports max range, zero intensity
    if len(obs_vec) == 0:
        final_ranges = np.ones(lidar1.N_laser)*lidar1.range_max
        final_intensities = np.zeros(lidar1.N_laser)
        return final_ranges,final_intensities

    coordinate_vec=np.zeros((2,len(obs_vec)))
    for i in range(len(obs_vec)):
        coordinate_vec[:,i]=obs_vec[i].state.coordinate-lidar_Centroid

    rel_coordinate_vec=np.zeros((2,len(obs_vec)))
    # convert obstacle positions into the lidar frame (rotate by -lidar_Angle)
    rel_coordinate_vec[[0],:]=coordinate_vec[[0],:]*np.cos(lidar_Angle)+coordinate_vec[[1],:]*np.sin(lidar_Angle)
    rel_coordinate_vec[[1],:]=-coordinate_vec[[0],:]*np.sin(lidar_Angle)+coordinate_vec[[1],:]*np.cos(lidar_Angle)




    # per-obstacle distance and bearing (obs_x, obs_y); headings are ignored
    dis_vec=np.sqrt(rel_coordinate_vec[0,:]**2+rel_coordinate_vec[1,:]**2)
    theta_vec=np.arctan2(rel_coordinate_vec[1,:],rel_coordinate_vec[0,:])/PI*180 # -pi~pi mapped to -180~180 degrees
    # angular resolution is 360/N_laser degrees; the table entries are
    # computed for a circle on the +x axis, so each scan must be circularly
    # shifted by the bearing. The shift rotates rightward, hence the minus
    # sign. No interpolation: the bearing is simply rounded to the nearest
    # beam index.

    offset_by_theta = np.round(theta_vec*(lidar1.N_laser/360))*(-1)
    offset_by_theta=(offset_by_theta%lidar1.N_laser).astype('int')
    # look-up table: a family of circles at varying distances, all on the x axis
    outputs_ranges=np.ones([lidar1.N_laser, 1])*lidar1.range_max*np.ones((1,len(dis_vec)))
    outputs_intensities=np.zeros([lidar1.N_laser, 1])*np.ones((1,len(obs_vec)))



    for i in range(len(dis_vec)):
        # select the table page with the closest circle radius
        size_index=np.abs(size_vec - obs_vec[i].radius ).argmin()
        ranges_vec=table['r'][size_index]
        intensities_vec=table['i'][size_index]

        index=np.abs(r_vec_vec[size_index] - dis_vec[i]).argmin()
        # look up the range column for the closest distance, then rotate it
        # to the obstacle's bearing
        output_range=ranges_vec[:,[index]]
        output_range=np.vstack((output_range[offset_by_theta[i]:], output_range[:offset_by_theta[i]]))
        outputs_ranges[:,[i]]=output_range
        # same lookup and rotation for the intensity column
        output_intensities=intensities_vec[:,[index]]
        output_intensities=np.vstack((output_intensities[offset_by_theta[i]:], output_intensities[:offset_by_theta[i]]))
        outputs_intensities[:,[i]]=output_intensities
    # merge ranges: each beam keeps the nearest hit over all obstacles
    final_ranges=np.ones(lidar1.N_laser)*lidar1.range_max
    index_min=np.argmin(outputs_ranges,axis=1)
    for i in range(len(final_ranges)):
        final_ranges[i]=outputs_ranges[i,index_min[i]]
    # merge intensities: each beam keeps the strongest return
    final_intensities=np.zeros(lidar1.N_laser)
    index_max=np.argmax(outputs_intensities,axis=1)
    for i in range(len(final_intensities)):
        final_intensities[i]=outputs_intensities[i,index_max[i]]



    return final_ranges,final_intensities
    def __init__(self, world:World,
                 reset_callback:Callable=None,
                 reward_callback:Callable=None,
                 observation_callback:Callable=None,
                 info_callback:Callable=None,
                 done_callback:Callable=None,
                 updata_callback:Callable=None,
                 GUI_port:Union[str,None] = None,
                 shared_reward:bool = False):
        """Gym-like environment wrapping a multi-vehicle World.

        world:        the simulated World instance.
        *_callback:   scenario hooks (reset/reward/observation/info/done);
                      any of them may be None.
        updata_callback: post-step world update hook (name kept as-is —
                      [sic] "updata" — because scenarios pass it by keyword).
        GUI_port:     pickle-file path for the GUI, or None to disable.
        shared_reward: if True, all vehicles receive the summed team reward.
        """
        self.world = world
        # set required vectorized gym env property
        self.shared_reward = shared_reward

        self.GUI_port = GUI_port
        # file handle is opened lazily on the first dumpGUI() call
        self.GUI_file:Union[BinaryIO,None]=None


        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        self.updata_callback = updata_callback
        # record total real-world time past by
        self.total_time:float = 0.0

        # action spaces: continuous Box if a vehicle has no discrete table,
        # otherwise Discrete over its action codes
        self.action_space:List[spaces.Discrete] = []
        for vehicle in self.world.vehicle_list:
            if vehicle.discrete_table is None:
                self.action_space.append(spaces.Box(low=-1.0,high=1.0,shape=(2,)))
            else:
                self.action_space.append(spaces.Discrete(len(vehicle.discrete_table)))
        # observation space: dimension probed by calling the observation
        # callback once per vehicle (0-dim if no callback is given)
        self.observation_space = []
        for vehicle in self.world.vehicle_list:
            if self.observation_callback is None:
                obs_dim = 0
            else:
                obs_dim = len(self.observation_callback(vehicle, self.world))
            self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
        # in-process GUI created lazily by render()
        self.GUI = None
    def step(self, action_n:T_action):
        """Advance the world by one MDP step.

        action_n: one action per vehicle — a (vel, phi) pair for continuous
        vehicles, or an int action code / one-hot list for discrete ones.
        Returns (obs_n, reward_n, done_n, info_n) lists, one entry per
        vehicle.
        """
        obs_n:List[np.ndarray] = []
        reward_n:List[float] = []
        done_n:List[bool] = []
        info_n:Dict[str,Any] = {'n': []}
        # set action for each vehicle
        for i, vehicle in enumerate(self.world.vehicle_list):
            if vehicle.discrete_table is None:
                ctrl_vel_b = action_n[i][0]
                ctrl_phi = action_n[i][1]
            else:
                # accept either a plain int code or a one-hot encoding
                if isinstance(action_n[i],int):
                    action_i = action_n[i]
                else:
                    action_i = list(action_n[i]).index(1)
                [ctrl_vel_b,ctrl_phi] = vehicle.discrete_table[action_i]
            vehicle.state.ctrl_vel_b = ctrl_vel_b
            vehicle.state.ctrl_phi = ctrl_phi
        # advance world state; keep a deep copy of the pre-step world so
        # reward callbacks can compute deltas against it
        self.old_world = copy.deepcopy(self.world)


        # one MDP step = sim_step fine-grained physics sub-steps
        for idx in range(self.world.sim_step):
            self.total_time += self.world.sim_t
            self.world._update_one_sim_step()
            self.world.assign_data_step()
            self.world._check_collision()
            if self.GUI_port is not None:
                # if use GUI, slow down the simulation speed
                #time.sleep(self.world.sim_t)
                self.dumpGUI()

        # lidar is rendered once per MDP step, not per sub-step
        self.world._update_laser_sim_step()


        # record reward/done/info/observation for each vehicle
        for vehicle in self.world.vehicle_list:
            reward_n.append(self._get_reward(vehicle))
        for vehicle in self.world.vehicle_list:
            done_n.append(self._get_done(vehicle))
        for vehicle in self.world.vehicle_list:
            info_n['n'].append(self._get_info(vehicle))
        # scenario-level update hook runs before observations are gathered
        if self.updata_callback is not None:
            self.updata_callback(self.world)
        for vehicle in self.world.vehicle_list:
            obs_n.append(self._get_obs(vehicle))


        # propagate the gym TimeLimit convention when the scenario tracks steps
        if 'max_step_number' in self.world.data_slot.keys():
            step_done = self.world.data_slot['max_step_number']<=self.world.data_slot['total_step_number']
            info_n['TimeLimit.truncated'] = step_done

        # all vehicles get total reward in cooperative case
        reward = np.sum(reward_n)
        if self.shared_reward:
            reward_n = [reward] * len(self.world.vehicle_list)
        return obs_n, reward_n, done_n, info_n
and self.GUI_file is None: 186 | try: 187 | self.GUI_file = open(self.GUI_port, "w+b") 188 | except IOError: 189 | print('open GUI_file %s failed'%self.GUI_port) 190 | if self.GUI_port is not None: 191 | self.GUI_file.seek(0) 192 | pickle.dump(GUI_data,self.GUI_file) 193 | self.GUI_file.flush() -------------------------------------------------------------------------------- /src/MultiVehicleEnv/evaluate.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import termios 3 | import contextlib 4 | from .environment import MultiVehicleEnv 5 | from .GUI import GUI 6 | import numpy as np 7 | from typing import Callable, List, Optional, Sequence 8 | import threading 9 | 10 | 11 | T_Policy = Callable[[np.ndarray],int] 12 | T_action = List[int] 13 | 14 | class EvaluateWrap(object): 15 | def __init__(self, env:MultiVehicleEnv, 16 | policy_list:Sequence[T_Policy]): 17 | self.env = env 18 | self.policy_list:Optional[Sequence[T_Policy]] = policy_list 19 | if env.world.GUI_port is not None: 20 | self.GUI = GUI(port_type='direct',gui_port = self.env) 21 | else: 22 | self.GUI = None 23 | if self.GUI is not None: 24 | GUI_t = threading.Thread(target=self.GUI._render_target) 25 | GUI_t.setDaemon(True) 26 | GUI_t.start() 27 | 28 | self.main_spin() 29 | 30 | def run_test(self): 31 | n_obs = self.env.reset() 32 | for step_idx in range(1000000): 33 | if self.stop_signal: 34 | break 35 | n_action:T_action = [] 36 | for obs,policy in zip(n_obs,self.policy_list): 37 | action = policy(obs) 38 | n_action.append(action) 39 | n_obs,reward,done,info = self.env.step(n_action) 40 | 41 | @contextlib.contextmanager 42 | def raw_mode(self, file): 43 | old_attrs = termios.tcgetattr(file.fileno()) 44 | new_attrs = old_attrs[:] 45 | new_attrs[3] = new_attrs[3] & ~(termios.ECHO | termios.ICANON) 46 | try: 47 | termios.tcsetattr(file.fileno(), termios.TCSADRAIN, new_attrs) 48 | yield 49 | finally: 50 | termios.tcsetattr(file.fileno(), termios.TCSADRAIN, 
def intersection_line_circle(point, angle, circle_c, circle_r):
    """Intersect an infinite line with a circle.

    The line passes through ``point`` with heading ``angle`` (radians);
    the circle has center ``circle_c`` and radius ``circle_r``.

    Returns a list of 0, 1 or 2 intersection points (np.ndarray) in the
    original (world) frame.
    """
    r = circle_r
    # Work in the circle-centered frame so the circle is x^2 + y^2 = r^2.
    moved_p1_coord = point - circle_c
    angle = angle % (2 * np.pi)
    # Vertical line: tan() is unusable, solve x = const analytically.
    # NOTE: after `% (2*np.pi)` the "down" heading is 3*pi/2 -- the original
    # test for -pi/2 could never match, so those rays fell into the tan()
    # branch with k ~ 1e16 and lost precision.  A small tolerance also
    # catches headings that are vertical only up to float rounding.
    if abs(angle - np.pi / 2) < 1e-9 or abs(angle - 3 * np.pi / 2) < 1e-9:
        x = moved_p1_coord[0]
        if abs(x) > r:
            intersections = []
        elif abs(x) == r:
            # tangent line: a single touching point
            intersections = [np.array([x, 0.0])]
        else:
            temp = (r**2 - x**2)**0.5
            intersections = [np.array([x, temp]), np.array([x, -temp])]
    else:
        # Substitute y = k*x + b into x^2 + y^2 = r^2:
        #   (1 + k^2) x^2 + 2kb x + (b^2 - r^2) = 0
        k = np.tan(angle)
        b = moved_p1_coord[1] - k * moved_p1_coord[0]
        delta = 4 * (r**2 + r**2 * k**2 - b**2)  # discriminant
        if delta < 0:
            intersections = []
        elif delta == 0:
            x0 = -k * b / (k**2 + 1)
            y0 = k * x0 + b
            intersections = [np.array([x0, y0])]
        else:
            x0 = -k * b / (k**2 + 1)
            temp = delta**0.5 / (2 * (k**2 + 1))
            x1 = x0 - temp
            y1 = k * x1 + b
            x2 = x0 + temp
            y2 = k * x2 + b
            intersections = [np.array([x1, y1]), np.array([x2, y2])]
    # Translate the hits back into the world frame.
    return [np.array([p[0] + circle_c[0], p[1] + circle_c[1]]) for p in intersections]


def laser_circle_dist(center, angle, max_range, circle_c, circle_r):
    """Cast a laser ray and return its range reading against one circle.

    The ray starts at ``center`` with heading ``angle`` and maximum length
    ``max_range``.

    Returns ``(dist, intensities)``: the distance to the nearest hit in
    front of the ray (``max_range`` if nothing is hit within range), and
    1.0 if the circle was hit, else 0.0.
    """
    # Shift so the ray origin is at (0, 0); the direction is unchanged.
    moved_circle_c = circle_c - center
    intensities = 0.0
    interserction = intersection_line_circle(np.array([0.0, 0.0]), angle, moved_circle_c, circle_r)
    if len(interserction) == 0:
        return max_range, intensities
    final_dist = max_range
    for point in interserction:
        # Signed distance along the ray direction; negative projections
        # lie behind the laser and must be ignored.
        vec = np.array([np.cos(angle), np.sin(angle)])
        dist = np.dot(point, vec)
        if (dist > 0) and (dist < final_dist):
            final_dist = dist
            intensities = 1.0
    return final_dist, intensities
RAD2DEG = 57.29577951308232  # radians -> degrees factor used by Transform

def get_display(spec):
    """Convert a display specification (such as :0) into an actual Display
    object.

    Pyglet only supports multiple Displays on Linux.
    """
    if spec is None:
        return None
    elif isinstance(spec, six.string_types):
        return pyglet.canvas.Display(spec)
    else:
        pass
        #raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))

class Viewer(object):
    """Pyglet window wrapper that renders persistent and one-time geometry.

    Geometry added via add_geom persists across frames; geometry added via
    add_onetime (and the draw_* helpers) is drawn for a single frame only.
    """
    def __init__(self, width, height, display=None):
        display = get_display(display)

        self.width = width
        self.height = height
        self.closed = False

        self.window = pyglet.window.Window(width=width, height=height, display=display)
        self.window.on_close = self.window_closed_by_user
        self.geoms = []          # persistent geometry
        self.onetime_geoms = []  # cleared after every render()
        self.transform = Transform()

        # anti-aliased line rendering with alpha blending
        glEnable(GL_BLEND)
        # glEnable(GL_MULTISAMPLE)
        glEnable(GL_LINE_SMOOTH)
        # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
        glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
        glLineWidth(2.0)
        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)

    def close(self):
        self.window.close()
        self.closed = True

    def window_closed_by_user(self):
        self.close()

    def set_bounds(self, left, right, bottom, top):
        """Map world coordinates [left,right] x [bottom,top] onto the window."""
        assert right > left and top > bottom
        scalex = self.width/(right-left)
        scaley = self.height/(top-bottom)
        self.transform = Transform(
            translation=(-left*scalex, -bottom*scaley),
            scale=(scalex, scaley))

    def add_geom(self, geom):
        self.geoms.append(geom)

    def add_onetime(self, geom):
        self.onetime_geoms.append(geom)

    def render(self, time = '0', info = '', return_rgb_array=False):
        """Draw one frame; optionally return it as an HxWx3 uint8 array.

        `time` is drawn at the bottom-left corner, `info` at the top-left.
        """
        glClearColor(1,1,1,1)
        self.window.clear()
        self.window.switch_to()
        self.window.dispatch_events()
        self.transform.enable()

        for geom in self.geoms:
            geom.render()
        for geom in self.onetime_geoms:
            geom.render()
        self.transform.disable()

        time_label = pyglet.text.Label(time,
                                       font_name='Times New Roman',
                                       font_size=20,
                                       color=(0,0,0,255),
                                       x=self.window.width//20,
                                       y=self.window.height//20,
                                       anchor_x='left', anchor_y='baseline')
        time_label.draw()

        info_label = pyglet.text.Label(info,
                                       font_name='Times New Roman',
                                       font_size=20,
                                       color=(0,0,0,255),
                                       x=self.window.width//20,
                                       y=self.window.height//20*19,
                                       anchor_x='left', anchor_y='baseline')
        info_label.draw()

        arr = None
        if return_rgb_array:
            buffer = pyglet.image.get_buffer_manager().get_color_buffer()
            image_data = buffer.get_image_data()
            # np.fromstring's binary mode is deprecated (removed in modern
            # NumPy); frombuffer reads the same bytes without copying.
            arr = np.frombuffer(image_data.data, dtype=np.uint8)
            # In https://github.com/openai/gym-http-api/issues/2, we
            # discovered that someone using Xmonad on Arch was having
            # a window of size 598 x 398, though a 600 x 400 window
            # was requested. (Guess Xmonad was preserving a pixel for
            # the boundary.) So we use the buffer height/width rather
            # than the requested one.
            arr = arr.reshape(buffer.height, buffer.width, 4)
            arr = arr[::-1,:,0:3]

        self.window.flip()
        self.onetime_geoms = []
        return arr

    # Convenience: build a geometry, apply attrs, draw it for one frame
    def draw_circle(self, radius=10, res=30, filled=True, **attrs):
        geom = make_circle(radius=radius, res=res, filled=filled)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom

    def draw_polygon(self, v, filled=True, **attrs):
        geom = make_polygon(v=v, filled=filled)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom

    def draw_polyline(self, v, **attrs):
        geom = make_polyline(v=v)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom

    def draw_line(self, start, end, **attrs):
        geom = Line(start, end)
        _add_attrs(geom, attrs)
        self.add_onetime(geom)
        return geom

    def get_array(self):
        """Return the current frame as an HxWx3 uint8 array."""
        self.window.flip()
        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        self.window.flip()
        # frombuffer replaces the deprecated np.fromstring binary mode
        arr = np.frombuffer(image_data.data, dtype=np.uint8)
        arr = arr.reshape(self.height, self.width, 4)
        return arr[::-1,:,0:3]

def _add_attrs(geom, attrs):
    """Apply color/linewidth keyword attrs to a geometry."""
    if "color" in attrs:
        geom.set_color(*attrs["color"])
    if "linewidth" in attrs:
        geom.set_linewidth(attrs["linewidth"])

class Geom(object):
    """Base class for drawables: renders itself inside its attribute stack."""
    def __init__(self):
        self._color=Color((0, 0, 0, 1.0))
        self.attrs = [self._color]
    def render(self):
        # enable attrs in reverse so earlier-added attrs wrap later ones
        for attr in reversed(self.attrs):
            attr.enable()
        self.render1()
        for attr in self.attrs:
            attr.disable()
    def render1(self):
        raise NotImplementedError
    def add_attr(self, attr):
        self.attrs.append(attr)
    def set_color(self, r, g, b, alpha=1):
        self._color.vec4 = (r, g, b, alpha)
class Transform(Attr):
    """Translate/rotate/scale everything rendered while it is enabled."""
    def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1,1)):
        self.set_translation(*translation)
        self.set_rotation(rotation)
        self.set_scale(*scale)
    def enable(self):
        glPushMatrix()
        # translate to the GL anchor point, then rotate (degrees), then scale
        glTranslatef(self.translation[0], self.translation[1], 0)
        glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
        glScalef(self.scale[0], self.scale[1], 1)
    def disable(self):
        glPopMatrix()
    def set_translation(self, newx, newy):
        self.translation = (float(newx), float(newy))
    def set_rotation(self, new):
        self.rotation = float(new)
    def set_scale(self, newx, newy):
        self.scale = (float(newx), float(newy))

class Color(Attr):
    """RGBA color attribute."""
    def __init__(self, vec4):
        self.vec4 = vec4
    def enable(self):
        glColor4f(*self.vec4)

class LineStyle(Attr):
    """Stipple pattern applied while enabled."""
    def __init__(self, style):
        self.style = style
    def enable(self):
        glEnable(GL_LINE_STIPPLE)
        glLineStipple(1, self.style)
    def disable(self):
        glDisable(GL_LINE_STIPPLE)

class LineWidth(Attr):
    """Stroke width attribute for line drawing."""
    def __init__(self, stroke):
        self.stroke = stroke
    def enable(self):
        glLineWidth(self.stroke)

class Point(Geom):
    """A single point at the local origin."""
    def __init__(self):
        Geom.__init__(self)
    def render1(self):
        glBegin(GL_POINTS)
        glVertex3f(0.0, 0.0, 0.0)
        glEnd()

class FilledPolygon(Geom):
    """A filled polygon drawn with a 50%-darkened outline."""
    def __init__(self, v):
        Geom.__init__(self)
        self.v = v
    def render1(self):
        count = len(self.v)
        if count == 4:
            mode = GL_QUADS
        elif count > 4:
            mode = GL_POLYGON
        else:
            mode = GL_TRIANGLES
        glBegin(mode)
        for vertex in self.v:
            glVertex3f(vertex[0], vertex[1], 0)
        glEnd()

        # outline in a darkened version of the fill color
        darker = (self._color.vec4[0] * 0.5, self._color.vec4[1] * 0.5,
                  self._color.vec4[2] * 0.5, self._color.vec4[3] * 0.5)
        glColor4f(*darker)
        glBegin(GL_LINE_LOOP)
        for vertex in self.v:
            glVertex3f(vertex[0], vertex[1], 0)
        glEnd()

def make_line(start, end):
    """Line-segment geometry from start to end."""
    return Line(start, end)

def make_circle(radius=10, res=30, filled=True):
    """Circle geometry approximated by `res` vertices."""
    points = [(math.cos(2*math.pi*i / res) * radius,
               math.sin(2*math.pi*i / res) * radius)
              for i in range(res)]
    if filled:
        return FilledPolygon(points)
    return PolyLine(points, True)

def make_polygon(v, filled=True):
    """Polygon geometry, filled or outline-only."""
    return FilledPolygon(v) if filled else PolyLine(v, True)

def make_polyline(v):
    """Open polyline geometry through the given vertices."""
    return PolyLine(v, False)

def make_capsule(length, width):
    """Capsule: a box of the given length capped by two half-circles."""
    half = width/2
    box = make_polygon([(0, -half), (0, half), (length, half), (length, -half)])
    cap_near = make_circle(half)
    cap_far = make_circle(half)
    cap_far.add_attr(Transform(translation=(length, 0)))
    return Compound([box, cap_near, cap_far])

class Compound(Geom):
    """Renders several geometries as one, discarding their own colors."""
    def __init__(self, gs):
        Geom.__init__(self)
        self.gs = gs
        for child in self.gs:
            child.attrs = [attr for attr in child.attrs if not isinstance(attr, Color)]
    def render1(self):
        for child in self.gs:
            child.render()

class PolyLine(Geom):
    """Connected line segments, optionally closed into a loop."""
    def __init__(self, v, close):
        Geom.__init__(self)
        self.v = v
        self.close = close
        self.linewidth = LineWidth(1)
        self.add_attr(self.linewidth)
    def render1(self):
        mode = GL_LINE_LOOP if self.close else GL_LINE_STRIP
        glBegin(mode)
        for vertex in self.v:
            glVertex3f(vertex[0], vertex[1], 0)
        glEnd()
    def set_linewidth(self, x):
        self.linewidth.stroke = x
render1(self): 328 | glBegin(GL_LINES) 329 | glVertex2f(*self.start) 330 | glVertex2f(*self.end) 331 | glEnd() 332 | 333 | class Image(Geom): 334 | def __init__(self, fname, width, height): 335 | Geom.__init__(self) 336 | self.width = width 337 | self.height = height 338 | img = pyglet.image.load(fname) 339 | self.img = img 340 | self.flip = False 341 | def render1(self): 342 | self.img.blit(-self.width/2, -self.height/2, width=self.width, height=self.height) 343 | 344 | # ================================================================ 345 | 346 | class SimpleImageViewer(object): 347 | def __init__(self, display=None): 348 | self.window = None 349 | self.isopen = False 350 | self.display = display 351 | def imshow(self, arr): 352 | if self.window is None: 353 | height, width, channels = arr.shape 354 | self.window = pyglet.window.Window(width=width, height=height, display=self.display) 355 | self.width = width 356 | self.height = height 357 | self.isopen = True 358 | assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape" 359 | image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3) 360 | self.window.clear() 361 | self.window.switch_to() 362 | self.window.dispatch_events() 363 | image.blit(0,0) 364 | self.window.flip() 365 | def close(self): 366 | if self.isopen: 367 | self.window.close() 368 | self.isopen = False 369 | def __del__(self): 370 | self.close() 371 | -------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenario.py: -------------------------------------------------------------------------------- 1 | from .basic import World 2 | 3 | # defines scenario upon which the world is built 4 | class BaseScenario(object): 5 | # create elements of the world 6 | def make_world(self): 7 | raise NotImplementedError() 8 | # create initial conditions of the world 9 | def reset_world(self, world:World): 10 | raise 
import numpy as np
from MultiVehicleEnv.basic import World, Vehicle, Entity
from MultiVehicleEnv.scenario import BaseScenario
from MultiVehicleEnv.utils import coord_dist, naive_inference


class Scenario(BaseScenario):
    """Single-vehicle reach-the-landmark scenario (1 agent, 1 landmark)."""

    def make_world(self, args):
        """Build and return the World for this scenario.

        `args` must provide: direction_alpha, add_direction_encoder,
        step_t, sim_step, usegui, gui_port.
        """
        world = World()

        # direction reward score coefficient
        self.direction_alpha: float = args.direction_alpha

        # direction reward type: train = init in reset, keyboard = set by
        # keyboard, disable = no direction reward
        self.add_direction_encoder: str = args.add_direction_encoder
        assert self.add_direction_encoder in ['train', 'keyboard', 'disable']

        # for simulate real world
        world.step_t = args.step_t
        world.sim_step = args.sim_step
        world.field_range = [-2.0, -2.0, 2.0, 2.0]

        # one agent can see the landmark within this range
        world.data_slot['view_threshold'] = 10.0
        # store the keyboard direction
        world.data_slot['key_direction'] = 0

        # set world.GUI_port as port dir when usegui is true
        if args.usegui:
            world.GUI_port = args.gui_port
        else:
            world.GUI_port = None

        # add 1 agent
        agent_number = 1
        world.vehicle_list = []
        for idx in range(agent_number):
            agent = Vehicle()
            agent.r_safe = 0.17
            agent.L_car = 0.25
            agent.W_car = 0.18
            agent.L_axis = 0.2
            agent.K_vel = 0.707
            agent.K_phi = 0.298
            agent.dv_dt = 2.0
            agent.dphi_dt = 3.0
            agent.color = [[1.0, idx / 3.0, idx / 3.0], 1.0]
            world.vehicle_list.append(agent)

        # add landmark_list
        landmark_number = 1
        world.landmark_list = []
        for idx in range(landmark_number):
            entity = Entity()
            entity.radius = 0.4
            entity.collideable = False
            entity.color = [[0.0, 1.0, 0.0], 0.1]
            world.landmark_list.append(entity)

        # add obstacle_list (none in this scenario)
        obstacle_number = 0
        world.obstacle_list = []
        for idx in range(obstacle_number):
            entity = Entity()
            entity.radius = 0.2
            entity.collideable = True
            entity.color = [[0.0, 0.0, 0.0], 1.0]
            world.obstacle_list.append(entity)

        world.data_slot['max_step_number'] = 20
        # make initial conditions
        self.reset_world(world)
        return world

    def reset_world(self, world: World):
        """Randomly (re)place every entity without any initial overlap."""
        world.data_slot['total_step_number'] = 0
        # set random initial states
        for agent in world.vehicle_list:
            agent.state.theta = np.random.uniform(0, 2 * 3.14159)
            agent.state.vel_b = 0
            agent.state.phi = 0
            agent.state.ctrl_vel_b = 0
            agent.state.ctrl_phi = 0
            agent.state.movable = True
            agent.state.crashed = False

        # place all landmarks, obstacles and vehicles in the field without
        # conflict: rejection-sample until no two circles overlap
        conflict = True
        while conflict:
            conflict = False
            all_circle = []
            for landmark in world.landmark_list:
                for idx in range(2):
                    scale = world.field_half_size[idx]
                    trans = world.field_center[idx]
                    # push landmarks towards the field border
                    norm_pos = np.random.uniform(-0.5, +0.5)
                    norm_pos = norm_pos + (0.5 if norm_pos > 0 else -0.5)
                    landmark.state.coordinate[idx] = norm_pos * scale + trans
                all_circle.append((landmark.state.coordinate[0], landmark.state.coordinate[1], landmark.radius))

            for obstacle in world.obstacle_list:
                for idx in range(2):
                    scale = world.field_half_size[idx]
                    trans = world.field_center[idx]
                    norm_pos = np.random.uniform(-1, +1)
                    obstacle.state.coordinate[idx] = norm_pos * scale * 0.5 + trans
                all_circle.append((obstacle.state.coordinate[0], obstacle.state.coordinate[1], obstacle.radius))

            for agent in world.vehicle_list:
                for idx in range(2):
                    scale = world.field_half_size[idx]
                    trans = world.field_center[idx]
                    norm_pos = np.random.uniform(-1, +1)
                    agent.state.coordinate[idx] = norm_pos * scale + trans
                all_circle.append((agent.state.coordinate[0], agent.state.coordinate[1], agent.r_safe))

            # pairwise circle-overlap check
            for idx_a in range(len(all_circle)):
                for idx_b in range(idx_a + 1, len(all_circle)):
                    x_a, y_a, r_a = all_circle[idx_a]
                    x_b, y_b, r_b = all_circle[idx_b]
                    dis = ((x_a - x_b)**2 + (y_a - y_b)**2)**0.5
                    if dis < r_a + r_b:
                        conflict = True
                        break
                if conflict:
                    break

        for landmark in world.landmark_list:
            landmark.color[1] = 0.1
        # set real landmark and make its color solid
        world.data_slot['real_landmark'] = np.random.randint(len(world.landmark_list))
        real_landmark = world.landmark_list[world.data_slot['real_landmark']]
        real_landmark.color[1] = 1.0

        # encode 4 quadrant directions into [0,1,2,3].  Boundaries (a zero
        # component) are folded into a neighbouring quadrant; the original
        # returned None there, which crashed the decoder indexing below.
        def encode_direction(direction):
            if direction[0] >= 0 and direction[1] >= 0:
                return 0
            if direction[0] < 0 and direction[1] >= 0:
                return 1
            if direction[0] < 0 and direction[1] < 0:
                return 2
            return 3

        # decode 4 direction codes [0,1,2,3] into one-hot vectors
        world.data_slot['direction_decoder'] = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
        # give each agent a direction code as a human hint
        for agent in world.vehicle_list:
            direction = [real_landmark.state.coordinate[0] - agent.state.coordinate[0],
                         real_landmark.state.coordinate[1] - agent.state.coordinate[1]]
            agent.data_slot['direction_obs'] = world.data_slot['direction_decoder'][encode_direction(direction)]

    def reward(self, agent: Vehicle, world: World):
        """Per-agent scalar reward: direction hint + reach bonus - crash penalty."""
        rew: float = 0.0
        real_landmark = world.landmark_list[world.data_slot['real_landmark']]

        # direction reward: +alpha when the applied control matches the
        # naive go-to-landmark control
        prefer_action = naive_inference(real_landmark.state.coordinate[0] - agent.state.coordinate[0],
                                        real_landmark.state.coordinate[1] - agent.state.coordinate[1],
                                        agent.state.theta)
        same_direction = np.sign(agent.state.ctrl_vel_b) == prefer_action[0] and np.sign(agent.state.ctrl_phi) == prefer_action[1]
        if same_direction:
            rew += self.direction_alpha * 1.0

        # reach reward: +1 only when every vehicle touches the real landmark
        Allreach = True
        for agent_a in world.vehicle_list:
            dist = coord_dist(agent_a.state.coordinate, real_landmark.state.coordinate)
            if dist > agent_a.r_safe + real_landmark.radius:
                Allreach = False
        if Allreach:
            rew += 1.0

        # collision penalty
        if agent.state.crashed:
            rew -= 1.0

        world.data_slot['total_step_number'] += 1
        return rew

    def observation(self, agent: Vehicle, world: World):
        """Build this agent's observation vector.

        The layout depends on add_direction_encoder: a one-hot direction
        hint ('train'/'keyboard') is prepended, followed by the agent pose,
        landmark positions (zeroed when out of view), obstacle positions
        and the other agents' relative poses.
        """
        def get_pos(obj, main_agent: Vehicle = None):
            # position of obj, optionally relative to main_agent
            main_shift_x = 0.0 if main_agent is None else main_agent.state.coordinate[0]
            main_shift_y = 0.0 if main_agent is None else main_agent.state.coordinate[1]
            x = obj.state.coordinate[0] - main_shift_x
            y = obj.state.coordinate[1] - main_shift_y

            if isinstance(obj, Vehicle):
                ctheta = np.cos(obj.state.theta)
                stheta = np.sin(obj.state.theta)
                return [x, y, ctheta, stheta]
            elif isinstance(obj, Entity):
                return [x, y]
            else:
                raise TypeError

        # positions of all obstacles in this agent's reference frame
        obstacle_pos = []
        for obstacle in world.obstacle_list:
            epos = np.array([obstacle.state.coordinate[0] - agent.state.coordinate[0],
                             obstacle.state.coordinate[1] - agent.state.coordinate[1]])
            obstacle_pos.append(epos)

        agent_pos = [get_pos(agent)]

        # landmark positions, masked to zero when out of view range
        in_view = 0.0
        landmark_pos = []
        for landmark in world.landmark_list:
            dist = coord_dist(agent.state.coordinate, landmark.state.coordinate)
            if dist < world.data_slot['view_threshold']:
                in_view = 1.0
                landmark_pos.append(get_pos(landmark, agent))
            else:
                landmark_pos.append([0, 0])

        # relative poses of all other agents
        other_pos = []
        for other in world.vehicle_list:
            if other is agent:
                continue
            other_pos.append(get_pos(other, agent))

        if self.add_direction_encoder == 'train':
            return np.concatenate([agent.data_slot['direction_obs']] + agent_pos + landmark_pos + obstacle_pos + other_pos + [[in_view]])
        elif self.add_direction_encoder == 'keyboard':
            key_direction = world.data_slot['key_direction']
            one_hot_direction = world.data_slot['direction_decoder'][key_direction]
            # the original wrapped the whole sum in an extra list, handing
            # np.concatenate a single ragged element; concatenate the
            # pieces exactly like the 'train' branch instead
            return np.concatenate([one_hot_direction] + agent_pos + landmark_pos + obstacle_pos + other_pos + [[in_view]])
        elif self.add_direction_encoder == 'disable':
            return np.concatenate(agent_pos + landmark_pos + obstacle_pos + other_pos)

    def updata_callback(self, world: World):
        """Per-step world update hook; this scenario needs none.

        The original body referenced undefined names (agent, other_pos,
        get_pos) and its annotation used lowercase `world`, raising
        NameError when the module was imported; keep it an explicit no-op.
        """
        pass

    def info(self, agent: Vehicle, world: World):
        """Return a per-agent diagnostic dict (empty for this scenario)."""
        agent_info: dict = {}
        return agent_info
World() 11 | 12 | # direction reward score coefficient 13 | self.direction_alpha:float = args.direction_alpha 14 | 15 | # direction reward type, train:init in reset, keyboard:set by keyboard, disable:no direction reward 16 | self.add_direction_encoder:str = args.add_direction_encoder 17 | assert self.add_direction_encoder in ['train','keyboard','disable'] 18 | 19 | #for simulate real world 20 | world.step_t = args.step_t 21 | world.sim_step = args.sim_step 22 | world.field_range = [-2.0,-2.0,2.0,2.0] 23 | 24 | # define the task parameters 25 | # one agent can know where is the lanmark 26 | world.data_slot['view_threshold'] = 10.0 27 | # store the keyboard direction 28 | world.data_slot['key_direction'] = 0 29 | 30 | # set world.GUI_port as port dir when usegui is true 31 | if args.usegui: 32 | world.GUI_port = args.gui_port 33 | else: 34 | world.GUI_port = None 35 | 36 | # add 3 agents 37 | agent_number = 3 38 | world.vehicle_list = [] 39 | for idx in range(agent_number): 40 | agent = Vehicle() 41 | agent.r_safe = 0.17 42 | agent.L_car = 0.25 43 | agent.W_car = 0.18 44 | agent.L_axis = 0.2 45 | agent.K_vel = 0.707 46 | agent.K_phi = 0.298 47 | agent.dv_dt = 2.0 48 | agent.dphi_dt = 3.0 49 | agent.color = [[1.0,idx/3.0,idx/3.0],1.0] 50 | world.vehicle_list.append(agent) 51 | 52 | 53 | # add landmark_list 54 | landmark_number = 1 55 | world.landmark_list = [] 56 | for idx in range(landmark_number): 57 | entity = Entity() 58 | entity.radius = 0.4 59 | entity.collideable = False 60 | entity.color = [[0.0,1.0,0.0],0.1] 61 | world.landmark_list.append(entity) 62 | 63 | # add obstacle_list 64 | obstacle_number = 2 65 | world.obstacle_list = [] 66 | for idx in range(obstacle_number): 67 | entity = Entity() 68 | entity.radius = 0.2 69 | entity.collideable = True 70 | entity.color = [[0.0,0.0,0.0],1.0] 71 | world.obstacle_list.append(entity) 72 | 73 | world.data_slot['max_step_number'] = 20 74 | # make initial conditions 75 | self.reset_world(world) 76 | return world 77 | 78 
| def reset_world(self, world:World): 79 | world.data_slot['total_step_number'] = 0 80 | # set random initial states 81 | for agent in world.vehicle_list: 82 | agent.state.theta = np.random.uniform(0,2*3.14159) 83 | agent.state.vel_b = 0 84 | agent.state.phi = 0 85 | agent.state.ctrl_vel_b = 0 86 | agent.state.ctrl_phi = 0 87 | agent.state.movable = True 88 | agent.state.crashed = False 89 | 90 | # place all landmark,obstacle and vehicles in the field with out conflict 91 | conflict = True 92 | while conflict: 93 | conflict = False 94 | all_circle = [] 95 | for landmark in world.landmark_list: 96 | for idx in range(2): 97 | scale = world.field_half_size[idx] 98 | trans = world.field_center[idx] 99 | norm_pos = np.random.uniform(-0.5,+0.5) 100 | norm_pos = norm_pos + (0.5 if norm_pos>0 else -0.5) 101 | landmark.state.coordinate[idx] = norm_pos * scale + trans 102 | all_circle.append((landmark.state.coordinate[0],landmark.state.coordinate[1],landmark.radius)) 103 | 104 | for obstacle in world.obstacle_list: 105 | for idx in range(2): 106 | scale = world.field_half_size[idx] 107 | trans = world.field_center[idx] 108 | norm_pos = np.random.uniform(-1,+1) 109 | obstacle.state.coordinate[idx] = norm_pos * scale*0.5 + trans 110 | all_circle.append((obstacle.state.coordinate[0],obstacle.state.coordinate[1],obstacle.radius)) 111 | 112 | for agent in world.vehicle_list: 113 | for idx in range(2): 114 | scale = world.field_half_size[idx] 115 | trans = world.field_center[idx] 116 | norm_pos = np.random.uniform(-1,+1) 117 | agent.state.coordinate[idx] = norm_pos * scale + trans 118 | all_circle.append((agent.state.coordinate[0],agent.state.coordinate[1],agent.r_safe)) 119 | 120 | for idx_a in range(len(all_circle)): 121 | for idx_b in range(idx_a+1,len(all_circle)): 122 | x_a = all_circle[idx_a][0] 123 | y_a = all_circle[idx_a][1] 124 | r_a = all_circle[idx_a][2] 125 | x_b = all_circle[idx_b][0] 126 | y_b = all_circle[idx_b][1] 127 | r_b = all_circle[idx_b][2] 128 | dis = ((x_a 
- x_b)**2 + (y_a - y_b)**2)**0.5 129 | if dis < r_a + r_b: 130 | conflict = True 131 | break 132 | if conflict: 133 | break 134 | for landmark in world.landmark_list: 135 | landmark.color[1] = 0.1 136 | # set real landmark and make it color solid 137 | world.data_slot['real_landmark'] = np.random.randint(len(world.landmark_list)) 138 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 139 | real_landmark.color[1] = 1.0 140 | 141 | # encode 4 directions into [0,1,2,3] 142 | def encode_direction(direction): 143 | if direction[0] > 0 and direction[1] > 0: 144 | return 0 145 | if direction[0] < 0 and direction[1] > 0: 146 | return 1 147 | if direction[0] < 0 and direction[1] < 0: 148 | return 2 149 | if direction[0] > 0 and direction[1] < 0: 150 | return 3 151 | # decode 4 direction code [0,1,2,3] into onehot vector 152 | world.data_slot['direction_decoder'] = [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]] 153 | # give each agent a direction code as a human hint 154 | for agent in world.vehicle_list: 155 | direction = [real_landmark.state.coordinate[0] - agent.state.coordinate[0], 156 | real_landmark.state.coordinate[1] - agent.state.coordinate[1]] 157 | agent.data_slot['direction_obs'] = world.data_slot['direction_decoder'][encode_direction(direction)] 158 | 159 | def reward(self, agent:Vehicle, world:World, old_world:World): 160 | # Adversaries are rewarded for collisions with agents 161 | rew:float = 0.0 162 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 163 | 164 | # direction reward 165 | prefer_action = naive_inference(real_landmark.state.coordinate[0] - agent.state.coordinate[0], 166 | real_landmark.state.coordinate[1] - agent.state.coordinate[1], 167 | agent.state.theta) 168 | same_direction = np.sign(agent.state.ctrl_vel_b) == prefer_action[0] and np.sign(agent.state.ctrl_phi) == prefer_action[1] 169 | if same_direction: 170 | rew += self.direction_alpha * 1.0 171 | 172 | 173 | # reach reward 174 | Allreach = True 175 | 
real_landmark = world.landmark_list[world.data_slot['real_landmark']] 176 | for agent_a in world.vehicle_list: 177 | dist = coord_dist(agent_a.state.coordinate, real_landmark.state.coordinate) 178 | if dist > agent_a.r_safe +real_landmark.radius: 179 | Allreach = False 180 | if Allreach: 181 | rew += 1.0 182 | 183 | # collision reward 184 | if agent.state.crashed: 185 | rew -= 1.0 186 | 187 | world.data_slot['total_step_number'] += 1 188 | return rew 189 | 190 | def observation(self,agent:Vehicle, world:World): 191 | def get_pos(obj , main_agent:Vehicle = None): 192 | main_shift_x = 0.0 if main_agent is None else main_agent.state.coordinate[0] 193 | main_shift_y = 0.0 if main_agent is None else main_agent.state.coordinate[1] 194 | x = obj.state.coordinate[0] - main_shift_x 195 | y = obj.state.coordinate[1] - main_shift_y 196 | 197 | if isinstance(obj,Vehicle): 198 | ctheta = np.cos(obj.state.theta) 199 | stheta = np.sin(obj.state.theta) 200 | return [x, y, ctheta, stheta] 201 | elif isinstance(obj,Entity): 202 | return [x, y] 203 | else: 204 | raise TypeError 205 | 206 | 207 | # get positions of all obstacle_list in this agent's reference frame 208 | obstacle_pos = [] 209 | for obstacle in world.obstacle_list: 210 | epos = np.array([obstacle.state.coordinate[0] - agent.state.coordinate[0], 211 | obstacle.state.coordinate[1] - agent.state.coordinate[1]]) 212 | obstacle_pos.append(epos) 213 | 214 | agent_pos = [get_pos(agent)] 215 | 216 | # check in view 217 | in_view = 0.0 218 | landmark_pos = [] 219 | for landmark in world.landmark_list: 220 | dist = coord_dist(agent.state.coordinate, landmark.state.coordinate) 221 | if dist < world.data_slot['view_threshold']: 222 | in_view = 1.0 223 | landmark_pos.append(get_pos(landmark, agent)) 224 | else: 225 | landmark_pos.append([0,0]) 226 | 227 | # communication of all other agents 228 | other_pos = [] 229 | 230 | for other in world.vehicle_list: 231 | if other is agent: continue 232 | other_pos.append(get_pos(other,agent)) 
233 | 234 | if self.add_direction_encoder == 'train': 235 | return np.concatenate([agent.data_slot['direction_obs']]+agent_pos + landmark_pos + obstacle_pos + other_pos + [[in_view]] ) 236 | elif self.add_direction_encoder == 'keyboard': 237 | key_direction = world.data_slot['key_direction'] 238 | one_hot_direction = world.data_slot['direction_decoder'][key_direction] 239 | return np.concatenate([[one_hot_direction] + agent_pos + landmark_pos + obstacle_pos + other_pos + [in_view]]) 240 | elif self.add_direction_encoder == 'disable': 241 | return np.concatenate(agent_pos + landmark_pos + obstacle_pos + other_pos) 242 | 243 | 244 | 245 | def info(self, agent:Vehicle, world:World): 246 | agent_info:dict = {} 247 | return agent_info -------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenarios/3p2t2f.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from MultiVehicleEnv.basic import World, Vehicle, Entity 3 | from MultiVehicleEnv.scenario import BaseScenario 4 | from MultiVehicleEnv.utils import coord_dist, naive_inference 5 | 6 | 7 | class Scenario(BaseScenario): 8 | def make_world(self,args): 9 | # init a world instance 10 | world = World() 11 | 12 | # direction reward score coefficient 13 | self.direction_alpha:float = args.direction_alpha 14 | 15 | # direction reward type, train:init in reset, keyboard:set by keyboard, disable:no direction reward 16 | self.add_direction_encoder:str = args.add_direction_encoder 17 | assert self.add_direction_encoder in ['train','keyboard','disable'] 18 | 19 | #for simulate real world 20 | world.step_t = args.step_t 21 | world.sim_step = args.sim_step 22 | world.field_range = [-2.0,-2.0,2.0,2.0] 23 | # define the task parameters 24 | # one agent can know where is the lanmark 25 | world.data_slot['view_threshold'] = 10.0 26 | # store the keyboard direction 27 | world.data_slot['key_direction'] = 0 28 | 29 | # set 
world.GUI_port as port dir when usegui is true 30 | if args.usegui: 31 | world.GUI_port = args.gui_port 32 | else: 33 | world.GUI_port = None 34 | 35 | # add 3 agents 36 | agent_number = 3 37 | world.vehicle_list = [] 38 | for idx in range(agent_number): 39 | agent = Vehicle() 40 | agent.r_safe = 0.17 41 | agent.L_car = 0.25 42 | agent.W_car = 0.18 43 | agent.L_axis = 0.2 44 | agent.K_vel = 0.707 45 | agent.K_phi = 0.298 46 | agent.dv_dt = 2.0 47 | agent.dphi_dt = 3.0 48 | agent.color = [[1.0,idx/3.0,idx/3.0],1.0] 49 | world.vehicle_list.append(agent) 50 | 51 | 52 | # add landmark_list 53 | landmark_number = 2 54 | world.landmark_list = [] 55 | for idx in range(landmark_number): 56 | entity = Entity() 57 | entity.radius = 0.2 58 | entity.collideable = False 59 | entity.color = [[0.0,1.0,0.0],0.1] 60 | world.landmark_list.append(entity) 61 | 62 | # add obstacle_list 63 | obstacle_number = 2 64 | world.obstacle_list = [] 65 | for idx in range(obstacle_number): 66 | entity = Entity() 67 | entity.radius = 0.2 68 | entity.collideable = True 69 | entity.color = [[0.0,0.0,0.0],1.0] 70 | world.obstacle_list.append(entity) 71 | 72 | world.data_slot['max_step_number'] = 20 73 | # make initial conditions 74 | self.reset_world(world) 75 | return world 76 | 77 | def reset_world(self, world:World): 78 | world.data_slot['total_step_number'] = 0 79 | # set random initial states 80 | for agent in world.vehicle_list: 81 | agent.state.theta = np.random.uniform(0,2*3.14159) 82 | agent.state.vel_b = 0 83 | agent.state.phi = 0 84 | agent.state.ctrl_vel_b = 0 85 | agent.state.ctrl_phi = 0 86 | agent.state.movable = True 87 | agent.state.crashed = False 88 | 89 | # place all landmark,obstacle and vehicles in the field with out conflict 90 | conflict = True 91 | while conflict: 92 | conflict = False 93 | all_circle = [] 94 | for landmark in world.landmark_list: 95 | for idx in range(2): 96 | scale = world.field_half_size[idx] 97 | trans = world.field_center[idx] 98 | norm_pos = 
np.random.uniform(-0.5,+0.5) 99 | norm_pos = norm_pos + (0.5 if norm_pos>0 else -0.5) 100 | landmark.state.coordinate[idx] = norm_pos * scale + trans 101 | all_circle.append((landmark.state.coordinate[0],landmark.state.coordinate[1],landmark.radius)) 102 | 103 | for obstacle in world.obstacle_list: 104 | for idx in range(2): 105 | scale = world.field_half_size[idx] 106 | trans = world.field_center[idx] 107 | norm_pos = np.random.uniform(-1,+1) 108 | obstacle.state.coordinate[idx] = norm_pos * scale*0.5 + trans 109 | all_circle.append((obstacle.state.coordinate[0],obstacle.state.coordinate[1],obstacle.radius)) 110 | 111 | for agent in world.vehicle_list: 112 | for idx in range(2): 113 | scale = world.field_half_size[idx] 114 | trans = world.field_center[idx] 115 | norm_pos = np.random.uniform(-1,+1) 116 | agent.state.coordinate[idx] = norm_pos * scale + trans 117 | all_circle.append((agent.state.coordinate[0],agent.state.coordinate[1],agent.r_safe)) 118 | 119 | for idx_a in range(len(all_circle)): 120 | for idx_b in range(idx_a+1,len(all_circle)): 121 | x_a = all_circle[idx_a][0] 122 | y_a = all_circle[idx_a][1] 123 | r_a = all_circle[idx_a][2] 124 | x_b = all_circle[idx_b][0] 125 | y_b = all_circle[idx_b][1] 126 | r_b = all_circle[idx_b][2] 127 | dis = ((x_a - x_b)**2 + (y_a - y_b)**2)**0.5 128 | if dis < r_a + r_b: 129 | conflict = True 130 | break 131 | if conflict: 132 | break 133 | for landmark in world.landmark_list: 134 | landmark.color[1] = 0.1 135 | # set real landmark and make it color solid 136 | world.data_slot['real_landmark'] = np.random.randint(len(world.landmark_list)) 137 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 138 | real_landmark.color[1] = 1.0 139 | 140 | # encode 4 directions into [0,1,2,3] 141 | def encode_direction(direction): 142 | if direction[0] > 0 and direction[1] > 0: 143 | return 0 144 | if direction[0] < 0 and direction[1] > 0: 145 | return 1 146 | if direction[0] < 0 and direction[1] < 0: 147 | return 2 
148 | if direction[0] > 0 and direction[1] < 0: 149 | return 3 150 | # decode 4 direction code [0,1,2,3] into onehot vector 151 | world.data_slot['direction_decoder'] = [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]] 152 | # give each agent a direction code as a human hint 153 | for agent in world.vehicle_list: 154 | direction = [real_landmark.state.coordinate[0] - agent.state.coordinate[0], 155 | real_landmark.state.coordinate[1] - agent.state.coordinate[1]] 156 | agent.data_slot['direction_obs'] = world.data_slot['direction_decoder'][encode_direction(direction)] 157 | 158 | def reward(self, agent:Vehicle, world:World, old_world:World): 159 | # Adversaries are rewarded for collisions with agents 160 | rew:float = 0.0 161 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 162 | 163 | # direction reward 164 | prefer_action = naive_inference(real_landmark.state.coordinate[0] - agent.state.coordinate[0], 165 | real_landmark.state.coordinate[1] - agent.state.coordinate[1], 166 | agent.state.theta) 167 | same_direction = np.sign(agent.state.ctrl_vel_b) == prefer_action[0] and np.sign(agent.state.ctrl_phi) == prefer_action[1] 168 | if same_direction: 169 | rew += self.direction_alpha * 1.0 170 | 171 | 172 | # reach reward 173 | Allreach = True 174 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 175 | for agent_a in world.vehicle_list: 176 | dist = coord_dist(agent_a.state.coordinate, real_landmark.state.coordinate) 177 | if dist > agent_a.r_safe +real_landmark.radius: 178 | Allreach = False 179 | if Allreach: 180 | rew += 1.0 181 | 182 | # collision reward 183 | if agent.state.crashed: 184 | rew -= 1.0 185 | 186 | world.data_slot['total_step_number'] += 1 187 | return rew 188 | 189 | def observation(self,agent:Vehicle, world:World): 190 | def get_pos(obj , main_agent:Vehicle = None): 191 | main_shift_x = 0.0 if main_agent is None else main_agent.state.coordinate[0] 192 | main_shift_y = 0.0 if main_agent is None else 
main_agent.state.coordinate[1] 193 | x = obj.state.coordinate[0] - main_shift_x 194 | y = obj.state.coordinate[1] - main_shift_y 195 | 196 | if isinstance(obj,Vehicle): 197 | ctheta = np.cos(obj.state.theta) 198 | stheta = np.sin(obj.state.theta) 199 | return [x, y, ctheta, stheta] 200 | elif isinstance(obj,Entity): 201 | return [x, y] 202 | else: 203 | raise TypeError 204 | 205 | 206 | # get positions of all obstacle_list in this agent's reference frame 207 | obstacle_pos = [] 208 | for obstacle in world.obstacle_list: 209 | epos = np.array([obstacle.state.coordinate[0] - agent.state.coordinate[0], 210 | obstacle.state.coordinate[1] - agent.state.coordinate[1]]) 211 | obstacle_pos.append(epos) 212 | 213 | agent_pos = [get_pos(agent)] 214 | 215 | # check in view 216 | in_view = 0.0 217 | landmark_pos = [] 218 | for landmark in world.landmark_list: 219 | dist = coord_dist(agent.state.coordinate, landmark.state.coordinate) 220 | if dist < world.data_slot['view_threshold']: 221 | in_view = 1.0 222 | landmark_pos.append(get_pos(landmark, agent)) 223 | else: 224 | landmark_pos.append([0,0]) 225 | 226 | # communication of all other agents 227 | other_pos = [] 228 | 229 | for other in world.vehicle_list: 230 | if other is agent: continue 231 | other_pos.append(get_pos(other,agent)) 232 | 233 | if self.add_direction_encoder == 'train': 234 | return np.concatenate([agent.data_slot['direction_obs']]+agent_pos + landmark_pos + obstacle_pos + other_pos + [[in_view]] ) 235 | elif self.add_direction_encoder == 'keyboard': 236 | key_direction = world.data_slot['key_direction'] 237 | one_hot_direction = world.data_slot['direction_decoder'][key_direction] 238 | return np.concatenate([[one_hot_direction] + agent_pos + landmark_pos + obstacle_pos + other_pos + [in_view]]) 239 | elif self.add_direction_encoder == 'disable': 240 | return np.concatenate(agent_pos + landmark_pos + obstacle_pos + other_pos) 241 | 242 | 243 | 244 | def info(self, agent:Vehicle, world:World): 245 | 
def load(name):
    """Load a scenario module from this package's directory by file name.

    ``name`` is a file name such as ``'multi_reach.py'``. Returns the executed
    module object. This replaces the old ``imp.load_source`` call: the ``imp``
    module has been deprecated since Python 3.4 and was removed in 3.12.
    """
    # local import keeps this file's existing top-level imports untouched
    import importlib.util

    pathname = osp.join(osp.dirname(__file__), name)
    module_name = osp.splitext(osp.basename(name))[0]
    spec = importlib.util.spec_from_file_location(module_name, pathname)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
def place_wo_conflict(world:World, agent_list, landmark_list, obstacle_list):
    """Randomly (re)place the selected vehicles, landmarks and obstacles.

    Positions are rejection-sampled until none of the placed items overlaps
    an obstacle, a landmark, or (for vehicles) the vehicle's paired landmark.
    Raises RuntimeError after 1000 failed attempts.
    """
    for agent_idx in agent_list:
        world.vehicle_list[agent_idx].state.reset()
        world.vehicle_list[agent_idx].state.theta = np.random.uniform(0, 2*3.14159)
    safe_count = 1000
    while True:
        if safe_count < 0:
            raise RuntimeError
        safe_count -= 1
        # sample candidate positions uniformly over the field
        for agent_idx in agent_list:
            for idx in range(2):
                scale = world.field_half_size[idx]
                trans = world.field_center[idx]
                norm_pos = np.random.uniform(-1, +1)
                world.vehicle_list[agent_idx].state.coordinate[idx] = norm_pos * scale + trans

        for landmark_idx in landmark_list:
            for idx in range(2):
                scale = world.field_half_size[idx]
                trans = world.field_center[idx]
                norm_pos = np.random.uniform(-1, +1)
                world.landmark_list[landmark_idx].state.coordinate[idx] = norm_pos * scale + trans

        for obstacle_idx in obstacle_list:
            for idx in range(2):
                scale = world.field_half_size[idx]
                trans = world.field_center[idx]
                norm_pos = np.random.uniform(-1, +1)
                world.obstacle_list[obstacle_idx].state.coordinate[idx] = norm_pos * scale + trans

        # bug fix: the original used bare ``continue`` statements inside the
        # inner for-loops (no-ops with respect to the while-loop) and then
        # ``break``-ed unconditionally, so a detected overlap never triggered
        # re-placement. Overlaps are now accumulated in a flag, mirroring the
        # working implementation in multi_reach_road.py.
        no_conflict = True
        for landmark_idx in landmark_list:
            landmark = world.landmark_list[landmark_idx]
            for obstacle in world.obstacle_list:
                dis = coord_dist(landmark.state.coordinate, obstacle.state.coordinate)
                if dis < landmark.radius + obstacle.radius:
                    no_conflict = False

        for obstacle_idx in obstacle_list:
            obstacle = world.obstacle_list[obstacle_idx]
            for landmark in world.landmark_list:
                dis = coord_dist(obstacle.state.coordinate, landmark.state.coordinate)
                if dis < obstacle.radius + landmark.radius:
                    no_conflict = False

        for agent_idx in agent_list:
            agent = world.vehicle_list[agent_idx]
            for obstacle in world.obstacle_list:
                dis = coord_dist(agent.state.coordinate, obstacle.state.coordinate)
                if dis < agent.r_safe + obstacle.radius:
                    no_conflict = False
            # each vehicle must also not start on top of its paired landmark
            landmark = world.landmark_list[agent_idx]
            dis = coord_dist(agent.state.coordinate, landmark.state.coordinate)
            if dis < agent.r_safe + landmark.radius:
                no_conflict = False

        if no_conflict:
            break

def check_conflict(world:World):
    """Return True if any vehicle overlaps another vehicle or an obstacle.

    Landmarks are non-collideable in this scenario, so they are not tested.
    (The original also built landmark coordinate/radius lists that were never
    read -- that dead code has been removed.)
    """
    obstacle_coord_list = []
    obstacle_radius_list = []
    for obstacle in world.obstacle_list:
        obstacle_coord_list.append(obstacle.state.coordinate)
        obstacle_radius_list.append(obstacle.radius)

    agent_coord_list = []
    agent_radius_list = []
    for agent in world.vehicle_list:
        agent_coord_list.append(agent.state.coordinate)
        agent_radius_list.append(agent.r_safe)

    for idx_a in range(len(agent_coord_list)):
        for idx in range(len(obstacle_coord_list)):
            dis = coord_dist(agent_coord_list[idx_a], obstacle_coord_list[idx])
            if dis < agent_radius_list[idx_a] + obstacle_radius_list[idx]:
                return True
        for idx_b in range(idx_a+1, len(agent_coord_list)):
            dis = coord_dist(agent_coord_list[idx_a], agent_coord_list[idx_b])
            if dis < agent_radius_list[idx_a] + agent_radius_list[idx_b]:
                return True
    return False
    def reset_world(self, world:World):
        """Reset the episode: randomize headings and re-place every entity."""
        world.data_slot['total_step_number'] = 0
        # set random initial states
        for agent in world.vehicle_list:
            agent.state.reset()
            agent.state.theta = np.random.uniform(0,2*3.14159)
        # re-place every vehicle, landmark and obstacle (all indices)
        agent_list = [idx for idx in range(len(world.vehicle_list))]
        landmark_list = [idx for idx in range(len(world.landmark_list))]
        obstacle_list = [idx for idx in range(len(world.obstacle_list))]
        place_wo_conflict(world, agent_list, landmark_list, obstacle_list)

    def done(self, agent:Vehicle, world:World):
        """Episodes never terminate early; the trainer enforces the step limit."""
        return False

    def reward(self, agent:Vehicle, world:World, old_world:World = None):
        """Dense shaped reward shared across agents plus a per-agent crash penalty.

        NOTE(review): with the default old_world=None this raises AttributeError;
        presumably the caller always supplies old_world -- confirm.
        """
        rew:float = 0.0

        # progress shaping: reward each vehicle for closing distance to its
        # paired landmark (same index), plus a +10 bonus on reach
        for agent_old, agent_new, landmark in zip(old_world.vehicle_list, world.vehicle_list, world.landmark_list):
            dist_old = coord_dist(agent_old.state.coordinate, landmark.state.coordinate)
            dist_new = coord_dist(agent_new.state.coordinate, landmark.state.coordinate)
            rew += self.reward_coef*(dist_old-dist_new)
            if dist_new < agent_new.r_safe + landmark.radius:
                rew += 10.0

        # NOTE(review): this ADDS reward proportional to squared commanded
        # speed; a control-effort term is usually subtracted -- confirm sign.
        for agent_a in world.vehicle_list:
            rew += self.control_coef * agent_a.state.ctrl_vel_b**2

        # collision reward
        if agent.state.crashed:
            rew -= 10.0

        # NOTE(review): incremented once per reward() call, i.e. once per agent
        # per env step -- confirm this step-counting is intended.
        world.data_slot['total_step_number'] += 1
        return rew

    def observation(self,agent:Vehicle, world:World):
        """Observation: own pose + own landmark rel. position + obstacle rel.
        positions + other vehicles' rel. poses, concatenated into one vector."""
        def get_pos(obj , main_agent:Vehicle = None):
            # position of obj, optionally translated into main_agent's frame;
            # vehicles additionally report cos/sin of heading
            main_shift_x = 0.0 if main_agent is None else main_agent.state.coordinate[0]
            main_shift_y = 0.0 if main_agent is None else main_agent.state.coordinate[1]
            x = obj.state.coordinate[0] - main_shift_x
            y = obj.state.coordinate[1] - main_shift_y

            if isinstance(obj,Vehicle):
                ctheta = np.cos(obj.state.theta)
                stheta = np.sin(obj.state.theta)
                return [x, y, ctheta, stheta]
            elif isinstance(obj,Entity):
                return [x, y]
            else:
                raise TypeError


        # get positions of all obstacle_list in this agent's reference frame
        obstacle_pos = []
        for obstacle in world.obstacle_list:
            epos = np.array([obstacle.state.coordinate[0] - agent.state.coordinate[0],
                             obstacle.state.coordinate[1] - agent.state.coordinate[1]])
            obstacle_pos.append(epos)

        agent_pos = [get_pos(agent)]

        # check in view

        # only the landmark paired with this agent (same index) is observed
        landmark_pos = []
        idx = world.vehicle_list.index(agent)
        landmark_pos.append(get_pos(world.landmark_list[idx], agent))

        # communication of all other agents
        other_pos = []

        for other in world.vehicle_list:
            if other is agent: continue
            other_pos.append(get_pos(other,agent))
        return np.concatenate(agent_pos + landmark_pos + obstacle_pos + other_pos)

    def updata_callback(self, world:World):
        """After each env step: respawn crashed vehicles and re-place landmarks
        that have been reached (reach_list is passed as the landmark list)."""
        crashed_list = []
        reach_list = []
        for idx in range(len(world.vehicle_list)):
            agent = world.vehicle_list[idx]
            landmark = world.landmark_list[idx]
            if agent.state.crashed:
                crashed_list.append(idx)
            dist = coord_dist(agent.state.coordinate, landmark.state.coordinate)
            if dist < agent.r_safe + landmark.radius:
                reach_list.append(idx)
        place_wo_conflict(world,crashed_list,reach_list,[])

    def info(self, agent:Vehicle, world:World):
        """Per-agent diagnostic info (none for this scenario)."""
        agent_info:dict = {}
        return agent_info
-------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenarios/multi_reach_road.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from MultiVehicleEnv.basic import World, Vehicle, Entity 3 | from MultiVehicleEnv.scenario import BaseScenario 4 | from MultiVehicleEnv.utils import coord_dist, hsv2rgb 5 | 6 | 7 | def encode_direction(coord): 8 | x = coord[0] 9 | y = coord[1] 10 | code = (2 if x < y else 0) + (1 if x < -y else 0) 11 | return code 12 | 13 | 14 | def default_vehicle(): 15 | agent = Vehicle() 16 | agent.r_safe = 0.17 17 | agent.L_car = 0.250 18 | agent.W_car = 0.185 19 | agent.L_axis = 0.20 20 | agent.K_vel = 0.361 21 | agent.K_phi = 0.561 22 | agent.dv_dt = 2.17 23 | agent.dphi_dt = 2.10 24 | agent.color = [[1.0, 0.0, 0.0], 1.0] 25 | agent.discrete_table = None 26 | return agent 27 | 28 | 29 | def place_wo_conflict(world: World, agent_list, landmark_list, obstacle_list): 30 | for agent_idx in agent_list: 31 | state = world.vehicle_list[agent_idx].state 32 | state.reset() 33 | state.theta = np.random.uniform(0, 2*3.14159) 34 | safe_count = 1000 35 | while True: 36 | if safe_count < 0: 37 | raise RuntimeError 38 | safe_count -= 1 39 | for agent_idx in agent_list: 40 | # place all agent around the ground 41 | x = np.random.uniform(-2, +2) 42 | y = 2.25*(np.random.randint(2)*2-1) 43 | if np.random.randint(2) == 1: 44 | x, y = y, x 45 | world.vehicle_list[agent_idx].state.coordinate[0] = x 46 | world.vehicle_list[agent_idx].state.coordinate[1] = y 47 | 48 | for landmark_idx in landmark_list: 49 | # place all landmark around the ground 50 | x = np.random.uniform(-2, +2) 51 | y = 2.25*(np.random.randint(2)*2-1) 52 | if np.random.randint(2) == 1: 53 | x, y = y, x 54 | world.landmark_list[landmark_idx].state.coordinate[0] = x 55 | world.landmark_list[landmark_idx].state.coordinate[1] = y 56 | 57 | for obstacle_idx in obstacle_list: 58 | for idx in 
range(2): 59 | scale = world.field_half_size[idx] 60 | trans = world.field_center[idx] 61 | norm_pos = np.random.uniform(-1, +1) 62 | state = world.obstacle_list[obstacle_idx].state 63 | state.coordinate[idx] = norm_pos * scale + trans 64 | 65 | no_conflict_flag = True 66 | for landmark_idx in landmark_list: 67 | landmark = world.landmark_list[landmark_idx] 68 | agent = world.vehicle_list[landmark_idx] 69 | code1 = encode_direction(landmark.state.coordinate) 70 | code2 = encode_direction(agent.state.coordinate) 71 | if code1 == code2: 72 | no_conflict_flag = False 73 | for obstacle in world.obstacle_list: 74 | dis = coord_dist(landmark.state.coordinate, 75 | obstacle.state.coordinate) 76 | if dis < landmark.radius + obstacle.radius: 77 | no_conflict_flag = False 78 | for agent in world.vehicle_list: 79 | dis = coord_dist(agent.state.coordinate, 80 | landmark.state.coordinate) 81 | if dis < landmark.radius + agent.r_safe: 82 | no_conflict_flag = False 83 | 84 | for obstacle_idx in obstacle_list: 85 | obstacle = world.obstacle_list[obstacle_idx] 86 | for landmark in world.landmark_list: 87 | dis = coord_dist(obstacle.state.coordinate, 88 | landmark.state.coordinate) 89 | if dis < obstacle.radius + landmark.radius: 90 | no_conflict_flag = False 91 | for agent in world.vehicle_list: 92 | dis = coord_dist(agent.state.coordinate, 93 | obstacle.state.coordinate) 94 | if dis < obstacle.radius + agent.r_safe: 95 | no_conflict_flag = False 96 | 97 | for agent_idx in agent_list: 98 | agent = world.vehicle_list[agent_idx] 99 | for obstacle in world.obstacle_list: 100 | dis = coord_dist(agent.state.coordinate, 101 | obstacle.state.coordinate) 102 | if dis < agent.r_safe + obstacle.radius: 103 | no_conflict_flag = False 104 | landmark = world.landmark_list[agent_idx] 105 | dis = coord_dist(agent.state.coordinate, landmark.state.coordinate) 106 | if dis < agent.r_safe + landmark.radius: 107 | no_conflict_flag = False 108 | 109 | code1 = encode_direction(landmark.state.coordinate) 
110 | code2 = encode_direction(agent.state.coordinate) 111 | if code1 == code2: 112 | no_conflict_flag = False 113 | if no_conflict_flag: 114 | break 115 | 116 | 117 | def check_conflict(world: World): 118 | obstacle_coord_list = [] 119 | obstacle_radius_list = [] 120 | for obstacle in world.obstacle_list: 121 | obstacle_coord_list.append(obstacle.state.coordinate) 122 | obstacle_radius_list.append(obstacle.radius) 123 | 124 | landmark_coord_list = [] 125 | landmark_radius_list = [] 126 | for landmark in world.landmark_list: 127 | landmark_coord_list.append(landmark.state.coordinate) 128 | landmark_radius_list.append(landmark.radius) 129 | 130 | agent_coord_list = [] 131 | agent_radius_list = [] 132 | for agent in world.vehicle_list: 133 | agent_coord_list.append(agent.state.coordinate) 134 | agent_radius_list.append(agent.r_safe) 135 | 136 | for idx_a in range(len(agent_coord_list)): 137 | for idx in range(len(obstacle_coord_list)): 138 | dis = coord_dist(agent_coord_list[idx_a], obstacle_coord_list[idx]) 139 | if dis < agent_radius_list[idx_a] + obstacle_radius_list[idx]: 140 | return True 141 | for idx_b in range(idx_a+1, len(agent_coord_list)): 142 | dis = coord_dist(agent_coord_list[idx_a], agent_coord_list[idx_b]) 143 | if dis < agent_radius_list[idx_a] + agent_radius_list[idx_b]: 144 | return True 145 | return False 146 | 147 | 148 | class Scenario(BaseScenario): 149 | def make_world(self, args): 150 | self.reward_coef = 1.0 151 | self.control_coef = 1.0 152 | if 'reward_coef' in args.__dict__.keys(): 153 | self.reward_coef = args.reward_coef 154 | if 'reward_coef' in args.__dict__.keys(): 155 | self.control_coef = args.control_coef 156 | # init a world instance 157 | world = World() 158 | 159 | # for simulate real world 160 | world.step_t = args.step_t 161 | world.sim_step = args.sim_step 162 | world.field_range = [-2.5, -2.5, 2.5, 2.5] 163 | 164 | # set world.GUI_port as port dir when usegui is true 165 | if args.usegui: 166 | world.GUI_port = 
    def reward(self, agent: Vehicle, world: World, old_world: World = None):
        """Dense shaped reward shared across agents plus a per-agent crash penalty.

        NOTE(review): with the default old_world=None this raises AttributeError;
        presumably the caller always supplies old_world -- confirm.
        """
        rew: float = 0.0

        # progress shaping: reward each vehicle for closing distance to its
        # paired landmark (same index), plus a +10 bonus on reach
        zip_list = zip(old_world.vehicle_list,
                       world.vehicle_list,
                       world.landmark_list)
        for agent_old, agent_new, landmark in zip_list:
            dist_old = coord_dist(agent_old.state.coordinate,
                                  landmark.state.coordinate)
            dist_new = coord_dist(agent_new.state.coordinate,
                                  landmark.state.coordinate)
            rew += self.reward_coef*(dist_old-dist_new)
            if dist_new < agent_new.r_safe + landmark.radius:
                rew += 10.0

        # NOTE(review): this ADDS reward proportional to squared commanded
        # speed; a control-effort term is usually subtracted -- confirm sign.
        for agent_a in world.vehicle_list:
            rew += self.control_coef * agent_a.state.ctrl_vel_b**2

        # collision reward
        if agent.state.crashed:
            rew -= 10.0

        # NOTE(review): incremented once per reward() call, i.e. once per agent
        # per env step -- confirm this step-counting is intended.
        world.data_slot['total_step_number'] += 1
        return rew

    def observation(self, agent: Vehicle, world: World):
        """Observation built from the external pose feed in world.data_interface.

        Vehicle poses come from the per-vehicle data dicts (keys 'x','y','theta')
        rather than from simulator state, so the same code path serves real
        robots; entities still read their simulated coordinates.
        """
        agent_data = world.data_interface[agent.vehicle_id]

        def get_pos(obj, main_agent: dict = None):
            # obj is either an Entity (simulated) or a pose dict from the data
            # interface; optionally translate into main_agent's frame
            if isinstance(obj, Entity):
                obj_x = obj.state.coordinate[0]
                obj_y = obj.state.coordinate[1]
            else:
                obj_x = obj['x']
                obj_y = obj['y']

            main_shift_x = 0.0 if main_agent is None else main_agent['x']
            main_shift_y = 0.0 if main_agent is None else main_agent['y']
            x = obj_x - main_shift_x
            y = obj_y - main_shift_y

            if isinstance(obj, Entity):
                return [x, y]
            elif isinstance(obj, dict):
                ctheta = np.cos(obj['theta'])
                stheta = np.sin(obj['theta'])
                return [x, y, ctheta, stheta]
            else:
                raise TypeError

        # get positions of all obstacle_list in this agent's reference frame
        obstacle_pos = []
        for obstacle in world.obstacle_list:
            obstacle_pos.append(get_pos(obstacle, agent_data))

        agent_pos = [get_pos(agent_data)]

        # check in view
        # NOTE(review): the paired landmark is observed in the WORLD frame
        # (no main_agent passed), unlike multi_reach.py which uses the agent
        # frame -- confirm this asymmetry is intended.
        landmark_pos = []
        idx = world.vehicle_list.index(agent)
        landmark_pos.append(get_pos(world.landmark_list[idx]))

        # communication of all other agents
        other_pos = []

        for other in world.vehicle_list:
            if other is agent:
                continue
            other_data = world.data_interface[other.vehicle_id]
            other_pos.append(get_pos(other_data, agent_data))
        return np.concatenate(agent_pos + landmark_pos +
                              obstacle_pos + other_pos)
world.vehicle_list: 286 | if other is agent: 287 | continue 288 | other_data = world.data_interface[other.vehicle_id] 289 | other_pos.append(get_pos(other_data, agent_data)) 290 | return np.concatenate(agent_pos + landmark_pos + 291 | obstacle_pos + other_pos) 292 | 293 | def updata_callback(self, world: World): 294 | crashed_list = [] 295 | reach_list = [] 296 | for idx in range(len(world.vehicle_list)): 297 | agent = world.vehicle_list[idx] 298 | landmark = world.landmark_list[idx] 299 | if agent.state.crashed: 300 | crashed_list.append(idx) 301 | dist = coord_dist(agent.state.coordinate, 302 | landmark.state.coordinate) 303 | if dist < landmark.radius: 304 | reach_list.append(idx) 305 | place_wo_conflict(world, crashed_list, reach_list, []) 306 | 307 | def ros_updata_callback(self, world: World): 308 | crashed_list = [] 309 | reach_list = [] 310 | for idx in range(len(world.vehicle_list)): 311 | agent = world.vehicle_list[idx] 312 | landmark = world.landmark_list[idx] 313 | if agent.state.crashed: 314 | crashed_list.append(idx) 315 | dist = coord_dist(agent.state.coordinate, 316 | landmark.state.coordinate) 317 | if dist < landmark.radius: 318 | reach_list.append(idx) 319 | place_wo_conflict(world, [], reach_list, []) 320 | 321 | def info(self, agent: Vehicle, world: World): 322 | agent_info: dict = {} 323 | return agent_info 324 | -------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenarios/sparse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from MultiVehicleEnv.basic import World, Vehicle, Entity 3 | from MultiVehicleEnv.scenario import BaseScenario 4 | from MultiVehicleEnv.utils import coord_dist, naive_inference 5 | 6 | def reset_state(agent:Vehicle): 7 | agent.state.vel_b = 0 8 | agent.state.phi = 0 9 | agent.state.ctrl_vel_b = 0 10 | agent.state.ctrl_phi = 0 11 | agent.state.movable = True 12 | agent.state.crashed = False 13 | 
agent.state.theta 14 | agent.state.coordinate[0] = 0 15 | agent.state.coordinate[1] = 0 16 | 17 | def check_confilict(world:World): 18 | coord_list = [] 19 | radius_list = [] 20 | for landmark in world.landmark_list: 21 | coord_list.append(landmark.state.coordinate) 22 | radius_list.append(landmark.radius) 23 | for obstacle in world.obstacle_list: 24 | coord_list.append(obstacle.state.coordinate) 25 | radius_list.append(obstacle.radius) 26 | for agent in world.vehicle_list: 27 | coord_list.append(agent.state.coordinate) 28 | radius_list.append(agent.r_safe) 29 | 30 | for idx_a in range(len(coord_list)): 31 | for idx_b in range(idx_a+1,len(coord_list)): 32 | dis = coord_dist(coord_list[idx_a], coord_list[idx_b]) 33 | if dis < radius_list[idx_a] + radius_list[idx_b]: 34 | return True 35 | return False 36 | 37 | class Scenario(BaseScenario): 38 | def make_world(self,args): 39 | self.reward_coef = args.reward_coef 40 | self.control_coef = args.control_coef 41 | # init a world instance 42 | world = World() 43 | 44 | #for simulate real world 45 | world.step_t = args.step_t 46 | world.sim_step = args.sim_step 47 | world.field_range = [-2.0,-2.0,2.0,2.0] 48 | 49 | # set world.GUI_port as port dir when usegui is true 50 | if args.usegui: 51 | world.GUI_port = args.gui_port 52 | else: 53 | world.GUI_port = None 54 | 55 | # add 3 agents 56 | agent_number = 1 57 | world.vehicle_list = [] 58 | for idx in range(agent_number): 59 | agent = Vehicle() 60 | agent.r_safe = 0.17 61 | agent.L_car = 0.25 62 | agent.W_car = 0.18 63 | agent.L_axis = 0.2 64 | agent.K_vel = 0.707 65 | agent.K_phi = 0.298 66 | agent.dv_dt = 2.0 67 | agent.dphi_dt = 3.0 68 | agent.color = [[1.0,idx/3.0,idx/3.0],1.0] 69 | agent.discrete_table = None 70 | world.vehicle_list.append(agent) 71 | 72 | 73 | # add landmark_list 74 | landmark_number = 1 75 | world.landmark_list = [] 76 | for idx in range(landmark_number): 77 | entity = Entity() 78 | entity.radius = 0.4 79 | entity.collideable = False 80 | entity.color 
= [[0.0,1.0,0.0],0.1] 81 | world.landmark_list.append(entity) 82 | 83 | # add obstacle_list 84 | obstacle_number = 0 85 | world.obstacle_list = [] 86 | for idx in range(obstacle_number): 87 | entity = Entity() 88 | entity.radius = 0.2 89 | entity.collideable = True 90 | entity.color = [[0.0,0.0,0.0],1.0] 91 | world.obstacle_list.append(entity) 92 | 93 | world.data_slot['max_step_number'] = 40 94 | # make initial conditions 95 | self.reset_world(world) 96 | return world 97 | 98 | def reset_world(self, world:World): 99 | world.data_slot['total_step_number'] = 0 100 | # set random initial states 101 | for agent in world.vehicle_list: 102 | reset_state(agent) 103 | agent.state.theta = np.random.uniform(0,2*3.14159) 104 | 105 | 106 | # place all landmark,obstacle and vehicles in the field with out conflict 107 | safe_count = 1000 108 | while True: 109 | if safe_count == 0: 110 | print('can not place objects with no conflict') 111 | safe_count -= 1 112 | for landmark in world.landmark_list: 113 | for idx in range(2): 114 | scale = world.field_half_size[idx] 115 | trans = world.field_center[idx] 116 | norm_pos = np.random.uniform(-1,+1) 117 | landmark.state.coordinate[idx] = norm_pos * scale + trans 118 | 119 | for obstacle in world.obstacle_list: 120 | for idx in range(2): 121 | scale = world.field_half_size[idx] 122 | trans = world.field_center[idx] 123 | norm_pos = np.random.uniform(-1,+1) 124 | obstacle.state.coordinate[idx] = norm_pos * scale + trans 125 | 126 | for agent in world.vehicle_list: 127 | for idx in range(2): 128 | scale = world.field_half_size[idx] 129 | trans = world.field_center[idx] 130 | norm_pos = np.random.uniform(-1,+1) 131 | agent.state.coordinate[idx] = norm_pos * scale + trans 132 | 133 | conflict = check_confilict(world) 134 | if not conflict: 135 | break 136 | 137 | 138 | 139 | def done(self, agent:Vehicle, world:World): 140 | return False 141 | 142 | def reward(self, agent:Vehicle, world:World, old_world:World = None): 143 | rew:float = 0.0 
144 | 145 | # reach reward 146 | Allreach = True 147 | 148 | for agent_old, agent_new, landmark in zip(old_world.vehicle_list, world.vehicle_list, world.landmark_list): 149 | dist_old = coord_dist(agent_old.state.coordinate, landmark.state.coordinate) 150 | dist_new = coord_dist(agent_new.state.coordinate, landmark.state.coordinate) 151 | rew += self.reward_coef*(dist_old-dist_new) 152 | if dist_new < landmark.radius: 153 | rew += 10.0 154 | 155 | for agent_a in world.vehicle_list: 156 | rew += self.control_coef * agent_a.state.ctrl_vel_b**2 157 | 158 | # collision reward 159 | if agent.state.crashed: 160 | rew -= 10.0 161 | 162 | world.data_slot['total_step_number'] += 1 163 | return rew 164 | 165 | def observation(self,agent:Vehicle, world:World): 166 | def get_pos(obj , main_agent:Vehicle = None): 167 | main_shift_x = 0.0 if main_agent is None else main_agent.state.coordinate[0] 168 | main_shift_y = 0.0 if main_agent is None else main_agent.state.coordinate[1] 169 | x = obj.state.coordinate[0] - main_shift_x 170 | y = obj.state.coordinate[1] - main_shift_y 171 | 172 | if isinstance(obj,Vehicle): 173 | ctheta = np.cos(obj.state.theta) 174 | stheta = np.sin(obj.state.theta) 175 | return [x, y, ctheta, stheta] 176 | elif isinstance(obj,Entity): 177 | return [x, y] 178 | else: 179 | raise TypeError 180 | 181 | 182 | # get positions of all obstacle_list in this agent's reference frame 183 | obstacle_pos = [] 184 | for obstacle in world.obstacle_list: 185 | epos = np.array([obstacle.state.coordinate[0] - agent.state.coordinate[0], 186 | obstacle.state.coordinate[1] - agent.state.coordinate[1]]) 187 | obstacle_pos.append(epos) 188 | 189 | agent_pos = [get_pos(agent)] 190 | 191 | # check in view 192 | landmark_pos = [] 193 | for landmark in world.landmark_list: 194 | landmark_pos.append(get_pos(landmark)) 195 | 196 | 197 | # communication of all other agents 198 | other_pos = [] 199 | 200 | for other in world.vehicle_list: 201 | if other is agent: continue 202 | 
other_pos.append(get_pos(other,agent)) 203 | return np.concatenate(agent_pos + landmark_pos + obstacle_pos + other_pos) 204 | 205 | def updata_callback(self, world:World): 206 | crashed_list = [] 207 | reach_list = [] 208 | for agent, landmark in zip(world.vehicle_list,world.landmark_list): 209 | if agent.state.crashed: 210 | crashed_list.append(agent) 211 | dist = coord_dist(agent.state.coordinate, landmark.state.coordinate) 212 | if dist < landmark.radius: 213 | reach_list.append(landmark) 214 | for agent in crashed_list: 215 | reset_state(agent) 216 | agent.state.theta = np.random.uniform(0,2*3.14159) 217 | check_confilict(world) 218 | safe_count = 1000 219 | while True: 220 | if safe_count == 0: 221 | print('can not place objects with no conflict') 222 | safe_count -= 1 223 | for landmark in reach_list: 224 | for idx in range(2): 225 | scale = world.field_half_size[idx] 226 | trans = world.field_center[idx] 227 | norm_pos = np.random.uniform(-1,+1) 228 | landmark.state.coordinate[idx] = norm_pos * scale + trans 229 | 230 | for agent in crashed_list: 231 | for idx in range(2): 232 | scale = world.field_half_size[idx] 233 | trans = world.field_center[idx] 234 | norm_pos = np.random.uniform(-1,+1) 235 | agent.state.coordinate[idx] = norm_pos * scale + trans 236 | 237 | conflict = check_confilict(world) 238 | if not conflict: 239 | break 240 | 241 | 242 | def info(self, agent:Vehicle, world:World): 243 | agent_info:dict = {} 244 | return agent_info 245 | -------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenarios/yyz.py: -------------------------------------------------------------------------------- 1 | # one agent test script 2 | 3 | import numpy as np 4 | from MultiVehicleEnv.basic import World, Vehicle,Obstacle,Landmark 5 | from MultiVehicleEnv.scenario import BaseScenario 6 | from MultiVehicleEnv.utils import coord_data_dist, naive_inference, hsv2rgb 7 | 8 | import math 9 | from numpy.linalg import norm 10 | 
11 | def default_vehicle(): 12 | agent = Vehicle() 13 | agent.r_safe = 0.17 14 | agent.L_car = 0.25 15 | agent.W_car = 0.18 16 | agent.L_axis = 0.2 17 | agent.K_vel = 0.707 18 | agent.K_phi = 0.596 19 | agent.dv_dt = 10.0 20 | agent.dphi_dt = 10.0 21 | agent.color = [[1.0,0.0,0.0],1.0] 22 | return agent 23 | 24 | 25 | def f_Relative(groundtruth, P, N): 26 | ''' 27 | Inputs: 28 | groundtruth 29 | P: predicted topo 30 | N: number of nodes 31 | Outputs: 32 | P_final: topo rotated and transited 33 | Gamma: rotation mat 34 | t: transit argument 35 | ''' 36 | I_Na = np.eye(N) 37 | # SVD 38 | M_h = P.T 39 | L = I_Na - np.ones([N, N]) * 1/N 40 | M = groundtruth.T 41 | S = np.matmul( np.matmul(M_h.T, L), M ) 42 | U1, Lambda1, V1 = np.linalg.svd(S) 43 | 44 | #TODO: why should we use.T 45 | U1 = U1.T 46 | V1 = V1.T 47 | 48 | # calculate gamma mat and T mat 49 | gamma = np.matmul(V1, U1.T) 50 | t = np.matmul((M.T - np.matmul(gamma, M_h.T)), np.ones([N,1])) * 1 / N 51 | 52 | # V mat 53 | v_x = np.kron(np.ones([N,1]), np.expand_dims(np.array([1,0]),axis=-1)) 54 | v_y = np.kron(np.ones([N,1]), np.expand_dims(np.array([0,1]),axis=-1)) 55 | 56 | # rotation correction and transit correction 57 | P_array = P 58 | P_final = np.expand_dims(np.matmul(np.kron(I_Na, gamma), P_array.flatten(order='F')), axis=-1) + np.matmul(np.concatenate([v_x, v_y], axis=1), t) 59 | 60 | return P_final, gamma, t 61 | 62 | 63 | 64 | def error_rel_g(p1, p2, n): 65 | p2_rel,_,_ = f_Relative(p1, p2, n) 66 | error2 = p2_rel - np.expand_dims(p1.reshape(-1, order='F'), axis=-1) 67 | error_rel = np.sqrt(1/n * np.sum(error2 ** 2)) 68 | 69 | return error_rel 70 | 71 | class Scenario(BaseScenario): 72 | def make_world(self,args): 73 | # init a world instance 74 | self.args = args 75 | world = World() 76 | 77 | #for simulate real world 78 | world.step_t = args.step_t 79 | world.sim_step = args.sim_step 80 | world.field_range = [-2.0,-2.0,2.0,2.0] 81 | 82 | num_agent_ray = 60 83 | 84 | 85 | # set world.GUI_port as 
port dir when usegui is true 86 | if args.usegui: 87 | world.GUI_port = args.guiport 88 | else: 89 | world.GUI_port = None 90 | 91 | # add 4 agents 92 | agent_number = args.num_agents 93 | 94 | # ideal formation topo side length 95 | world.ideal_side_len = args.ideal_side_len 96 | 97 | # calculate the ideal formation topo 98 | world.ideal_topo_point = [[], []] 99 | for i in range(agent_number): 100 | world.ideal_topo_point[0].append(world.ideal_side_len / np.sqrt(2) * np.cos(i/agent_number*2*np.pi)) 101 | world.ideal_topo_point[1].append(world.ideal_side_len / np.sqrt(2) * np.sin(i/agent_number*2*np.pi)) 102 | 103 | world.vehicle_list = [] 104 | for idx in range(agent_number): 105 | vehicle_id = 'AKM_'+str(idx+1) 106 | agent = default_vehicle() 107 | agent.vehicle_id = vehicle_id 108 | agent.ray = np.zeros((num_agent_ray, 2)) 109 | agent.min_ray = np.array([0, len(agent.ray)//2]) 110 | agent.color = [hsv2rgb((idx/agent_number)*360,1.0,1.0), 0.8] 111 | world.vehicle_list.append(agent) 112 | world.vehicle_id_list.append(vehicle_id) 113 | world.data_interface[vehicle_id] = {} 114 | world.data_interface[vehicle_id]['x'] = 0.0 115 | world.data_interface[vehicle_id]['y'] = 0.0 116 | world.data_interface[vehicle_id]['theta'] = 0.0 117 | 118 | 119 | 120 | # add landmark_list 121 | landmark_number = args.num_landmarks 122 | world.landmark_list = [] 123 | for idx in range(landmark_number): 124 | entity = Landmark() 125 | entity.radius = 0.4 126 | entity.collideable = False 127 | entity.color = [[0.0,1.0,0.0],0.1] 128 | world.landmark_list.append(entity) 129 | 130 | # add obstacle_list 131 | obstacle_number = args.num_obstacles 132 | world.obstacle_list = [] 133 | for idx in range(obstacle_number): 134 | entity = Obstacle() 135 | entity.radius = 0.2 136 | entity.collideable = True 137 | entity.color = [[0.0,0.0,0.0],1.0] 138 | world.obstacle_list.append(entity) 139 | 140 | world.data_slot['max_step_number'] = 20 141 | # make initial conditions 142 | self.reset_world(world) 
143 | return world 144 | 145 | def reset_world(self, world:World): 146 | world.data_slot['total_step_number'] = 0 147 | # set random initial states 148 | for agent in world.vehicle_list: 149 | agent.state.theta = np.random.uniform(0,1/2*3.14159) 150 | #agent.state.theta = 1/4 * 3.14159 151 | agent.state.vel_b = 0 152 | agent.state.phi = 0 153 | agent.state.ctrl_vel_b = 0 154 | agent.state.ctrl_phi = 0 155 | agent.state.movable = True 156 | agent.state.crashed = False 157 | 158 | # place all landmark,obstacle and vehicles in the field with out conflict 159 | conflict = True 160 | while conflict: 161 | conflict = False 162 | all_circle = [] 163 | for landmark in world.landmark_list: 164 | for idx in range(2): 165 | scale = world.field_half_size[idx] 166 | trans = world.field_center[idx] 167 | #norm_pos = np.random.uniform(-0.5,+0.5) 168 | norm_pos = 8 169 | norm_pos = norm_pos + (0.5 if norm_pos>0 else -0.5) 170 | landmark.state.coordinate[idx] = norm_pos * scale + trans 171 | all_circle.append((landmark.state.coordinate[0],landmark.state.coordinate[1],landmark.radius)) 172 | 173 | for obstacle in world.obstacle_list: 174 | for idx in range(2): 175 | scale = world.field_half_size[idx] 176 | trans = world.field_center[idx] 177 | norm_pos = np.random.uniform(-1,+1) 178 | obstacle.state.coordinate[idx] = norm_pos * scale*0.5 + trans # Why? 
179 | all_circle.append((obstacle.state.coordinate[0],obstacle.state.coordinate[1],obstacle.radius)) 180 | 181 | 182 | for i, agent in enumerate(world.vehicle_list): 183 | for idx in range(2): 184 | scale = world.field_half_size[idx] 185 | trans = world.field_center[idx] 186 | norm_pos = np.random.uniform(-1,1) 187 | #norm_pos = world.ideal_topo_point[idx][i] 188 | agent.state.coordinate[idx] = norm_pos * scale + trans 189 | #agent.state.coordinate[idx] = norm_pos 190 | all_circle.append((agent.state.coordinate[0],agent.state.coordinate[1],agent.r_safe)) 191 | 192 | for idx_a in range(len(all_circle)): 193 | for idx_b in range(idx_a+1,len(all_circle)): 194 | x_a = all_circle[idx_a][0] 195 | y_a = all_circle[idx_a][1] 196 | r_a = all_circle[idx_a][2] 197 | x_b = all_circle[idx_b][0] 198 | y_b = all_circle[idx_b][1] 199 | r_b = all_circle[idx_b][2] 200 | dis = ((x_a - x_b)**2 + (y_a - y_b)**2)**0.5 201 | if dis < r_a + r_b: 202 | conflict = True 203 | break 204 | if conflict: 205 | break 206 | for agent in world.vehicle_list: 207 | agent_data = world.data_interface[agent.vehicle_id] 208 | target_x = world.landmark_list[0].state.coordinate[0] 209 | target_y = world.landmark_list[0].state.coordinate[1] 210 | target_data = {'x':target_x, 'y':target_y} 211 | agent.dis2goal = coord_data_dist(agent_data, target_data) 212 | agent.dis2goal_prev = agent.dis2goal 213 | 214 | for landmark in world.landmark_list: 215 | landmark.color[1] = 0.1 216 | # set real landmark and make it color solid 217 | world.data_slot['real_landmark'] = np.random.randint(len(world.landmark_list)) 218 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 219 | real_landmark.color[1] = 1.0 220 | 221 | # encode 4 directions into [0,1,2,3] 222 | def encode_direction(direction): 223 | if direction[0] > 0 and direction[1] > 0: 224 | return 0 225 | if direction[0] < 0 and direction[1] > 0: 226 | return 1 227 | if direction[0] < 0 and direction[1] < 0: 228 | return 2 229 | if direction[0] > 
0 and direction[1] < 0: 230 | return 3 231 | # decode 4 direction code [0,1,2,3] into onehot vector 232 | world.data_slot['direction_decoder'] = [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]] 233 | # give each agent a direction code as a human hint 234 | for agent in world.vehicle_list: 235 | direction = [real_landmark.state.coordinate[0] - agent.state.coordinate[0], 236 | real_landmark.state.coordinate[1] - agent.state.coordinate[1]] 237 | agent.data_slot['direction_obs'] = world.data_slot['direction_decoder'][encode_direction(direction)] 238 | 239 | 240 | 241 | 242 | 243 | def reward(self, agent:Vehicle, world:World, old_world:World = None): 244 | 245 | return 0 246 | 247 | def get_relAngle(self, center, target): 248 | ang = math.atan2(target.state.coordinate[1] - center['y'], target.state.coordinate[0] - center['x']) - center['theta'] 249 | if abs(ang) > np.pi: 250 | ang -= np.sign(ang) * 2 * np.pi 251 | return ang 252 | 253 | def formation_reward(self, agent, world): 254 | agent_data = world.data_interface[agent.vehicle_id] 255 | pos_rel = [[],[]] 256 | for any_agent in world.vehicle_list: 257 | any_agent_data = world.data_interface[any_agent.vehicle_id] 258 | pos_rel[0].append(any_agent_data['x'] - agent_data['x']) 259 | pos_rel[1].append(any_agent_data['y'] - agent_data['y']) 260 | topo_err = error_rel_g(np.array(world.ideal_topo_point), np.array(pos_rel), len(world.vehicle_list)) 261 | 262 | return -topo_err 263 | 264 | def observation(self,agent:Vehicle, world:World): 265 | 266 | agent_data = world.data_interface[agent.vehicle_id] 267 | print(agent_data) 268 | agent.dis2goal_prev = agent.dis2goal 269 | 270 | target_x = world.landmark_list[0].state.coordinate[0] 271 | target_y = world.landmark_list[0].state.coordinate[1] 272 | target_data = {'x':target_x, 'y':target_y} 273 | agent.dis2goal = coord_data_dist(agent_data, target_data) 274 | agent.ang2goal = self.get_relAngle(agent_data, world.landmark_list[0]) 275 | # get positions of all obstacle_list in this 
agent's reference frame 276 | 277 | 278 | 279 | agt_dis = [] 280 | agt_ang = [] 281 | target_dis = [np.array([agent.dis2goal])] 282 | target_ang = [np.array([self.get_relAngle(agent_data, world.landmark_list[0])])] 283 | #formation_err = [np.array([self.formation_reward(agent, world)])] 284 | formation_err = [np.array([0])] 285 | 286 | for entity in world.vehicle_list: 287 | if entity == agent: 288 | continue 289 | dis2agt = np.array([norm(np.array(entity.state.coordinate) - np.array(agent.state.coordinate))]) 290 | ang = self.get_relAngle(agent_data, entity) 291 | agt_dis.append(dis2agt) 292 | agt_ang.append(np.array([ang])) 293 | 294 | #for _ in range(2): 295 | # agt_dis.append(np.array([0.0])) 296 | # agt_ang.append(np.array([0.0])) 297 | #print(agt_dis , agt_ang , target_ang , target_dis , formation_err) 298 | return np.concatenate(agt_dis + agt_ang + target_ang + target_dis + formation_err) 299 | 300 | 301 | def info(self, agent:Vehicle, world:World): 302 | agent_info:dict = {} 303 | return agent_info -------------------------------------------------------------------------------- /src/MultiVehicleEnv/scenarios/yyz2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from MultiVehicleEnv.basic import World, Vehicle, Entity 3 | from MultiVehicleEnv.scenario import BaseScenario 4 | from MultiVehicleEnv.utils import coord_data_dist, naive_inference, hsv2rgb 5 | 6 | import math 7 | from numpy.linalg import norm 8 | 9 | def default_vehicle(): 10 | agent = Vehicle() 11 | agent.r_safe = 0.17 12 | agent.L_car = 0.25 13 | agent.W_car = 0.18 14 | agent.L_axis = 0.2 15 | agent.K_vel = 0.707 16 | agent.K_phi = 0.596 17 | agent.dv_dt = 10.0 18 | agent.dphi_dt = 10.0 19 | agent.color = [[1.0,0.0,0.0],1.0] 20 | return agent 21 | 22 | 23 | def f_Relative(groundtruth, P, N): 24 | ''' 25 | Inputs: 26 | groundtruth 27 | P: predicted topo 28 | N: number of nodes 29 | Outputs: 30 | P_final: topo rotated and transited 31 | 
Gamma: rotation mat 32 | t: transit argument 33 | ''' 34 | I_Na = np.eye(N) 35 | # SVD 36 | M_h = P.T 37 | L = I_Na - np.ones([N, N]) * 1/N 38 | M = groundtruth.T 39 | S = np.matmul( np.matmul(M_h.T, L), M ) 40 | U1, Lambda1, V1 = np.linalg.svd(S) 41 | 42 | #TODO: why should we use.T 43 | U1 = U1.T 44 | V1 = V1.T 45 | 46 | # calculate gamma mat and T mat 47 | gamma = np.matmul(V1, U1.T) 48 | t = np.matmul((M.T - np.matmul(gamma, M_h.T)), np.ones([N,1])) * 1 / N 49 | 50 | # V mat 51 | v_x = np.kron(np.ones([N,1]), np.expand_dims(np.array([1,0]),axis=-1)) 52 | v_y = np.kron(np.ones([N,1]), np.expand_dims(np.array([0,1]),axis=-1)) 53 | 54 | # rotation correction and transit correction 55 | P_array = P 56 | P_final = np.expand_dims(np.matmul(np.kron(I_Na, gamma), P_array.flatten(order='F')), axis=-1) + np.matmul(np.concatenate([v_x, v_y], axis=1), t) 57 | 58 | return P_final, gamma, t 59 | 60 | 61 | 62 | def error_rel_g(p1, p2, n): 63 | p2_rel,_,_ = f_Relative(p1, p2, n) 64 | error2 = p2_rel - np.expand_dims(p1.reshape(-1, order='F'), axis=-1) 65 | error_rel = np.sqrt(1/n * np.sum(error2 ** 2)) 66 | 67 | return error_rel 68 | 69 | class Scenario(BaseScenario): 70 | def make_world(self,args): 71 | # init a world instance 72 | self.args = args 73 | world = World() 74 | 75 | #for simulate real world 76 | world.step_t = args.step_t 77 | world.sim_step = args.sim_step 78 | world.field_range = [-2.0,-2.0,2.0,2.0] 79 | 80 | num_agent_ray = 60 81 | 82 | 83 | # set world.GUI_port as port dir when usegui is true 84 | if args.usegui: 85 | world.GUI_port = args.guiport 86 | else: 87 | world.GUI_port = None 88 | 89 | # add 4 agents 90 | agent_number = args.num_agents 91 | 92 | # ideal formation topo side length 93 | world.ideal_side_len = args.ideal_side_len 94 | 95 | # calculate the ideal formation topo 96 | world.ideal_topo_point = [[], []] 97 | for i in range(agent_number): 98 | world.ideal_topo_point[0].append(world.ideal_side_len / np.sqrt(2) * np.cos(i/agent_number*2*np.pi)) 
99 | world.ideal_topo_point[1].append(world.ideal_side_len / np.sqrt(2) * np.sin(i/agent_number*2*np.pi)) 100 | 101 | world.vehicle_list = [] 102 | for idx in range(agent_number): 103 | vehicle_id = 'AKM_'+str(idx+1) 104 | agent = default_vehicle() 105 | agent.vehicle_id = vehicle_id 106 | agent.ray = np.zeros((num_agent_ray, 2)) 107 | agent.min_ray = np.array([0, len(agent.ray)//2]) 108 | agent.color = [hsv2rgb((idx/agent_number)*360,1.0,1.0), 0.8] 109 | world.vehicle_list.append(agent) 110 | world.vehicle_id_list.append(vehicle_id) 111 | world.data_interface[vehicle_id] = {} 112 | world.data_interface[vehicle_id]['x'] = 0.0 113 | world.data_interface[vehicle_id]['y'] = 0.0 114 | world.data_interface[vehicle_id]['theta'] = 0.0 115 | 116 | 117 | 118 | # add landmark_list 119 | landmark_number = args.num_landmarks 120 | world.landmark_list = [] 121 | for idx in range(landmark_number): 122 | entity = Entity() 123 | entity.radius = 0.4 124 | entity.collideable = False 125 | entity.color = [[0.0,1.0,0.0],0.1] 126 | world.landmark_list.append(entity) 127 | 128 | # add obstacle_list 129 | obstacle_number = args.num_obstacles 130 | world.obstacle_list = [] 131 | for idx in range(obstacle_number): 132 | entity = Entity() 133 | entity.radius = 0.2 134 | entity.collideable = True 135 | entity.color = [[0.0,0.0,0.0],1.0] 136 | world.obstacle_list.append(entity) 137 | 138 | world.data_slot['max_step_number'] = 20 139 | # make initial conditions 140 | self.reset_world(world) 141 | return world 142 | 143 | def reset_world(self, world:World): 144 | world.data_slot['total_step_number'] = 0 145 | # set random initial states 146 | for agent in world.vehicle_list: 147 | agent.state.theta = np.random.uniform(0,1/2*3.14159) 148 | #agent.state.theta = 1/4 * 3.14159 149 | agent.state.vel_b = 0 150 | agent.state.phi = 0 151 | agent.state.ctrl_vel_b = 0 152 | agent.state.ctrl_phi = 0 153 | agent.state.movable = True 154 | agent.state.crashed = False 155 | 156 | # place all 
landmark,obstacle and vehicles in the field with out conflict 157 | conflict = True 158 | while conflict: 159 | conflict = False 160 | all_circle = [] 161 | for landmark in world.landmark_list: 162 | for idx in range(2): 163 | scale = world.field_half_size[idx] 164 | trans = world.field_center[idx] 165 | #norm_pos = np.random.uniform(-0.5,+0.5) 166 | norm_pos = 8 167 | norm_pos = norm_pos + (0.5 if norm_pos>0 else -0.5) 168 | landmark.state.coordinate[idx] = norm_pos * scale + trans 169 | all_circle.append((landmark.state.coordinate[0],landmark.state.coordinate[1],landmark.radius)) 170 | 171 | for obstacle in world.obstacle_list: 172 | for idx in range(2): 173 | scale = world.field_half_size[idx] 174 | trans = world.field_center[idx] 175 | norm_pos = np.random.uniform(-1,+1) 176 | obstacle.state.coordinate[idx] = norm_pos * scale*0.5 + trans # Why? 177 | all_circle.append((obstacle.state.coordinate[0],obstacle.state.coordinate[1],obstacle.radius)) 178 | 179 | 180 | for i, agent in enumerate(world.vehicle_list): 181 | for idx in range(2): 182 | scale = world.field_half_size[idx] 183 | trans = world.field_center[idx] 184 | norm_pos = np.random.uniform(-1,1) 185 | #norm_pos = world.ideal_topo_point[idx][i] 186 | agent.state.coordinate[idx] = norm_pos * scale + trans 187 | #agent.state.coordinate[idx] = norm_pos 188 | all_circle.append((agent.state.coordinate[0],agent.state.coordinate[1],agent.r_safe)) 189 | 190 | for idx_a in range(len(all_circle)): 191 | for idx_b in range(idx_a+1,len(all_circle)): 192 | x_a = all_circle[idx_a][0] 193 | y_a = all_circle[idx_a][1] 194 | r_a = all_circle[idx_a][2] 195 | x_b = all_circle[idx_b][0] 196 | y_b = all_circle[idx_b][1] 197 | r_b = all_circle[idx_b][2] 198 | dis = ((x_a - x_b)**2 + (y_a - y_b)**2)**0.5 199 | if dis < r_a + r_b: 200 | conflict = True 201 | break 202 | if conflict: 203 | break 204 | 205 | for agent in world.vehicle_list: 206 | agent_data = world.data_interface[agent.vehicle_id] 207 | target_x = 
world.landmark_list[0].state.coordinate[0] 208 | target_y = world.landmark_list[0].state.coordinate[1] 209 | target_data = {'x':target_x, 'y':target_y} 210 | agent.dis2goal = coord_data_dist(agent_data, target_data) 211 | agent.dis2goal_prev = agent.dis2goal 212 | 213 | for landmark in world.landmark_list: 214 | landmark.color[1] = 0.1 215 | # set real landmark and make it color solid 216 | world.data_slot['real_landmark'] = np.random.randint(len(world.landmark_list)) 217 | real_landmark = world.landmark_list[world.data_slot['real_landmark']] 218 | real_landmark.color[1] = 1.0 219 | 220 | # encode 4 directions into [0,1,2,3] 221 | def encode_direction(direction): 222 | if direction[0] > 0 and direction[1] > 0: 223 | return 0 224 | if direction[0] < 0 and direction[1] > 0: 225 | return 1 226 | if direction[0] < 0 and direction[1] < 0: 227 | return 2 228 | if direction[0] > 0 and direction[1] < 0: 229 | return 3 230 | # decode 4 direction code [0,1,2,3] into onehot vector 231 | world.data_slot['direction_decoder'] = [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]] 232 | # give each agent a direction code as a human hint 233 | for agent in world.vehicle_list: 234 | direction = [real_landmark.state.coordinate[0] - agent.state.coordinate[0], 235 | real_landmark.state.coordinate[1] - agent.state.coordinate[1]] 236 | agent.data_slot['direction_obs'] = world.data_slot['direction_decoder'][encode_direction(direction)] 237 | 238 | 239 | 240 | 241 | 242 | def reward(self, agent:Vehicle, world:World, old_world:World = None): 243 | 244 | return 0 245 | 246 | def get_relAngle(self, center, target): 247 | ang = math.atan2(target.state.coordinate[1] - center['y'], target.state.coordinate[0] - center['x']) - center['theta'] 248 | if abs(ang) > np.pi: 249 | ang -= np.sign(ang) * 2 * np.pi 250 | return ang 251 | 252 | def formation_reward(self, agent, world): 253 | agent_data = world.data_interface[agent.vehicle_id] 254 | pos_rel = [[],[]] 255 | for any_agent in world.vehicle_list: 256 | 
any_agent_data = world.data_interface[any_agent.vehicle_id] 257 | pos_rel[0].append(any_agent_data['x'] - agent_data['x']) 258 | pos_rel[1].append(any_agent_data['y'] - agent_data['y']) 259 | topo_err = error_rel_g(np.array(world.ideal_topo_point), np.array(pos_rel), len(world.vehicle_list)) 260 | 261 | return -topo_err 262 | 263 | def observation(self,agent:Vehicle, world:World): 264 | 265 | agent_data = world.data_interface[agent.vehicle_id] 266 | print(agent_data) 267 | agent.dis2goal_prev = agent.dis2goal 268 | 269 | target_x = world.landmark_list[0].state.coordinate[0] 270 | target_y = world.landmark_list[0].state.coordinate[1] 271 | target_data = {'x':target_x, 'y':target_y} 272 | agent.dis2goal = coord_data_dist(agent_data, target_data) 273 | agent.ang2goal = self.get_relAngle(agent_data, world.landmark_list[0]) 274 | # get positions of all obstacle_list in this agent's reference frame 275 | 276 | 277 | 278 | agt_dis = [] 279 | agt_ang = [] 280 | target_dis = [np.array([agent.dis2goal])] 281 | target_ang = [np.array([self.get_relAngle(agent_data, world.landmark_list[0])])] 282 | formation_err = [np.array([self.formation_reward(agent, world)])] 283 | 284 | for entity in world.vehicle_list: 285 | if entity == agent: 286 | continue 287 | dis2agt = np.array([norm(np.array(entity.state.coordinate) - np.array(agent.state.coordinate))]) 288 | ang = self.get_relAngle(agent_data, entity) 289 | agt_dis.append(dis2agt) 290 | agt_ang.append(np.array([ang])) 291 | 292 | # for _ in range(2): 293 | # agt_dis.append(np.array([0.0])) 294 | # agt_ang.append(np.array([0.0])) 295 | print(agt_dis , agt_ang , target_ang , target_dis , formation_err) 296 | return np.concatenate(agt_dis + agt_ang + target_ang + target_dis + formation_err) 297 | 298 | 299 | def info(self, agent:Vehicle, world:World): 300 | agent_info:dict = {} 301 | return agent_info -------------------------------------------------------------------------------- /src/MultiVehicleEnv/utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import * 3 | 4 | def coord_dist(coord_a:List[float],coord_b:List[float]) -> float: 5 | return ((coord_a[0]-coord_b[0])**2 + (coord_a[1]-coord_b[1])**2)**0.5 6 | 7 | def coord_data_dist(coord_a,coord_b) -> float: 8 | return ((coord_a['x']-coord_b['x'])**2 + (coord_a['y']-coord_b['y'])**2)**0.5 9 | 10 | 11 | def naive_inference(target_x:float, target_y:float, theta:float, 12 | dist:float=0.0, min_r:float=0.0, delta_theta = None) -> Tuple[float,float]: 13 | r = (target_x**2+target_y**2)**0.5 14 | relate_theta = np.arctan2(target_y,target_x)-theta 15 | yt = np.sin(relate_theta)*r 16 | xt = np.cos(relate_theta)*r 17 | d = abs(np.tan(relate_theta)*r) 18 | if d < dist*0.5: 19 | vel = np.sign(xt) 20 | phi = 0 21 | else: 22 | in_min_r = (xt**2+(abs(yt)-min_r)**2)< min_r**2 23 | vel = -1 if (bool(in_min_r) ^ bool(xt<0)) else 1 24 | phi = -1 if (bool(in_min_r) ^ bool(yt<0)) else 1 25 | if delta_theta is not None: 26 | if abs(np.tan(relate_theta)) < np.tan(delta_theta): 27 | phi = phi*abs(np.tan(relate_theta))/np.tan(delta_theta) 28 | 29 | 30 | return vel,phi 31 | 32 | def hsv2rgb(h, s, v): 33 | h = float(h) 34 | s = float(s) 35 | v = float(v) 36 | h60 = h / 60.0 37 | h60f = np.floor(h60) 38 | hi = int(h60f) % 6 39 | f = h60 - h60f 40 | p = v * (1 - s) 41 | q = v * (1 - f * s) 42 | t = v * (1 - (1 - f) * s) 43 | r, g, b = 0, 0, 0 44 | if hi == 0: r, g, b = v, t, p 45 | elif hi == 1: r, g, b = q, v, p 46 | elif hi == 2: r, g, b = p, v, t 47 | elif hi == 3: r, g, b = p, q, v 48 | elif hi == 4: r, g, b = t, p, v 49 | elif hi == 5: r, g, b = v, p, q 50 | return r, g, b -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='MultiVehicleEnv', 4 | version='0.0.1', 5 | 
description='Multi-Vehicle Environment', 6 | author='Jiantao Qiu', 7 | author_email='qjt15@mails.tsinghua.edu.cn', 8 | packages=find_packages(), 9 | include_package_data=True, 10 | zip_safe=False 11 | #install_requires=['gym', 'numpy-stl', 'numpy', 'six', 'pyglet'] 12 | ) 13 | -------------------------------------------------------------------------------- /test/test_GUI_threading.py: -------------------------------------------------------------------------------- 1 | from MultiVehicleEnv.GUI import GUI 2 | import argparse 3 | import time 4 | import threading 5 | 6 | parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv") 7 | parser.add_argument('--gui-port',type=str,default='/dev/shm/gui_port') 8 | parser.add_argument('--fps',type=int,default=24) 9 | args = parser.parse_args() 10 | 11 | GUI_instance = GUI(port_type = 'file',gui_port = '/dev/shm/gui_port2' , fps = 24) 12 | GUI_t = threading.Thread(target=GUI_instance._render_target()) 13 | GUI_t.setDaemon(True) 14 | GUI_t.start() 15 | GUI_t.join() 16 | -------------------------------------------------------------------------------- /test/test_eval_auto_with_gui.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import argparse 4 | from MultiVehicleEnv.evaluate import EvaluateWrap 5 | 6 | 7 | def make_env(scenario_name, args): 8 | from MultiVehicleEnv.environment import MultiVehicleEnv 9 | import MultiVehicleEnv.scenarios as scenarios 10 | 11 | # load scenario from script 12 | scenario = scenarios.load(scenario_name + ".py").Scenario() 13 | # create world 14 | world = scenario.make_world(args) 15 | # create multiagent environment 16 | 17 | env = MultiVehicleEnv(world, scenario.reset_world, scenario.reward, scenario.observation,scenario.info) 18 | return env 19 | 20 | parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv") 21 | parser.add_argument('--gui-port',type=str,default='/dev/shm/gui_port') 22 | 
"""Evaluate three random policies in the 'multi_reach' scenario with the GUI wrap."""
import random
import numpy as np
import argparse
from MultiVehicleEnv.evaluate import EvaluateWrap


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and wrap its world in a MultiVehicleEnv."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world(args)
    # create multiagent environment
    env = MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                          scenario.observation, scenario.info)
    return env


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--fps', type=int, default=24)
# NOTE(review): default=True makes this store_true flag a no-op (it is True
# whether or not the flag is passed); kept as-is to preserve behavior.
parser.add_argument('--usegui', action='store_true', default=True)
parser.add_argument('--step-t', type=float, default=1.0)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='keyboard')

args = parser.parse_args()


def policy(obs: np.ndarray) -> list:
    # FIX: annotation said `-> int`, but a 2-element continuous action
    # [vel, phi], each uniform in [-1, 1], is what is returned.
    return [random.random() * 2 - 1, random.random() * 2 - 1]


env = make_env('multi_reach', args)
policy_list = [policy, policy, policy]
wrap = EvaluateWrap(env, policy_list)
"""Drive the 'multi_reach_road' scenario step-by-step with the naive controller."""
import random
import numpy as np
import argparse
from MultiVehicleEnv.evaluate import EvaluateWrap
import time
from MultiVehicleEnv.utils import naive_inference


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it
    (with the scenario's update callback attached)."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(args)
    return MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                           scenario.observation, scenario.info,
                           None, scenario.updata_callback)


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--fps', type=int, default=24)
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=0.1)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='train')

# Mapping from discrete action ids to (vel, phi) pairs.
# NOTE(review): unused in this script — actions are continuous tuples.
discrete_table = {0: ( 0.0, 0.0),
                  1: ( 1.0, 0.0), 2: ( 1.0, 1.0), 3: ( 1.0, -1.0),
                  4: (-1.0, 0.0), 5: (-1.0, 1.0), 6: (-1.0, -1.0)}
args = parser.parse_args()

env = make_env('multi_reach_road', args)
while True:
    obs = env.reset()
    print('reset env')
    for step in range(100):
        # One naive-inference action per agent, from its own observation:
        # obs[i][0:2] = position, obs[i][2:4] = (cos, sin) heading,
        # obs[i][4:6] = target position — assumed layout, confirm in scenario.
        action = []
        for agent in range(len(obs)):
            dx = obs[agent][4] - obs[agent][0]
            dy = obs[agent][5] - obs[agent][1]
            heading = np.arctan2(obs[agent][3], obs[agent][2])
            action.append(naive_inference(dx, dy, heading, dist=0.2, min_r=0.3))
        obs, reward, done, info = env.step(action)
        env.render()
"""Continuously run the 'sparse' scenario with the naive controller on agent 0."""
import random
import numpy as np
import argparse
from MultiVehicleEnv.evaluate import EvaluateWrap
import time
from MultiVehicleEnv.utils import naive_inference


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(args)
    return MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                           scenario.observation, scenario.info)


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--fps', type=int, default=24)
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=1.0)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='train')

args = parser.parse_args()


def policy(obs: np.ndarray) -> int:
    # NOTE(review): defined but never used below; kept for parity with the
    # other evaluation scripts.
    return random.randint(0, 6)


env = make_env('sparse', args)
while True:
    obs = env.reset()
    print('reset env')
    for step in range(25):
        # Only agent 0 is driven here (single-vehicle scenario assumed).
        # obs[0][2:4] = (cos, sin) heading, obs[0][4:6] = relative target —
        # assumed layout, confirm against the scenario's observation().
        action = []
        for agent in range(1):
            rel_x = obs[agent][4]
            rel_y = obs[agent][5]
            heading = np.arctan2(obs[agent][3], obs[agent][2])
            action.append(naive_inference(rel_x, rel_y, heading))

        obs, reward, done, info = env.step(action)
        print(reward)
        env.render()
        time.sleep(0.01)
"""Headless run of '3p1t2f': naive controller mapped onto discrete actions."""
import time
import random
import argparse
from MultiVehicleEnv.utils import naive_inference
import numpy as np


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world(args)
    # create multiagent environment
    env = MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                          scenario.observation, scenario.info)
    return env


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=1.0)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='train')

# Discrete action id -> (vel, phi) command pair.
discrete_table = {0: ( 0.0, 0.0),
                  1: ( 1.0, 0.0), 2: ( 1.0, 1.0), 3: ( 1.0, -1.0),
                  4: (-1.0, 0.0), 5: (-1.0, 1.0), 6: (-1.0, -1.0)}
args = parser.parse_args()

env = make_env('3p1t2f', args)
while True:
    obs = env.reset()
    print('reset env')
    for idx in range(100):
        action = []
        for i in range(3):
            # obs[i][16:18] = relative target, obs[i][6:8] = (cos, sin)
            # heading — assumed layout, confirm against the scenario.
            tx = obs[i][16]
            ty = obs[i][17]
            theta = np.arctan2(obs[i][7], obs[i][6])
            c_action = naive_inference(tx, ty, theta)
            # FIX: the lookup loop reused `idx`, shadowing the outer step
            # counter; renamed to `a_id`.
            for a_id in range(7):
                if (c_action[0] == discrete_table[a_id][0]
                        and c_action[1] == discrete_table[a_id][1]):
                    action.append(a_id)
                    break
            # NOTE(review): if no table entry matches (e.g. scaled phi),
            # no action is appended for this agent — silent drop, confirm
            # this cannot happen with default naive_inference parameters.
        obs, reward, done, info = env.step(action)
        #time.sleep(0.5)
        #train RL
"""Long-horizon run of 'multi_reach_road_sensor' with steering softened by
delta_theta (benchmark-style loop, 10000 steps per episode)."""
import random
import numpy as np
import argparse
from MultiVehicleEnv.evaluate import EvaluateWrap
import time
from MultiVehicleEnv.utils import naive_inference


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it
    (with the scenario's update callback attached)."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(args)
    return MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                           scenario.observation, scenario.info,
                           None, scenario.updata_callback)


# Simulation step length, reused below to scale the steering threshold.
step_t = 1.0

parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--fps', type=int, default=24)
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=step_t)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='train')

# Discrete action id -> (vel, phi) pair.
# NOTE(review): unused here — this script sends continuous actions.
discrete_table = {0: ( 0.0, 0.0),
                  1: ( 1.0, 0.0), 2: ( 1.0, 1.0), 3: ( 1.0, -1.0),
                  4: (-1.0, 0.0), 5: (-1.0, 1.0), 6: (-1.0, -1.0)}
args = parser.parse_args()

env = make_env('multi_reach_road_sensor', args)
while True:
    obs = env.reset()
    print('reset env')
    for step in range(10000):
        # obs[i][0:2] = position, obs[i][2:4] = (cos, sin) heading,
        # obs[i][4:6] = target position — assumed layout, confirm in scenario.
        action = []
        for agent in range(len(obs)):
            dx = obs[agent][4] - obs[agent][0]
            dy = obs[agent][5] - obs[agent][1]
            heading = np.arctan2(obs[agent][3], obs[agent][2])
            # delta_theta scales steering for small heading errors; the
            # 1.1342 factor's origin is not visible here — confirm.
            action.append(naive_inference(dx, dy, heading, dist=0.4,
                                          min_r=0.3,
                                          delta_theta=step_t * 1.1342))
        obs, reward, done, info = env.step(action)
        env.render()
"""Minimal headless training-mode loop for the '3p2t2f' scenario with random
discrete actions."""
import time
import random
import argparse


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(args)
    return MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                           scenario.observation, scenario.info)


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=1.0)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--add_direction_encoder', type=str, default='train')

args = parser.parse_args()

env = make_env('3p2t2f', args)
while True:
    env.reset()
    print('reset env')
    for step in range(10):
        # One random discrete action per agent.
        # NOTE(review): range is 0..4 while sibling scripts use a 7-entry
        # discrete table (0..6) — confirm the action space of '3p2t2f'.
        action = [random.randint(0, 4) for _ in range(3)]
        obs, reward, done, info = env.step(action)
        #time.sleep(0.5)
        #train RL
"""Run the 'yyz2' scenario with uniformly random discrete actions and GUI render."""
import random
import numpy as np
import argparse
from MultiVehicleEnv.evaluate import EvaluateWrap
import time
from MultiVehicleEnv.utils import naive_inference


def make_env(scenario_name, args):
    """Load scenario `scenario_name` and build a MultiVehicleEnv around it."""
    from MultiVehicleEnv.environment import MultiVehicleEnv
    import MultiVehicleEnv.scenarios as scenarios

    scenario = scenarios.load(scenario_name + ".py").Scenario()
    world = scenario.make_world(args)
    return MultiVehicleEnv(world, scenario.reset_world, scenario.reward,
                           scenario.observation, scenario.info, None, None)


parser = argparse.ArgumentParser(description="GUI for Multi-VehicleEnv")
parser.add_argument('--gui-port', type=str, default='/dev/shm/gui_port')
parser.add_argument('--fps', type=int, default=24)
parser.add_argument('--usegui', action='store_true', default=False)
parser.add_argument('--step-t', type=float, default=0.1)
parser.add_argument('--sim-step', type=int, default=100)
parser.add_argument('--direction_alpha', type=float, default=1.0)
parser.add_argument('--num_agents', type=int, default=3)
parser.add_argument('--ideal_side_len', type=float, default=5.0)
parser.add_argument('--num_landmarks', type=int, default=1)
parser.add_argument('--num_obstacles', type=int, default=0)

parser.add_argument('--add_direction_encoder', type=str, default='train')

# Discrete action id -> (vel, phi) pair.
# NOTE(review): unused here — the ids themselves are sent as actions.
discrete_table = {0: ( 0.0, 0.0),
                  1: ( 1.0, 0.0), 2: ( 1.0, 1.0), 3: ( 1.0, -1.0),
                  4: (-1.0, 0.0), 5: (-1.0, 1.0), 6: (-1.0, -1.0)}
args = parser.parse_args()

env = make_env('yyz2', args)
while True:
    obs = env.reset()
    print('reset env')
    for step in range(100):
        # One random discrete action id (0..6) per agent.
        action = [random.randint(0, 6) for _ in range(len(obs))]
        obs, reward, done, info = env.step(action)
        env.render()
        #time.sleep(0.1)