├── .gitignore ├── LICENSE ├── README.md ├── gym_env ├── dynamic_obs_test.yaml ├── gym_env │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ ├── ir_gym.py │ │ ├── mrnav.py │ │ ├── reciprocal_vel_obs.py │ │ └── rvo_inter.py ├── gym_env_test.py ├── gym_test_world.yaml ├── readme.md └── setup.py ├── rl_rvo_nav ├── gif │ ├── rl_rvo_cir_10.gif │ ├── rl_rvo_cir_14.gif │ ├── rl_rvo_cir_16.gif │ ├── rl_rvo_cir_20.gif │ ├── rl_rvo_cir_6.gif │ ├── rl_rvo_cor_8.gif │ ├── rl_rvo_dyna_obs.gif │ ├── rl_rvo_random_10.gif │ ├── rl_rvo_random_14.gif │ ├── rl_rvo_random_16.gif │ ├── rl_rvo_random_20.gif │ └── rl_rvo_random_6.gif ├── policy │ └── policy_rnn_ac.py ├── policy_test │ ├── policy_test.py │ ├── policy_test_world.yaml │ ├── policy_test_world_lines.yaml │ └── post_train.py ├── policy_train │ ├── dynamic_obs_test.yaml │ ├── multi_ppo.py │ ├── train_process.py │ ├── train_process_s1.py │ ├── train_process_s2.py │ └── train_world.yaml └── pre_trained_model │ ├── policy_test_pre_train.py │ ├── pre_trained │ ├── pre_train │ ├── pre_train.pt │ ├── pre_train.txt │ ├── pre_train_check_point_1000.pt │ └── pre_train_world.yaml │ ├── test_world.yaml │ ├── test_world_dyna_obs.yaml │ └── test_world_lines.yaml ├── setup.py └── setup.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | model_save/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Ruihua Han 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rl_rvo_nav 2 | 3 | ## Introduction 4 | 5 | This repository contains the source code for the paper "Reinforcement Learned Distributed Multi-Robot Navigation with Reciprocal Velocity Obstacle Shaped Rewards", published in IEEE Robotics and Automation Letters (RA-L). 6 | 7 | **[`PDF_ieee`](https://ieeexplore.ieee.org/document/9740403)** | **[`PDF_arxiv`](https://arxiv.org/pdf/2203.10229.pdf)** | **[`Video_Youtube`](https://www.youtube.com/watch?v=-8a8Vqm6epM)** | **[`Video_Bilibili`](https://www.bilibili.com/video/BV1oS4y1U71B?spm_id_from=333.999.0.0)** 8 | 9 | Circle 10 | Circle 16 | Circle 20 10 | :-------------------------:|:-------------------------:|:-------------------------: 11 | ![](rl_rvo_nav/gif/rl_rvo_cir_10.gif) | ![](rl_rvo_nav/gif/rl_rvo_cir_16.gif) | ![](rl_rvo_nav/gif/rl_rvo_cir_20.gif) 12 | 13 | Random 10 | Random 16 | Random 20 14 | :-------------------------:|:-------------------------:|:-------------------------: 15 | ![](rl_rvo_nav/gif/rl_rvo_random_10.gif) | ![](rl_rvo_nav/gif/rl_rvo_random_16.gif) | ![](rl_rvo_nav/gif/rl_rvo_random_20.gif) 16 | 17 | ## Prerequisites 18 | 19 | - Python >= 3.8 20 | - PyTorch >= 1.6.0 21 | - [intelligent-robot-simulator](https://github.com/hanruihua/intelligent-robot-simulator) == v2.5 22 | 23 | ``` 24 | git clone -b v2.5 https://github.com/hanruihua/intelligent-robot-simulator.git 25 | cd intelligent-robot-simulator 26 | pip install -e .
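# optional sanity check (suggested addition, not in the original instructions):
# ir_gym.py imports from ir_sim.env, so this import should succeed after install
python -c "from ir_sim.env import env_base"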
27 | ``` 28 | 29 | ## Test environment 30 | 31 | - Ubuntu 20.04, 18.04 32 | - Windows 10, 11 33 | 34 | ## Installation 35 | 36 | ``` 37 | git clone https://github.com/hanruihua/rl_rvo_nav.git 38 | cd rl_rvo_nav 39 | ./setup.sh 40 | ``` 41 | 42 | ## Policy Train 43 | 44 | - First stage: circle scenario with 4 robots. 45 | 46 | ``` 47 | python train_process.py --use_gpu 48 | ``` 49 | 50 | or 51 | 52 | ``` 53 | python train_process_s1.py 54 | ``` 55 | 56 | - Second stage: continue training in the circle scenario with 10 robots. 57 | 58 | ``` 59 | python train_process.py --robot_number 10 --train_epoch 2000 --load_name YOUR_MODEL_PATH --use_gpu --con_train 60 | ``` 61 | 62 | or 63 | 64 | ``` 65 | python train_process_s2.py 66 | ``` 67 | 68 | ## Policy Test 69 | 70 | You can test the policy trained in the previous steps with the following command: 71 | 72 | ``` 73 | python policy_test.py --robot_number 10 --dis_mode 3 --model_name YOUR_MODEL_NAME --render 74 | ``` 75 | 76 | **Note1:** dis_mode 3 is the circle scenario; dis_mode 2 is the random scenario. 77 | **Note2:** YOUR_MODEL_NAME refers to the path and name of the checkpoint file in the *policy_train/model_save* folder. 78 | 79 | ## Pretrained model 80 | 81 | We provide a pre-trained model; you can test it with the following command: 82 | 83 | ``` 84 | python policy_test_pre_train.py --render 85 | ``` 86 | 87 | ## Citation 88 | 89 | If you find this code or paper helpful, please star this repository and cite our paper: 90 | 91 | ``` 92 | @article{han2022reinforcement, 93 | title={Reinforcement Learned Distributed Multi-Robot Navigation With Reciprocal Velocity Obstacle Shaped Rewards}, 94 | author={Han, Ruihua and Chen, Shengduo and Wang, Shuaijun and Zhang, Zeqing and Gao, Rui and Hao, Qi and Pan, Jia}, 95 | journal={IEEE Robotics and Automation Letters}, 96 | volume={7}, 97 | number={3}, 98 | pages={5896--5903}, 99 | year={2022}, 100 | publisher={IEEE} 101 | } 102 | ``` 103 | 104 | ## Author 105 | 106 | Han Ruihua 107 | Contact: hanrh@connect.hku.hk 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /gym_env/dynamic_obs_test.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.1 11 | interval: 1 12 | square: [0, 0, 9, 9] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 17 | obs_cirs: 18 | number: 5 19 | dist_mode: 2 20 | obs_model: 'dynamic' # static, dynamic 21 | obs_step_mode: 'wander' # default, wander 22 | obs_state_list: [[25, 34], [35, 26]] 23 | obs_goal_list: [[45, 24], [15, 36]] 24 | obs_radius_list: [0.1, 0.1] 25 | obs_square: [2, 2, 8, 8] 26 | obs_interval: 1 27 | random_radius: False 28 | vel_max: [0.3, 0.3] 29 | -------------------------------------------------------------------------------- /gym_env/gym_env/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='mrnav-v1', 5 | entry_point='gym_env.envs:mrnav', 6 | ) 7 | -------------------------------------------------------------------------------- /gym_env/gym_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym_env.envs.mrnav import mrnav 2 | from gym_env.envs.ir_gym import
ir_gym 3 | 4 | -------------------------------------------------------------------------------- /gym_env/gym_env/envs/ir_gym.py: -------------------------------------------------------------------------------- 1 | from ir_sim.env import env_base 2 | from math import sqrt, pi 3 | from gym import spaces 4 | from gym_env.envs.rvo_inter import rvo_inter 5 | import numpy as np 6 | 7 | class ir_gym(env_base): 8 | def __init__(self, world_name, neighbors_region=5, neighbors_num=10, vxmax = 1.5, vymax = 1.5, env_train=True, acceler = 0.5, **kwargs): 9 | super(ir_gym, self).__init__(world_name=world_name, **kwargs) 10 | 11 | # self.obs_mode = kwargs.get('obs_mode', 0) # 0 drl_rvo, 1 drl_nrvo 12 | # self.reward_mode = kwargs.get('reward_mode', 0) 13 | 14 | self.radius_exp = kwargs.get('radius_exp', 0.2) 15 | 16 | self.env_train = env_train 17 | 18 | self.nr = neighbors_region 19 | self.nm = neighbors_num 20 | 21 | self.rvo = rvo_inter(neighbors_region, neighbors_num, vxmax, vymax, acceler, env_train, self.radius_exp) 22 | 23 | self.observation_space = spaces.Box(-np.inf, np.inf, shape=(5,), dtype=np.float32) 24 | self.action_space = spaces.Box(low=np.array([-1, -1]), high=np.array([1, 1]), dtype=np.float32) 25 | 26 | self.reward_parameter = kwargs.get('reward_parameter', (0.2, 0.1, 0.1, 0.2, 0.2, 1, -20, 20)) 27 | self.acceler = acceler 28 | self.arrive_flag_cur = False 29 | 30 | self.rvo_state_dim = 8 31 | 32 | 33 | def cal_des_omni_list(self): 34 | des_vel_list = [robot.cal_des_vel_omni() for robot in self.robot_list] 35 | return des_vel_list 36 | 37 | 38 | def rvo_reward_list_cal(self, action_list, **kwargs): 39 | ts = self.components['robots'].total_states() # robot_state_list, nei_state_list, obs_circular_list, obs_line_list 40 | 41 | rvo_reward_list = list(map(lambda robot_state, action: self.rvo_reward_cal(robot_state, ts[1], ts[2], ts[3], action, self.reward_parameter, **kwargs), ts[0], action_list)) 42 | 43 | return rvo_reward_list 44 | 45 | def rvo_reward_cal(self, robot_state, nei_state_list, obs_cir_list, obs_line_list, action, reward_parameter=(0.2, 0.1, 0.1, 0.2, 0.2, 1, -10, 20), **kwargs): 46 | 47 | vo_flag, min_exp_time, min_dis = self.rvo.config_vo_reward(robot_state, nei_state_list, obs_cir_list, obs_line_list, action, **kwargs) 48 | 49 | des_vel = np.round(np.squeeze(robot_state[-2:]), 2) 50 | 51 | p1, p2, p3, p4, p5, p6, p7, p8 = reward_parameter 52 | 53 | dis_des = sqrt((action[0] - des_vel[0] )**2 + (action[1] - des_vel[1])**2) 54 | max_dis_des = 3 55 | dis_des_reward = - dis_des / max_dis_des # (0-1) 56 | exp_time_reward = - 0.2/(min_exp_time+0.2) # (0-1) 57 | 58 | # rvo reward 59 | if vo_flag: 60 | rvo_reward = p2 + p3 * dis_des_reward + p4 * exp_time_reward 61 | 62 | if min_exp_time < 0.1: 63 | rvo_reward = p2 + p1 * p4 * exp_time_reward 64 | else: 65 | rvo_reward = p5 + p6 * dis_des_reward 66 | 67 | rvo_reward = np.round(rvo_reward, 2) 68 | 69 | return rvo_reward 70 | 71 | def obs_move_reward_list(self, action_list, **kwargs): 72 | ts = self.components['robots'].total_states() # robot_state_list, nei_state_list, obs_circular_list, obs_line_list 73 | 74 | obs_reward_list = list(map(lambda robot, action: self.observation_reward(robot, ts[1], ts[2], ts[3], action, **kwargs), self.robot_list, action_list)) 75 | 76 | obs_list = [l[0] for l in obs_reward_list] 77 | reward_list = [l[1] for l in obs_reward_list] 78 | done_list = [l[2] for l in obs_reward_list] 79 | info_list = [l[3] for l in obs_reward_list] 80 | 81 | return obs_list, reward_list, done_list, info_list 82 | 83 | 
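    # Reward composition note: the per-robot reward returned by mrnav.step_ir()
    # is the sum of two terms computed in this class:
    #   (1) rvo_reward_cal() above -- the dense RVO-shaped term: if the chosen
    #       action falls inside a reciprocal velocity obstacle (vo_flag True),
    #       it equals p2 + p3 * dis_des_reward + p4 * exp_time_reward, penalizing
    #       both the deviation from the desired velocity and a short expected
    #       collision time; otherwise it equals p5 + p6 * dis_des_reward.
    #   (2) mov_reward() below -- the sparse term: collision penalty p7 and
    #       arrival bonus p8 (defaults -20 / +20 in self.reward_parameter).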
def observation_reward(self, robot, nei_state_list, obs_circular_list, obs_line_list, action, **kwargs): 84 | 85 | robot_omni_state = robot.omni_state() 86 | des_vel = np.squeeze(robot.cal_des_vel_omni()) 87 | 88 | done = False 89 | 90 | if robot.arrive() and not robot.arrive_flag: 91 | robot.arrive_flag = True 92 | arrive_reward_flag = True 93 | else: 94 | arrive_reward_flag = False 95 | 96 | obs_vo_list, vo_flag, min_exp_time, collision_flag = self.rvo.config_vo_inf(robot_omni_state, nei_state_list, obs_circular_list, obs_line_list, action, **kwargs) 97 | 98 | radian = robot.state[2] 99 | cur_vel = np.squeeze(robot.vel_omni) 100 | radius = robot.radius_collision* np.ones(1,) 101 | 102 | propri_obs = np.concatenate([ cur_vel, des_vel, radian, radius]) 103 | 104 | if len(obs_vo_list) == 0: 105 | exter_obs = np.zeros((self.rvo_state_dim,)) 106 | else: 107 | exter_obs = np.concatenate(obs_vo_list) # vo list 108 | 109 | observation = np.round(np.concatenate([propri_obs, exter_obs]), 2) 110 | 111 | # dis2goal = sqrt( robot.state[0:2] - robot.goal[0:2]) 112 | mov_reward = self.mov_reward(collision_flag, arrive_reward_flag, self.reward_parameter, min_exp_time) 113 | 114 | reward = mov_reward 115 | 116 | done = True if collision_flag else False 117 | info = True if robot.arrive_flag else False 118 | 119 | return [observation, reward, done, info] 120 | 121 | def mov_reward(self, collision_flag, arrive_reward_flag, reward_parameter=(0.2, 0.1, 0.1, 0.2, 0.2, 1, -20, 15), min_exp_time=100, dis2goal=100): 122 | 123 | p1, p2, p3, p4, p5, p6, p7, p8 = reward_parameter 124 | 125 | collision_reward = p7 if collision_flag else 0 126 | arrive_reward = p8 if arrive_reward_flag else 0 127 | time_reward = 0 128 | 129 | mov_reward = collision_reward + arrive_reward + time_reward 130 | 131 | return mov_reward 132 | 133 | def osc_reward(self, state_list): 134 | # to avoid oscillation 135 | dif_rad_list = [] 136 | 137 | if len(state_list) < 3: 138 | return 0 139 | 140 | for i in range(len(state_list) - 1): 141 | dif = ir_gym.wraptopi(state_list[i+1][2, 0] - state_list[i][2, 0]) 142 | dif_rad_list.append(round(dif, 2)) 143 | 144 | for j in range(len(dif_rad_list)-3): 145 | 146 | if dif_rad_list[j] * dif_rad_list[j+1] < -0.05 and dif_rad_list[j+1] * dif_rad_list[j+2] < -0.05 and dif_rad_list[j+2] * dif_rad_list[j+3] < -0.05: 147 | print('osc', dif_rad_list[j], dif_rad_list[j+1], dif_rad_list[j+2], dif_rad_list[j+3]) 148 | return -10 149 | return 0 150 | 151 | def observation(self, robot, nei_state_list, obs_circular_list, obs_line_list): 152 | 153 | robot_omni_state = robot.omni_state() 154 | des_vel = np.squeeze(robot_omni_state[-2:]) 155 | 156 | obs_vo_list, _, min_exp_time, _ = self.rvo.config_vo_inf(robot_omni_state, nei_state_list, obs_circular_list, obs_line_list) 157 | 158 | cur_vel = np.squeeze(robot.vel_omni) 159 | radian = robot.state[2] 160 | radius = robot.radius_collision* np.ones(1,) 161 | 162 | if len(obs_vo_list) == 0: 163 | exter_obs = np.zeros((self.rvo_state_dim,)) 164 | else: 165 | exter_obs = np.concatenate(obs_vo_list) # vo list 166 | 167 | 168 | propri_obs = np.concatenate([ cur_vel, des_vel, radian, radius]) 169 | observation = np.round(np.concatenate([propri_obs, exter_obs]), 2) 170 | 171 | return observation 172 | 173 | def env_reset(self, reset_mode=1, **kwargs): 174 | 175 | self.components['robots'].robots_reset(reset_mode, **kwargs) 176 | ts = self.components['robots'].total_states() 177 | obs_list = list(map(lambda robot: self.observation(robot, ts[1], ts[2], ts[3]), self.robot_list)) 
178 | 179 | return obs_list 180 | 181 | def env_reset_one(self, id): 182 | self.robot_reset(id) 183 | 184 | def env_observation(self): 185 | ts = self.components['robots'].total_states() 186 | obs_list = list(map(lambda robot: self.observation(robot, ts[1], ts[2], ts[3]), self.robot_list)) 187 | 188 | return obs_list 189 | 190 | @staticmethod 191 | def wraptopi(theta): 192 | 193 | if theta > pi: 194 | theta = theta - 2*pi 195 | 196 | if theta < -pi: 197 | theta = theta + 2*pi 198 | 199 | return theta 200 | -------------------------------------------------------------------------------- /gym_env/gym_env/envs/mrnav.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym_env.envs.ir_gym import ir_gym 3 | 4 | class mrnav(gym.Env): 5 | def __init__(self, world_name=None, neighbors_region=5, neighbors_num=10, **kwargs): 6 | 7 | self.ir_gym = ir_gym(world_name, neighbors_region, neighbors_num, **kwargs) 8 | 9 | self.observation_space = self.ir_gym.observation_space 10 | self.action_space = self.ir_gym.action_space 11 | 12 | self.neighbors_region = neighbors_region 13 | self.circular = kwargs.get('circle', [5, 5, 4]) 14 | self.square = kwargs.get('square', [0, 0, 10, 10]) 15 | self.interval = kwargs.get('interval', 1) 16 | 17 | # def step(self, action, vel_type='omni', stop=True, **kwargs): 18 | def step_ir(self, action, vel_type='omni', stop=True, **kwargs): 19 | 20 | if not isinstance(action, list): 21 | action = [action] 22 | 23 | rvo_reward_list = self.ir_gym.rvo_reward_list_cal(action) 24 | self.ir_gym.robot_step(action, vel_type=vel_type, stop=stop) 25 | self.ir_gym.obs_cirs_step() 26 | obs_list, mov_reward, done_list, info_list = self.ir_gym.obs_move_reward_list(action, **kwargs) 27 | 28 | reward_list = [x+y for x, y in zip(rvo_reward_list, mov_reward)] 29 | 30 | return obs_list, reward_list, done_list, info_list 31 | 32 | def render(self, mode = 'human', save=False, path=None, i = 0, **kwargs): 33 | self.ir_gym.render(0.01, **kwargs) 34 | 35 | if save: 36 | self.ir_gym.save_fig(path, i) 37 | 38 | def reset(self, mode=0, **kwargs): 39 | # mode = kwargs.get('mode', 0) 40 | return self.ir_gym.env_reset(mode, circular = self.circular, square=self.square, interval=self.interval, **kwargs) 41 | 42 | def reset_one(self, id): 43 | self.ir_gym.components['robots'].robot_reset(id) 44 | 45 | def show(self): 46 | self.ir_gym.show() -------------------------------------------------------------------------------- /gym_env/gym_env/envs/reciprocal_vel_obs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import sin, cos, atan2, asin, pi, inf, sqrt 3 | from time import time 4 | # state: [x, y, vx, vy, radius, vx_des, vy_des] 5 | # moving_state_list: [[x, y, vx, vy, radius]] 6 | # obstacle_state_list: [[x, y, radius]] 7 | # rvo_vel: [vx, vy] 8 | 9 | class reciprocal_vel_obs: 10 | 11 | def __init__(self, neighbor_region=5, vxmax = 1.5, vymax = 1.5, acceler = 0.5): 12 | 13 | self.vxmax = vxmax 14 | self.vymax = vymax 15 | self.acceler = acceler 16 | self.nr = neighbor_region 17 | 18 | def cal_vel(self, robot_state, nei_state_list=[], obs_cir_list=[], obs_line_list=[], mode = 'rvo'): 19 | 20 | robot_state, ns_list, oc_list, ol_list = self.preprocess(robot_state, nei_state_list, obs_cir_list, obs_line_list) 21 | 22 | # configure the vo or rvo or hrvo 23 | vo_list = self.config_vo(robot_state, ns_list, oc_list, ol_list, mode) 24 | vo_outside, vo_inside = self.vel_candidate(robot_state, 
vo_list) 25 | rvo_vel = self.vel_select(robot_state, vo_outside, vo_inside, ns_list, oc_list, ol_list, vo_list) 26 | 27 | return rvo_vel 28 | 29 | def preprocess(self, robot_state, nei_state_list, obs_cir_list, obs_line_list): 30 | # components in the region 31 | robot_state = np.squeeze(robot_state) 32 | ns_list = list(filter(lambda x: 0 < reciprocal_vel_obs.distance(robot_state, x) <= self.nr, nei_state_list)) 33 | oc_list = list(filter(lambda y: 0 < reciprocal_vel_obs.distance(robot_state, y) <= self.nr, obs_cir_list)) 34 | ol_list = list(map(lambda z: reciprocal_vel_obs.segment_in_circle(robot_state[0], robot_state[1], self.nr, z), obs_line_list)) 35 | ol_list = [x for x in ol_list if x is not None] 36 | 37 | return robot_state, ns_list, oc_list, ol_list 38 | 39 | def config_vo(self, robot_state, nei_state_list, obs_cir_list, obs_line_list, mode): 40 | # mode: vo, rvo, hrvo 41 | vo_list1 = list(map(lambda x: self.config_vo_circle(robot_state, x, mode), nei_state_list)) 42 | vo_list2 = list(map(lambda y: self.config_vo_circle(robot_state, y, 'vo'), obs_cir_list)) 43 | vo_list3 = list(map(lambda z: self.config_vo_line(robot_state, z), obs_line_list)) 44 | 45 | return vo_list1 + vo_list2 + vo_list3 46 | 47 | def config_vo_circle(self, state, circular, mode='vo'): 48 | 49 | x, y, vx, vy, r = state[0:5] 50 | mx, my, mvx, mvy, mr = circular[0:5] 51 | 52 | dis_mr = sqrt((my - y)**2 + (mx - x)**2) 53 | angle_mr = atan2(my - y, mx - x) 54 | 55 | if dis_mr < r + mr: 56 | dis_mr = r + mr 57 | 58 | ratio = (r + mr)/dis_mr 59 | half_angle = asin( ratio ) 60 | line_left_ori = reciprocal_vel_obs.wraptopi(angle_mr + half_angle) 61 | line_right_ori = reciprocal_vel_obs.wraptopi(angle_mr - half_angle) 62 | 63 | if mode == 'vo': 64 | apex = [mvx, mvy] 65 | 66 | elif mode == 'rvo': 67 | apex = [(vx + mvx)/2, (vy + mvy)/2] 68 | 69 | elif mode == 'hrvo': 70 | 71 | rvo_apex = [(vx + mvx)/2, (vy + mvy)/2] 72 | vo_apex = [mvx, mvy] 73 | 74 | cl_vector = [mx - x, my - y] 75 | 76 | cur_v = [vx - rvo_apex[0], vy - rvo_apex[1]] 77 | 78 | dis_rv = reciprocal_vel_obs.distance(rvo_apex, vo_apex) 79 | radians_rv = atan2(rvo_apex[1] - vo_apex[1], rvo_apex[0] - vo_apex[0]) 80 | 81 | diff = line_left_ori - radians_rv 82 | 83 | temp = pi - 2 * half_angle 84 | 85 | if temp == 0: 86 | temp = temp + 0.01 87 | 88 | dis_diff = dis_rv * sin(diff) / sin(temp) 89 | 90 | if reciprocal_vel_obs.cross_product(cl_vector, cur_v) <= 0: 91 | apex = [ rvo_apex[0] - dis_diff * cos(line_right_ori), rvo_apex[1] - dis_diff * sin(line_right_ori) ] 92 | else: 93 | apex = [ vo_apex[0] + dis_diff * cos(line_right_ori), vo_apex[1] + dis_diff * sin(line_right_ori) ] 94 | 95 | return apex+[line_left_ori, line_right_ori] 96 | 97 | def config_vo_line(self, robot_state, line): 98 | 99 | x, y, vx, vy, r = robot_state[0:5] 100 | 101 | apex = [0, 0] 102 | 103 | theta1 = atan2(line[0][1] - y, line[0][0] - x) 104 | theta2 = atan2(line[1][1] - y, line[1][0] - x) 105 | 106 | dis_mr1 = sqrt( (line[0][1] - y)**2 + (line[0][0] - x)**2 ) 107 | dis_mr2 = sqrt( (line[1][1] - y)**2 + (line[1][0] - x)**2 ) 108 | 109 | half_angle1 = asin(reciprocal_vel_obs.clamp(r/dis_mr1, 0, 1)) 110 | half_angle2 = asin(reciprocal_vel_obs.clamp(r/dis_mr2, 0, 1)) 111 | 112 | if reciprocal_vel_obs.wraptopi(theta2-theta1) > 0: 113 | line_left_ori = reciprocal_vel_obs.wraptopi(theta2 + half_angle2) 114 | line_right_ori = reciprocal_vel_obs.wraptopi(theta1 - half_angle1) 115 | else: 116 | line_left_ori = reciprocal_vel_obs.wraptopi(theta1 + half_angle1) 117 | line_right_ori = 
reciprocal_vel_obs.wraptopi(theta2 - half_angle2) 118 | 119 | return apex+[line_left_ori, line_right_ori] 120 | 121 | def vel_candidate(self, robot_state, vo_list): 122 | 123 | vo_outside, vo_inside = [], [] 124 | 125 | cur_vx, cur_vy = robot_state[2:4] 126 | cur_vx_range = np.clip([cur_vx-self.acceler, cur_vx+self.acceler], -self.vxmax, self.vxmax) 127 | cur_vy_range = np.clip([cur_vy-self.acceler, cur_vy+self.acceler], -self.vymax, self.vymax) 128 | 129 | for new_vx in np.arange(cur_vx_range[0], cur_vx_range[1], 0.05): 130 | for new_vy in np.arange(cur_vy_range[0], cur_vy_range[1], 0.05): 131 | 132 | if sqrt(new_vx**2 + new_vy**2) < 0.3: 133 | continue 134 | 135 | if self.vo_out2(new_vx, new_vy, vo_list): 136 | vo_outside.append([new_vx, new_vy]) 137 | else: 138 | vo_inside.append([new_vx, new_vy]) 139 | 140 | return vo_outside, vo_inside 141 | 142 | def vo_out(self, vx, vy, vo_list): 143 | 144 | for rvo in vo_list: 145 | theta = atan2(vy - rvo[1], vx - rvo[0]) 146 | if reciprocal_vel_obs.between_angle(rvo[2], rvo[3], theta): 147 | return False 148 | 149 | return True 150 | 151 | def vo_out2(self, vx, vy, vo_list): 152 | 153 | for rvo in vo_list: 154 | line_left_vector = [cos(rvo[2]), sin(rvo[2])] 155 | line_right_vector = [cos(rvo[3]), sin(rvo[3])] 156 | line_vector = [vx - rvo[0], vy - rvo[1]] 157 | if reciprocal_vel_obs.between_vector(line_left_vector, line_right_vector, line_vector): 158 | return False 159 | 160 | return True 161 | 162 | def vel_select(self, robot_state, vo_outside, vo_inside, nei_state_list, obs_cir_list, obs_line_list, vo_list): 163 | 164 | vel_des = [robot_state[5], robot_state[6]] 165 | 166 | if (len(vo_outside) != 0): 167 | temp= min(vo_outside, key = lambda v: reciprocal_vel_obs.distance(v, vel_des)) 168 | return temp 169 | else: 170 | temp = min(vo_inside, key = lambda v: self.penalty(v, vel_des, robot_state, nei_state_list, obs_cir_list, obs_line_list, 1) ) 171 | return temp 172 | 173 | def penalty(self, vel, vel_des, robot_state, nei_state_list, obs_cir_list, obs_line_list, factor): 174 | 175 | tc_list = [] 176 | 177 | for moving in nei_state_list: 178 | 179 | rel_x, rel_y = robot_state[0:2] - moving[0:2] 180 | rel_vx = 2*vel[0] - moving[2] - robot_state[2] 181 | rel_vy = 2*vel[1] - moving[3] - robot_state[3] 182 | 183 | tc = self.cal_exp_tim(rel_x, rel_y, rel_vx, rel_vy, robot_state[4]+moving[4]) 184 | 185 | tc_list.append(tc) 186 | 187 | for obs_cir in obs_cir_list: 188 | 189 | rel_x, rel_y = robot_state[0:2] - obs_cir[0:2] 190 | rel_vx = vel[0] - moving[2] 191 | rel_vy = vel[1] - moving[3] 192 | 193 | tc = self.cal_exp_tim(rel_x, rel_y, rel_vx, rel_vy, robot_state[4]+moving[4]) 194 | 195 | tc_list.append(tc) 196 | 197 | for obs_seg in obs_line_list: 198 | tc = reciprocal_vel_obs.exp_collision_segment(obs_seg, robot_state[0], robot_state[1], vel[0], vel[1], robot_state[4]) 199 | tc_list.append(tc) 200 | 201 | tc_min = min(tc_list) 202 | 203 | if tc_min == 0: 204 | tc_inv = inf 205 | else: 206 | tc_inv = 1/tc_min 207 | 208 | penalty_vel = factor * tc_inv + reciprocal_vel_obs.distance(vel_des, vel) 209 | 210 | return penalty_vel 211 | 212 | # judge the direction by vector 213 | @staticmethod 214 | def between_vector(line_left_vector, line_right_vector, line_vector): 215 | 216 | if reciprocal_vel_obs.cross_product(line_left_vector, line_vector) <= 0 and reciprocal_vel_obs.cross_product(line_right_vector, line_vector) >= 0: 217 | return True 218 | else: 219 | return False 220 | 221 | @staticmethod 222 | def between_angle(line_left_ori, line_right_ori, 
line_ori): 223 | 224 | if reciprocal_vel_obs.wraptopi(line_ori - line_left_ori) <= 0 and reciprocal_vel_obs.wraptopi(line_ori - line_right_ori) >= 0: 225 | return True 226 | else: 227 | return False 228 | 229 | @staticmethod 230 | def distance(point1, point2): 231 | return sqrt( (point2[0] - point1[0]) ** 2 + (point2[1] - point1[1]) ** 2 ) 232 | 233 | @staticmethod 234 | def cross_product(vector1, vector2): 235 | return float(vector1[0] * vector2[1] - vector2[0] * vector1[1]) 236 | 237 | # calculate the expect collision time 238 | @staticmethod 239 | def cal_exp_tim(rel_x, rel_y, rel_vx, rel_vy, r): 240 | # rel_x: xa - xb 241 | # rel_y: ya - yb 242 | 243 | # (vx2 + vy2)*t2 + (2x*vx + 2*y*vy)*t+x2+y2-(r+mr)2 = 0 244 | 245 | a = rel_vx ** 2 + rel_vy ** 2 246 | b = 2* rel_x * rel_vx + 2* rel_y * rel_vy 247 | c = rel_x ** 2 + rel_y ** 2 - r ** 2 248 | 249 | if c <= 0: 250 | return 0 251 | 252 | temp = b ** 2 - 4 * a * c 253 | 254 | if temp <= 0: 255 | t = inf 256 | else: 257 | t1 = ( -b + sqrt(temp) ) / (2 * a) 258 | t2 = ( -b - sqrt(temp) ) / (2 * a) 259 | 260 | t3 = t1 if t1 >= 0 else inf 261 | t4 = t2 if t2 >= 0 else inf 262 | 263 | t = min(t3, t4) 264 | 265 | return t 266 | 267 | @staticmethod 268 | def segment_in_circle(x, y, r, line): 269 | # 270 | # center: x, y, center point of the circle 271 | # r, radius of the circle 272 | # line: two point 273 | # reference: https://stackoverflow.com/questions/1073336/circle-line-segment-collision-detection-algorithm 274 | start_point = np.array(line[0:2]) 275 | 276 | d = np.array([line[2] - line[0], line[3] - line[1] ]) 277 | f = np.array([line[0] - x, line[1] - y]) 278 | 279 | # t2 * (d · d) + 2t*( f · d ) + ( f · f - r2 ) = 0 280 | a = d@d 281 | b = 2*f@d 282 | c = f@f - r**2 283 | 284 | discriminant = b**2 - 4*a*c 285 | 286 | if discriminant < 0: 287 | return None 288 | else: 289 | t1 = (-b - sqrt(discriminant)) / (2*a) 290 | t2 = (-b + sqrt(discriminant)) / (2*a) 291 | 292 | if t1>=0 and t1<=1 and t2>=0 and t2<=1: 293 | segment_point1 = start_point + t1 * d 294 | segment_point2 = start_point + t2 * d 295 | 296 | elif t1>=0 and t1<=1 and t2 > 1: 297 | segment_point1 = start_point + t1 * d 298 | segment_point2 = np.array(line[2:4]) 299 | 300 | elif t1<0 and t2>=0 and t2<=1: 301 | segment_point1 = np.array(line[0:2]) 302 | segment_point2 = start_point + t2 * d 303 | 304 | elif t1<0 and t2>1: 305 | segment_point1 = np.array(line[0:2]) 306 | segment_point2 = np.array(line[2:4]) 307 | else: 308 | return None 309 | 310 | diff_norm = np.linalg.norm(segment_point1 - segment_point2) 311 | 312 | if diff_norm == 0: 313 | return None 314 | 315 | return [segment_point1, segment_point2] 316 | 317 | @staticmethod 318 | def wraptopi(theta): 319 | 320 | if theta > pi: 321 | theta = theta - 2*pi 322 | 323 | if theta < -pi: 324 | theta = theta + 2*pi 325 | 326 | return theta 327 | 328 | @staticmethod 329 | def clamp(n, minn, maxn): 330 | return max(min(maxn, n), minn) 331 | 332 | @staticmethod 333 | def exp_collision_segment(obs_seg, x, y, vx, vy, r): 334 | 335 | point1 = obs_seg[0] 336 | point2 = obs_seg[1] 337 | 338 | t1 = reciprocal_vel_obs.cal_exp_tim(x - point1[0], y - point1[1], vx, vy, r) 339 | t2 = reciprocal_vel_obs.cal_exp_tim(x - point2[0], y - point2[1], vx, vy, r) 340 | 341 | c_point = np.array([x, y]) 342 | 343 | l0 = (point2 - point1 ) @ (point2 - point1 ) 344 | t = (c_point - point1) @ (point2 - point1) / l0 345 | project = point1 + t * (point2 - point1) 346 | distance = sqrt( (project - c_point) @ (project - c_point) ) 347 | theta1 = atan2( 
(project - c_point)[1], (project - c_point)[0] ) 348 | theta2 = atan2( vy, vx) 349 | theta3 = reciprocal_vel_obs.wraptopi(theta2 - theta1) 350 | 351 | real_distance = (distance - r) / cos(theta3) 352 | speed = sqrt(vy**2 + vx**2) 353 | 354 | if speed == 0: 355 | t3 = inf 356 | else: 357 | t3 = real_distance / sqrt(vy**2 + vx**2) 358 | 359 | if t3 < 0: 360 | t3 = inf 361 | 362 | return min([t1, t2, t3]) 363 | 364 | 365 | 366 | 367 | -------------------------------------------------------------------------------- /gym_env/gym_env/envs/rvo_inter.py: -------------------------------------------------------------------------------- 1 | from gym_env.envs.reciprocal_vel_obs import reciprocal_vel_obs 2 | from math import sqrt, atan2, asin, sin, pi, cos, inf 3 | import numpy as np 4 | 5 | class rvo_inter(reciprocal_vel_obs): 6 | def __init__(self, neighbor_region=5, neighbor_num=10, vxmax=1.5, vymax=1.5, acceler=0.5, env_train=True, exp_radius=0.2, ctime_threshold=5, ctime_line_threshold=1): 7 | super(rvo_inter, self).__init__(neighbor_region, vxmax, vymax, acceler) 8 | 9 | self.env_train = env_train 10 | self.exp_radius = exp_radius 11 | self.nm = neighbor_num 12 | self.ctime_threshold = ctime_threshold 13 | self.ctime_line_threshold = ctime_line_threshold 14 | 15 | def config_vo_inf(self, robot_state, nei_state_list, obs_cir_list, obs_line_list, action=np.zeros((2,)), **kwargs): 16 | # mode: vo, rvo, hrvo 17 | robot_state, ns_list, oc_list, ol_list = self.preprocess(robot_state, nei_state_list, obs_cir_list, obs_line_list) 18 | 19 | action = np.squeeze(action) 20 | 21 | vo_list1 = list(map(lambda x: self.config_vo_circle2(robot_state, x, action, 'rvo', **kwargs), ns_list)) 22 | vo_list2 = list(map(lambda y: self.config_vo_circle2(robot_state, y,action, 'vo', **kwargs), oc_list)) 23 | vo_list3 = list(map(lambda z: self.config_vo_line2(robot_state, z, action, **kwargs), ol_list)) 24 | 25 | obs_vo_list = [] 26 | 27 | collision_flag = False 28 | vo_flag = False 29 | min_exp_time = inf 30 | 31 | for vo_inf in vo_list1 + vo_list2 + vo_list3: 32 | obs_vo_list.append(vo_inf[0]) 33 | if vo_inf[1] is True: 34 | # obs_vo_list.append(vo_inf[0]) 35 | vo_flag = True 36 | if vo_inf[2] < min_exp_time: 37 | min_exp_time = vo_inf[2] 38 | 39 | if vo_inf[3] is True: collision_flag = True 40 | 41 | obs_vo_list.sort(reverse=True, key=lambda x: (-x[-1], x[-2])) 42 | 43 | if len(obs_vo_list) > self.nm: 44 | obs_vo_list_nm = obs_vo_list[-self.nm:] 45 | else: 46 | obs_vo_list_nm = obs_vo_list 47 | 48 | if self.nm == 0: 49 | obs_vo_list_nm = [] 50 | 51 | return obs_vo_list_nm, vo_flag, min_exp_time, collision_flag 52 | 53 | def config_vo_reward(self, robot_state, nei_state_list, obs_cir_list, obs_line_list, action=np.zeros((2,)), **kwargs): 54 | 55 | robot_state, ns_list, oc_list, ol_list = self.preprocess(robot_state, nei_state_list, obs_cir_list, obs_line_list) 56 | 57 | vo_list1 = list(map(lambda x: self.config_vo_circle2(robot_state, x, action, 'rvo', **kwargs), ns_list)) 58 | vo_list2 = list(map(lambda y: self.config_vo_circle2(robot_state, y,action, 'vo', **kwargs), oc_list)) 59 | vo_list3 = list(map(lambda z: self.config_vo_line2(robot_state, z, action, **kwargs), ol_list)) 60 | 61 | vo_flag = False 62 | min_exp_time = inf 63 | min_dis = inf 64 | 65 | for vo_inf in vo_list1 + vo_list2 + vo_list3: 66 | 67 | if vo_inf[4] < min_dis: 68 | min_dis = vo_inf[4] 69 | 70 | if vo_inf[1] is True: 71 | vo_flag = True 72 | if vo_inf[2] < min_exp_time: 73 | min_exp_time = vo_inf[2] 74 | 75 | new_des_list = [des_v[-1] for des_v in 
vo_list3] 76 | 77 | return vo_flag, min_exp_time, min_dis 78 | 79 | 80 | def config_vo_observe(self, robot_state, nei_state_list, obs_cir_list, obs_line_list): 81 | 82 | vo_list, _, _, _ = self.config_vo_inf(robot_state, nei_state_list, obs_cir_list, obs_line_list) 83 | 84 | return vo_list 85 | 86 | def config_vo_circle2(self, state, circular, action, mode='rvo', **kwargs): 87 | 88 | x, y, vx, vy, r = state[0:5] 89 | mx, my, mvx, mvy, mr = circular[0:5] 90 | 91 | if mvx == 0 and mvy == 0: 92 | mode = 'vo' 93 | 94 | vo_flag = False 95 | collision_flag = False 96 | 97 | rel_x = x - mx 98 | rel_y = y - my 99 | 100 | dis_mr = sqrt((rel_y)**2 + (rel_x)**2) 101 | angle_mr = atan2(my - y, mx - x) 102 | 103 | real_dis_mr = sqrt((rel_y)**2 + (rel_x)**2) 104 | 105 | env_train = kwargs.get('env_train', self.env_train) 106 | 107 | if env_train: 108 | if dis_mr <= r + mr: 109 | dis_mr = r + mr 110 | collision_flag = True 111 | else: 112 | if dis_mr <= r - self.exp_radius + mr: 113 | collision_flag = True 114 | 115 | if dis_mr <= r + mr: 116 | dis_mr = r + mr 117 | 118 | ratio = (r + mr)/dis_mr 119 | half_angle = asin( ratio ) 120 | line_left_ori = reciprocal_vel_obs.wraptopi(angle_mr + half_angle) 121 | line_right_ori = reciprocal_vel_obs.wraptopi(angle_mr - half_angle) 122 | 123 | if mode == 'vo': 124 | vo = [mvx, mvy, line_left_ori, line_right_ori] 125 | rel_vx = action[0] - mvx 126 | rel_vy = action[1] - mvy 127 | 128 | elif mode == 'rvo': 129 | vo = [(vx + mvx)/2, (vy + mvy)/2, line_left_ori, line_right_ori] 130 | rel_vx = 2*action[0] - mvx - vx 131 | rel_vy = 2*action[1] - mvy - vy 132 | 133 | exp_time = inf 134 | 135 | if self.vo_out_jud_vector(action[0], action[1], vo): 136 | vo_flag = False 137 | exp_time = inf 138 | else: 139 | exp_time = self.cal_exp_tim(rel_x, rel_y, rel_vx, rel_vy, r + mr) 140 | if exp_time < self.ctime_threshold: 141 | vo_flag = True 142 | else: 143 | vo_flag = False 144 | exp_time = inf 145 | 146 | input_exp_time = 1 / (exp_time+0.2) 147 | min_dis = real_dis_mr-mr 148 | 149 | observation_vo = [vo[0], vo[1], cos(vo[2]), sin(vo[2]), cos(vo[3]), sin(vo[3]), min_dis, input_exp_time] 150 | 151 | return [observation_vo, vo_flag, exp_time, collision_flag, min_dis] 152 | 153 | def config_vo_line2(self, robot_state, line, action, **kwargs): 154 | 155 | x, y, vx, vy, r = robot_state[0:5] 156 | 157 | apex = [0, 0] 158 | 159 | theta1 = atan2(line[0][1] - y, line[0][0] - x) 160 | theta2 = atan2(line[1][1] - y, line[1][0] - x) 161 | 162 | dis_mr1 = sqrt( (line[0][1] - y)**2 + (line[0][0] - x)**2 ) 163 | dis_mr2 = sqrt( (line[1][1] - y)**2 + (line[1][0] - x)**2 ) 164 | 165 | half_angle1 = asin(reciprocal_vel_obs.clamp(r/dis_mr1, 0, 1)) 166 | half_angle2 = asin(reciprocal_vel_obs.clamp(r/dis_mr2, 0, 1)) 167 | 168 | if reciprocal_vel_obs.wraptopi(theta2-theta1) > 0: 169 | line_left_ori = reciprocal_vel_obs.wraptopi(theta2 + half_angle2) 170 | line_right_ori = reciprocal_vel_obs.wraptopi(theta1 - half_angle1) 171 | else: 172 | line_left_ori = reciprocal_vel_obs.wraptopi(theta1 + half_angle1) 173 | line_right_ori = reciprocal_vel_obs.wraptopi(theta2 - half_angle2) 174 | 175 | vo = apex + [line_left_ori, line_right_ori] 176 | exp_time = inf 177 | 178 | temp = robot_state[0:2] 179 | temp1 = line 180 | 181 | p2s, p2s_angle = rvo_inter.point2segment(temp, temp1) 182 | 183 | env_train = kwargs.get('env_train', self.env_train) 184 | 185 | if env_train: 186 | collision_flag = True if p2s <= r else False 187 | else: 188 | collision_flag = True if p2s <= r - self.exp_radius else False 189 | 190 | if 
self.vo_out_jud_vector(action[0], action[1], vo): 191 | vo_flag = False 192 | else: 193 | exp_time = reciprocal_vel_obs.exp_collision_segment(line, x, y, action[0], action[1], r) 194 | if exp_time < self.ctime_line_threshold: 195 | vo_flag = True 196 | else: 197 | vo_flag = False 198 | exp_time = inf 199 | 200 | input_exp_time = 1 / (exp_time+0.2) 201 | min_dis = p2s 202 | 203 | observation_vo = [vo[0], vo[1], cos(vo[2]), sin(vo[2]), cos(vo[3]), sin(vo[3]), min_dis, input_exp_time] 204 | 205 | return [observation_vo, vo_flag, exp_time, collision_flag, min_dis] 206 | 207 | def vo_out_jud(self, vx, vy, vo): 208 | 209 | theta = atan2(vy - vo[1], vx - vo[0]) 210 | 211 | if reciprocal_vel_obs.between_angle(vo[2], vo[3], theta): 212 | return False 213 | else: 214 | return True 215 | 216 | def vo_out_jud_vector(self, vx, vy, vo): 217 | 218 | rel_vector = [vx - vo[0], vy - vo[1]] 219 | line_left_vector = [cos(vo[2]), sin(vo[2]) ] 220 | line_right_vector = [cos(vo[3]), sin(vo[3]) ] 221 | 222 | if reciprocal_vel_obs.between_vector(line_left_vector, line_right_vector, rel_vector): 223 | return False 224 | else: 225 | return True 226 | 227 | @staticmethod 228 | def point2segment(point, segment): 229 | 230 | point = np.squeeze(point[0:2]) 231 | sp = segment[0] 232 | ep = segment[1] 233 | 234 | l2 = (ep - sp) @ (ep - sp) 235 | 236 | if (l2 == 0.0): 237 | return np.linalg.norm(point - sp), 238 | 239 | t = max(0, min(1, ((point-sp) @ (ep-sp)) / l2 )) 240 | 241 | projection = sp + t * (ep-sp) 242 | 243 | relative = projection - point 244 | 245 | distance = np.linalg.norm(relative) 246 | angle = atan2( relative[1], relative[0] ) 247 | 248 | return distance, angle -------------------------------------------------------------------------------- /gym_env/gym_env_test.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_env 3 | from pathlib import Path 4 | 5 | world_name = 'gym_test_world.yaml' 6 | # world_name = 'dynamic_obs_test.yaml' 7 | 8 | env = gym.make('mrnav-v1', world_name=world_name, robot_number=10, robot_init_mode=3, random_bear=True) 9 | env.reset() 10 | 11 | for i in range(300): 12 | 13 | vel_list = env.ir_gym.cal_des_omni_list() 14 | 15 | obs_list, reward_list, done_list, info_list = env.step_ir(vel_list, vel_type='omni') 16 | id_list=[id for id, done in enumerate(done_list) if done==True] 17 | 18 | for id in id_list: 19 | env.reset_one(id) 20 | 21 | env.render() 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /gym_env/gym_test_world.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.1 11 | interval: 1 12 | square: [0, 0, 9, 9] 13 | circular: [5, 5, 4] 14 | 15 | -------------------------------------------------------------------------------- /gym_env/readme.md: -------------------------------------------------------------------------------- 1 | # gym_pysim 2 | 3 | The gym extension for the multi-robot navigation environment based on the [intelligent-robot-simulator](https://github.com/hanruihua/intelligent-robot-simulator) 4 | 5 | ## Installation 6 | 7 | > cd rl_rvo_nav/gym_env 8 | > pip install -e . 
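
## Example usage

After installation, the environment is created through the standard `gym.make` interface. The snippet below is a minimal sketch adapted from `gym_env_test.py` in this folder; it assumes it is run from the `gym_env` directory so that `gym_test_world.yaml` is found. It drives every robot with its desired velocity toward the goal and resets a robot whenever it is done (e.g. after a collision).

```python
import gym
import gym_env  # registers the 'mrnav-v1' environment

env = gym.make('mrnav-v1', world_name='gym_test_world.yaml', robot_number=10,
               robot_init_mode=3, random_bear=True)
env.reset()

for i in range(300):
    # desired omnidirectional velocities pointing toward each robot's goal
    vel_list = env.ir_gym.cal_des_omni_list()

    obs_list, reward_list, done_list, info_list = env.step_ir(vel_list, vel_type='omni')

    # reset robots that are done (e.g. collided)
    for rid, done in enumerate(done_list):
        if done:
            env.reset_one(rid)

    env.render()
```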
9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /gym_env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='gym_env', 4 | version='0.0.1', 5 | install_requires=['gym'] # And any other dependencies foo needs 6 | ) 7 | 8 | -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cir_10.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cir_10.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cir_14.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cir_14.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cir_16.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cir_16.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cir_20.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cir_20.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cir_6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cir_6.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_cor_8.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_cor_8.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_dyna_obs.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_dyna_obs.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_random_10.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_random_10.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_random_14.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_random_14.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_random_16.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_random_16.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_random_20.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_random_20.gif -------------------------------------------------------------------------------- /rl_rvo_nav/gif/rl_rvo_random_6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/gif/rl_rvo_random_6.gif -------------------------------------------------------------------------------- /rl_rvo_nav/policy/policy_rnn_ac.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from gym.spaces import Box 5 | from torch.distributions.normal import Normal 6 | from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence 7 | import time 8 | 9 | def mlp(sizes, activation, output_activation=nn.Identity): 10 | layers = [] 11 | 12 | for j in range(len(sizes)-1): 13 | act = activation if j < len(sizes)-2 else output_activation 14 | layers += [nn.Linear(sizes[j], sizes[j+1]), act()] 15 | 16 | return nn.Sequential(*layers) 17 | 18 | def mlp3(sizes, activation=nn.ReLU, output_activation=nn.Identity, drop_p = 0.5): 19 | 20 | layers = [] 21 | for j in range(len(sizes)-1): 22 | if j < len(sizes)-3: 23 | act = activation 24 | layers += [nn.Linear(sizes[j], sizes[j+1]), nn.Dropout(p=drop_p), act()] 25 | else: 26 | layers += [nn.Linear(sizes[j], sizes[j+1]), nn.Tanh()] 27 | 28 | return nn.Sequential(*layers) 29 | 30 | class rnn_ac(nn.Module): 31 | 32 | def __init__(self, observation_space, action_space, state_dim=5, rnn_input_dim=4, 33 | rnn_hidden_dim=64, hidden_sizes_ac=(256, 256), hidden_sizes_v=(16, 16), 34 | activation=nn.ReLU, output_activation=nn.Tanh, output_activation_v= nn.Identity, use_gpu=False, rnn_mode='GRU', drop_p=0): 35 | super().__init__() 36 | 37 | self.use_gpu = use_gpu 38 | torch.cuda.synchronize() 39 | 40 | if rnn_mode == 'biGRU': 41 | obs_dim = (rnn_hidden_dim + state_dim) 42 | elif rnn_mode == 'GRU' or 'LSTM': 43 | obs_dim = (rnn_hidden_dim + state_dim) 44 | 45 | rnn = rnn_Reader(state_dim, rnn_input_dim, rnn_hidden_dim, use_gpu=use_gpu, mode=rnn_mode) 46 | 47 | # policy builder depends on action space 48 | if isinstance(action_space, Box): 49 | self.pi = GaussianActor(obs_dim, action_space.shape[0], hidden_sizes_ac, activation, output_activation, rnn_reader=rnn, use_gpu=use_gpu) 50 | 51 | # build value function 52 | self.v = Critic(obs_dim, hidden_sizes_v, activation, output_activation_v, rnn_reader=rnn, use_gpu=use_gpu) 53 | 54 | 55 | def step(self, obs, std_factor=1): 56 | with torch.no_grad(): 57 | pi_dis = self.pi._distribution(obs, std_factor) 58 | a = pi_dis.sample() 59 | logp_a = self.pi._log_prob_from_distribution(pi_dis, a) 60 | v = self.v(obs) 61 | 62 | if self.use_gpu: 63 | a = a.cpu() 64 | logp_a = logp_a.cpu() 65 | v = v.cpu() 66 | 67 | return a.numpy(), v.numpy(), logp_a.numpy() 68 | 69 | def act(self, obs, std_factor=1): 70 | return self.step(obs, std_factor)[0] 71 | 72 | 73 | class rnn_Reader(nn.Module): 74 | def 
__init__(self, state_dim, input_dim, hidden_dim, use_gpu=False, mode='GRU'): 75 | super().__init__() 76 | 77 | self.state_dim = state_dim 78 | self.mode = mode 79 | 80 | if mode == 'GRU': 81 | self.rnn_net = nn.GRU(input_dim, hidden_dim, batch_first=True) 82 | elif mode == 'LSTM': 83 | self.rnn_net = nn.LSTM(input_dim, hidden_dim, batch_first=True) 84 | elif mode == 'biGRU': 85 | self.rnn_net = nn.GRU(input_dim, hidden_dim, batch_first=True, bidirectional=True) 86 | 87 | self.use_gpu=use_gpu 88 | 89 | self.input_dim = input_dim 90 | self.hidden_dim = hidden_dim 91 | 92 | des_dim = state_dim + hidden_dim 93 | self.ln = nn.LayerNorm(des_dim) 94 | 95 | if use_gpu: 96 | self.rnn_net = self.rnn_net.cuda() 97 | self.ln = self.ln.cuda() 98 | # self.conv = self.conv.cuda() 99 | 100 | def obs_rnn(self, obs): 101 | 102 | obs = torch.as_tensor(obs, dtype=torch.float32) 103 | 104 | if self.use_gpu: 105 | obs=obs.cuda() 106 | 107 | moving_state = obs[self.state_dim:] 108 | robot_state = obs[:self.state_dim] 109 | mov_len = int(moving_state.size()[0] / self.input_dim) 110 | rnn_input = torch.reshape(moving_state, (1, mov_len, self.input_dim)) 111 | 112 | if self.mode == 'GRU' : 113 | output, hn = self.rnn_net(rnn_input) 114 | elif self.mode == 'biGRU': 115 | output, hn = self.rnn_net(rnn_input) 116 | elif self.mode == 'LSTM': 117 | output, (hn, cn) = self.rnn_net(rnn_input) 118 | 119 | hnv = torch.squeeze(hn) 120 | if self.mode == 'biGRU': 121 | hnv = torch.sum(hnv, 0) 122 | 123 | rnn_obs = torch.cat((robot_state, hnv)) 124 | rnn_obs = self.ln(rnn_obs) 125 | 126 | return rnn_obs 127 | 128 | def obs_rnn_list(self, obs_tensor_list): 129 | 130 | mov_len = [(len(obs_tensor)-self.state_dim)/self.input_dim for obs_tensor in obs_tensor_list] 131 | obs_pad = pad_sequence(obs_tensor_list, batch_first = True) 132 | robot_state_batch = obs_pad[:, :self.state_dim] 133 | batch_size = len(obs_tensor_list) 134 | if self.use_gpu: 135 | robot_state_batch=robot_state_batch.cuda() 136 | 137 | def obs_tensor_reform(obs_tensor): 138 | mov_tensor = obs_tensor[self.state_dim:] 139 | mov_tensor_len = int(len(mov_tensor)/self.input_dim) 140 | re_mov_tensor = torch.reshape(mov_tensor, (mov_tensor_len, self.input_dim)) 141 | return re_mov_tensor 142 | 143 | re_mov_list = list(map(lambda o: obs_tensor_reform(o), obs_tensor_list)) 144 | re_mov_pad = pad_sequence(re_mov_list, batch_first = True) 145 | 146 | if self.use_gpu: 147 | re_mov_pad=re_mov_pad.cuda() 148 | 149 | moving_state_pack = pack_padded_sequence(re_mov_pad, mov_len, batch_first=True, enforce_sorted=False) 150 | 151 | if self.mode == 'GRU': 152 | output, hn= self.rnn_net(moving_state_pack) 153 | elif self.mode == 'biGRU': 154 | output, hn= self.rnn_net(moving_state_pack) 155 | elif self.mode == 'LSTM': 156 | output, (hn, cn) = self.rnn_net(moving_state_pack) 157 | 158 | hnv = torch.squeeze(hn) 159 | 160 | if self.mode == 'biGRU': 161 | hnv = torch.sum(hnv, 0) 162 | 163 | fc_obs_batch = torch.cat((robot_state_batch, hnv), 1) 164 | fc_obs_batch = self.ln(fc_obs_batch) 165 | 166 | return fc_obs_batch 167 | 168 | class Actor(nn.Module): 169 | 170 | def _distribution(self, obs): 171 | raise NotImplementedError 172 | 173 | def _log_prob_from_distribution(self, pi, act): 174 | raise NotImplementedError 175 | 176 | def forward(self, obs, act=None, std_factor=1): 177 | # Produce action distributions for given observations, and 178 | # optionally compute the log likelihood of given actions under 179 | # those distributions. 
180 | pi = self._distribution(obs, std_factor) 181 | logp_a = None 182 | 183 | if act is not None: 184 | logp_a = self._log_prob_from_distribution(pi, act) 185 | 186 | return pi, logp_a 187 | 188 | 189 | class GaussianActor(Actor): 190 | 191 | def __init__(self, obs_dim, act_dim, hidden_sizes, activation, output_activation, rnn_reader=None, use_gpu=False): 192 | super().__init__() 193 | 194 | self.rnn_reader = rnn_reader 195 | self.use_gpu = use_gpu 196 | self.net_out=mlp([obs_dim] + list(hidden_sizes) + [act_dim], activation, output_activation) 197 | # self.net_out=mlp3([obs_dim] + list(hidden_sizes) + [act_dim], activation, output_activation, 0) 198 | 199 | log_std = -1 * np.ones(act_dim, dtype=np.float32) 200 | 201 | if use_gpu: 202 | self.log_std = torch.nn.Parameter(torch.as_tensor(log_std, device=torch.device('cuda'))) 203 | self.net_out=self.net_out.cuda() 204 | else: 205 | self.log_std = torch.nn.Parameter(torch.as_tensor(log_std)) 206 | 207 | def _distribution(self, obs, std_factor=1): 208 | 209 | if isinstance(obs, list): 210 | obs = self.rnn_reader.obs_rnn_list(obs) 211 | net_out = self.net_out(obs) 212 | else: 213 | obs = self.rnn_reader.obs_rnn(obs) 214 | net_out = self.net_out(obs) 215 | 216 | mu = net_out 217 | std = torch.exp(self.log_std) 218 | std = std_factor * std 219 | 220 | return Normal(mu, std) 221 | 222 | def _log_prob_from_distribution(self, pi, act): 223 | 224 | if self.use_gpu: 225 | act = act.cuda() 226 | 227 | return pi.log_prob(act).sum(axis=-1) # Last axis sum needed for Torch Normal distribution 228 | 229 | 230 | class Critic(nn.Module): 231 | 232 | def __init__(self, obs_dim, hidden_sizes, activation, output_activation, rnn_reader=None, use_gpu=False): 233 | super().__init__() 234 | self.v_net = mlp([obs_dim] + list(hidden_sizes) + [1], activation, output_activation) 235 | 236 | if use_gpu: 237 | self.v_net = self.v_net.cuda() 238 | 239 | self.rnn_reader = rnn_reader 240 | 241 | def forward(self, obs): 242 | 243 | if self.rnn_reader != None: 244 | if isinstance(obs, list): 245 | obs = self.rnn_reader.obs_rnn_list(obs) 246 | else: 247 | obs = self.rnn_reader.obs_rnn(obs) 248 | v = torch.squeeze(self.v_net(obs), -1) 249 | 250 | return v -------------------------------------------------------------------------------- /rl_rvo_nav/policy_test/policy_test.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_env 3 | from pathlib import Path 4 | import pickle 5 | import sys 6 | from rl_rvo_nav.policy_test.post_train import post_train 7 | import argparse 8 | import os 9 | from os.path import dirname, abspath 10 | 11 | os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" 12 | 13 | parser = argparse.ArgumentParser(description='policy test') 14 | parser.add_argument('--policy_type', default='drl') 15 | parser.add_argument('--model_path', default='policy_train/model_save') 16 | parser.add_argument('--model_name', default='r4_0/r4_0_250.pt') # policy_dict=False 17 | # parser.add_argument('--model_name', default='r4_0/r4_0_check_point_250.pt') with check point --> policy_dict=True 18 | parser.add_argument('--arg_name', default='r4_0/r4_0') 19 | parser.add_argument('--world_name', default='policy_test_world.yaml') # policy_test_world_lines.yaml 20 | parser.add_argument('--render', action='store_true') 21 | parser.add_argument('--robot_number', type=int, default='4') 22 | parser.add_argument('--num_episodes', type=int, default='100') 23 | parser.add_argument('--dis_mode', type=int, default='3') # 3 circle, 2 random, 5 
for corridor 24 | parser.add_argument('--save', action='store_true') 25 | parser.add_argument('--full', action='store_true') 26 | parser.add_argument('--show_traj', action='store_true') 27 | parser.add_argument('--policy_dict', action='store_true') 28 | parser.add_argument('--once', action='store_true') 29 | 30 | policy_args = parser.parse_args() 31 | 32 | cur_path = Path(__file__).parent.parent 33 | 34 | # model_base_path = str(cur_path) + '/' + policy_args.model_path 35 | model_base_path = dirname(dirname(abspath(__file__))) + '/' + policy_args.model_path 36 | args_path = model_base_path + '/' + policy_args.arg_name 37 | 38 | # args from train 39 | r = open(args_path, 'rb') 40 | args = pickle.load(r) 41 | 42 | if policy_args.policy_type == 'drl': 43 | fname_model = model_base_path + '/' + policy_args.model_name 44 | policy_name = 'drl_rvo' 45 | 46 | env = gym.make('mrnav-v1', world_name=policy_args.world_name, robot_number=policy_args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=policy_args.dis_mode, env_train=False, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, goal_threshold=0.2, full=policy_args.full) 47 | 48 | policy_name = policy_name + '_' + str(policy_args.robot_number) + '_dis' + str(policy_args.dis_mode) 49 | 50 | pt = post_train(env, num_episodes=policy_args.num_episodes, reset_mode=policy_args.dis_mode, render=policy_args.render, std_factor=0.00001, acceler_vel=1.0, max_ep_len=300, neighbors_region=args.neighbors_region, neighbor_num=args.neighbors_num, args=args, save=policy_args.save, show_traj=policy_args.show_traj, figure_format='eps') 51 | pt.policy_test(policy_args.policy_type, fname_model, policy_name, result_path=str(cur_path), result_name='/result.txt', figure_save_path=cur_path / 'figure' , ani_save_path=cur_path / 'gif', policy_dict=policy_args.policy_dict, once=policy_args.once) 52 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_test/policy_test_world.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.2 11 | interval: 1 12 | square: [0, 0, 9, 9] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 17 | 18 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_test/policy_test_world_lines.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 7 4 | step_time: 0.1 5 | offset_x: -1 6 | offset_y: -1 7 | 8 | 9 | robots: 10 | robot_mode: 'diff' # omni or diff 11 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 12 | vel_max: [1.5, 1.5] 13 | radius_exp: 0.1 14 | interval: 1 15 | square: [1, 1, 7, 7] 16 | circular: [5, 5, 4] 17 | 18 | 19 | # obstacle_line_list: [ [0, 0, 0, 8], [0, 8, 5, 8],[5, 8, 5, 0], [5, 0, 0, 0]] 20 | 21 | obs_lines: 22 | obs_line_states: [ [0, 0, 0, 8], [0, 8, 5, 8],[5, 8, 5, 0], [5, 0, 0, 0]] -------------------------------------------------------------------------------- /rl_rvo_nav/policy_test/post_train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 
| from pathlib import Path 4 | import platform 5 | from rl_rvo_nav.policy.policy_rnn_ac import rnn_ac 6 | from math import pi, sin, cos, sqrt 7 | import time 8 | 9 | class post_train: 10 | def __init__(self, env, num_episodes=100, max_ep_len=150, acceler_vel = 1.0, reset_mode=3, render=True, save=False, neighbor_region=4, neighbor_num=5, args=None, **kwargs): 11 | 12 | self.env = env 13 | self.num_episodes=num_episodes 14 | self.max_ep_len = max_ep_len 15 | self.acceler_vel = acceler_vel 16 | self.reset_mode = reset_mode 17 | self.render=render 18 | self.save=save 19 | self.robot_number = self.env.ir_gym.robot_number 20 | self.step_time = self.env.ir_gym.step_time 21 | 22 | self.inf_print = kwargs.get('inf_print', True) 23 | self.std_factor = kwargs.get('std_factor', 0.001) 24 | # self.show_traj = kwargs.get('show_traj', False) 25 | self.show_traj = False 26 | self.traj_type = '' 27 | self.figure_format = kwargs.get('figure_format', 'png') 28 | 29 | self.nr = neighbor_region 30 | self.nm = neighbor_num 31 | self.args = args 32 | 33 | def policy_test(self, policy_type='drl', policy_path=None, policy_name='policy', result_path=None, result_name='/result.txt', figure_save_path=None, ani_save_path=None, policy_dict=False, once=False): 34 | 35 | if policy_type == 'drl': 36 | model_action = self.load_policy(policy_path, self.std_factor, policy_dict=policy_dict) 37 | 38 | o, r, d, ep_ret, ep_len, n = self.env.reset(mode=self.reset_mode), 0, False, 0, 0, 0 39 | ep_ret_list, speed_list, mean_speed_list, ep_len_list, sn = [], [], [], [], 0 40 | 41 | print('Policy Test Start !') 42 | 43 | figure_id = 0 44 | while n < self.num_episodes: 45 | 46 | # if n == 1: 47 | # self.show_traj = True 48 | 49 | action_time_list = [] 50 | 51 | if self.render or self.save: 52 | self.env.render(save=self.save, path=figure_save_path, i = figure_id, show_traj=self.show_traj, traj_type=self.traj_type) 53 | 54 | if policy_type == 'drl': 55 | abs_action_list =[] 56 | for i in range(self.robot_number): 57 | 58 | start_time = time.time() 59 | a_inc = np.round(model_action(o[i]), 2) 60 | end_time = time.time() 61 | 62 | temp = end_time - start_time 63 | action_time_list.append(temp) 64 | 65 | cur_vel = self.env.ir_gym.robot_list[i].vel_omni 66 | abs_action = self.acceler_vel * a_inc + np.squeeze(cur_vel) 67 | abs_action_list.append(abs_action) 68 | 69 | o, r, d, info = self.env.step_ir(abs_action_list, vel_type = 'omni') 70 | 71 | robot_speed_list = [np.linalg.norm(robot.vel_omni) for robot in self.env.ir_gym.robot_list] 72 | avg_speed = np.average(robot_speed_list) 73 | speed_list.append(avg_speed) 74 | 75 | ep_ret += r[0] 76 | ep_len += 1 77 | figure_id += 1 78 | 79 | if np.max(d) or (ep_len == self.max_ep_len) or np.min(info): 80 | speed = np.mean(speed_list) 81 | figure_id = 0 82 | if np.min(info): 83 | ep_len_list.append(ep_len) 84 | if self.inf_print: print('Successful, Episode %d \t EpRet %.3f \t EpLen %d \t EpSpeed %.3f'%(n, ep_ret, ep_len, speed)) 85 | else: 86 | if self.inf_print: print('Fail, Episode %d \t EpRet %.3f \t EpLen %d \t EpSpeed %.3f'%(n, ep_ret, ep_len, speed)) 87 | 88 | ep_ret_list.append(ep_ret) 89 | mean_speed_list.append(speed) 90 | speed_list = [] 91 | 92 | o, r, d, ep_ret, ep_len = self.env.reset(mode=self.reset_mode), 0, False, 0, 0 93 | 94 | n += 1 95 | 96 | if np.min(info): 97 | sn+=1 98 | 99 | # if n == 2: 100 | 101 | if once: 102 | self.env.ir_gym.world_plot.save_gif_figure(figure_save_path, 0, format='eps') 103 | break 104 | 105 | if self.save: 106 | 
self.env.ir_gym.save_ani(figure_save_path, ani_save_path, ani_name=policy_name) 107 | break 108 | 109 | mean_len = 0 if len(ep_len_list) == 0 else np.round(np.mean(ep_len_list), 2) 110 | std_len = 0 if len(ep_len_list) == 0 else np.round(np.std(ep_len_list), 2) 111 | 112 | average_speed = np.round(np.mean(mean_speed_list),2) 113 | std_speed = np.round(np.std(mean_speed_list), 2) 114 | 115 | f = open( result_path + result_name, 'a') 116 | print( 'policy_name: '+ policy_name, ' successful rate: {:.2%}'.format(sn/self.num_episodes), "average EpLen:", mean_len, "std length", std_len, 'average speed:', average_speed, 'std speed', std_speed, file = f) 117 | f.close() 118 | 119 | print( 'policy_name: '+ policy_name, ' successful rate: {:.2%}'.format(sn/self.num_episodes), "average EpLen:", mean_len, 'std length', std_len, 'average speed:', average_speed, 'std speed', std_speed) 120 | 121 | 122 | def load_policy(self, filename, std_factor=1, policy_dict=False): 123 | 124 | if policy_dict == True: 125 | model = rnn_ac(self.env.observation_space, self.env.action_space, self.args.state_dim, self.args.rnn_input_dim, self.args.rnn_hidden_dim, self.args.hidden_sizes_ac, self.args.hidden_sizes_v, self.args.activation, self.args.output_activation, self.args.output_activation_v, self.args.use_gpu, self.args.rnn_mode) 126 | 127 | check_point = torch.load(filename) 128 | model.load_state_dict(check_point['model_state'], strict=True) 129 | model.eval() 130 | 131 | else: 132 | model = torch.load(filename) 133 | model.eval() 134 | 135 | # model.train() 136 | def get_action(x): 137 | with torch.no_grad(): 138 | x = torch.as_tensor(x, dtype=torch.float32) 139 | action = model.act(x, std_factor) 140 | return action 141 | 142 | return get_action 143 | 144 | def dis(self, p1, p2): 145 | return sqrt( (p2.py - p1.py)**2 + (p2.px - p1.px)**2 ) -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/dynamic_obs_test.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.1 11 | interval: 1 12 | square: [0, 0, 10, 10] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 17 | obs_cirs: 18 | number: 5 19 | dist_mode: 2 20 | obs_model: 'dynamic' # static, dynamic 21 | obs_step_mode: 'wander' # default, wander 22 | obs_state_list: [[25, 34], [35, 26]] 23 | obs_goal_list: [[45, 24], [15, 36]] 24 | obs_radius_list: [0.1, 0.1] 25 | obs_square: [2, 2, 8, 8] 26 | obs_interval: 1 27 | random_radius: False 28 | vel_max: [0.3, 0.3] 29 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/multi_ppo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.optim import Adam 4 | from pathlib import Path 5 | import scipy 6 | import scipy.signal 7 | import time 8 | import os 9 | from rl_rvo_nav.policy_test.post_train import post_train 10 | import threading 11 | from mpi4py import MPI 12 | 13 | def combined_shape(length, shape=None): 14 | if shape is None: 15 | return (length,) 16 | return (length, shape) if np.isscalar(shape) else (length, *shape) 17 | 18 | def discount_cumsum(x, discount): 19 | """ 20 | magic from rllab for computing discounted cumulative sums of 
vectors. 21 | 22 | input: 23 | vector x, 24 | [x0, 25 | x1, 26 | x2] 27 | 28 | output: 29 | [x0 + discount * x1 + discount^2 * x2, 30 | x1 + discount * x2, 31 | x2] 32 | """ 33 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 34 | 35 | 36 | class multi_PPObuf: 37 | def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95): 38 | 39 | # gamma: discount factor 40 | # Lambda for GAE-Lambda. (Always between 0 and 1, close to 1.) 41 | 42 | self.obs_buf = [0] * size 43 | self.act_buf = np.zeros(combined_shape(size, act_dim), dtype=np.float32) 44 | self.adv_buf = np.zeros(size, dtype=np.float32) 45 | self.rew_buf = np.zeros(size, dtype=np.float32) 46 | self.ret_buf = np.zeros(size, dtype=np.float32) 47 | self.val_buf = np.zeros(size, dtype=np.float32) 48 | self.logp_buf = np.zeros(size, dtype=np.float32) 49 | self.gamma, self.lam = gamma, lam 50 | self.ptr, self.path_start_idx, self.max_size = 0, 0, size 51 | 52 | def store(self, obs, act, rew, val, logp): 53 | """ 54 | Append one timestep of agent-environment interaction to the buffer. 55 | """ 56 | assert self.ptr < self.max_size # buffer has to have room so you can store 57 | self.obs_buf[self.ptr] = obs.copy() 58 | self.act_buf[self.ptr] = act 59 | self.rew_buf[self.ptr] = rew 60 | self.val_buf[self.ptr] = val 61 | self.logp_buf[self.ptr] = logp 62 | self.ptr += 1 63 | 64 | def finish_path(self, last_val=0): 65 | 66 | path_slice = slice(self.path_start_idx, self.ptr) 67 | rews = np.append(self.rew_buf[path_slice], last_val) 68 | vals = np.append(self.val_buf[path_slice], last_val) 69 | 70 | # the next two lines implement GAE-Lambda advantage calculation 71 | deltas = rews[:-1] + self.gamma * vals[1:] - vals[:-1] 72 | self.adv_buf[path_slice] = discount_cumsum(deltas, self.gamma * self.lam) 73 | 74 | # the next line computes rewards-to-go, to be targets for the value function 75 | self.ret_buf[path_slice] = discount_cumsum(rews, self.gamma)[:-1] 76 | self.path_start_idx = self.ptr 77 | 78 | def get(self): 79 | assert self.ptr == self.max_size # buffer has to be full before you can get 80 | self.ptr, self.path_start_idx = 0, 0 81 | 82 | act_ten = torch.as_tensor(self.act_buf, dtype=torch.float32) 83 | ret_ten = torch.as_tensor(self.ret_buf, dtype=torch.float32) 84 | adv_ten = torch.as_tensor(self.adv_buf, dtype=torch.float32) 85 | logp_ten = torch.as_tensor(self.logp_buf, dtype=torch.float32) 86 | obs_tensor_list = list(map(lambda o: torch.as_tensor(o, dtype=torch.float32), self.obs_buf)) 87 | 88 | data = dict(obs=obs_tensor_list, act=act_ten, ret=ret_ten, 89 | adv=adv_ten, logp=logp_ten) 90 | 91 | return data 92 | 93 | def complete(self): 94 | self.ptr, self.path_start_idx = 0, 0 95 | 96 | class multi_ppo: 97 | def __init__(self, env, ac_policy, pi_lr=3e-4, vf_lr=1e-3, train_epoch=50, steps_per_epoch = 600, max_ep_len=300, gamma=0.99, lam=0.97, clip_ratio=0.2, train_pi_iters=100, train_v_iters=100, target_kl=0.01, render=False, render_freq=20, con_train=False, seed=7, save_freq=50, save_figure=False, save_path='test/', save_name='test', load_fname=None, use_gpu = False, reset_mode=1, save_result=False, counter=0, test_env=None, lr_decay_epoch=1000, max_update_num=10, mpi=False, figure_save_path=None, **kwargs): 98 | 99 | torch.manual_seed(seed) 100 | torch.cuda.manual_seed(seed) 101 | np.random.seed(seed) 102 | 103 | self.env = env 104 | self.ac = ac_policy 105 | self.con_train=con_train 106 | self.robot_num = env.ir_gym.robot_number 107 | self.reset_mode = reset_mode 108 | 109 | self.obs_dim = 
env.observation_space.shape 110 | self.act_dim = env.action_space.shape 111 | 112 | # Set up optimizers for policy and value function 113 | self.pi_optimizer = Adam(self.ac.pi.parameters(), lr=pi_lr) 114 | self.vf_optimizer = Adam(self.ac.v.parameters(), lr=vf_lr) 115 | 116 | if con_train: 117 | check_point = torch.load(load_fname) 118 | self.ac.load_state_dict(check_point['model_state'], strict=True) 119 | self.ac.train() 120 | # self.ac.eval() 121 | 122 | # parameter 123 | self.epoch = train_epoch 124 | self.max_ep_len = max_ep_len 125 | self.steps_per_epoch = steps_per_epoch 126 | 127 | self.buf_list = [multi_PPObuf(self.obs_dim, self.act_dim, steps_per_epoch, gamma, lam) for i in range(self.robot_num)] 128 | 129 | # update parameters 130 | self.clip_ratio = clip_ratio 131 | self.train_pi_iters = train_pi_iters 132 | self.train_v_iters=train_v_iters 133 | self.target_kl=target_kl 134 | 135 | self.render = render 136 | self.render_freq = render_freq 137 | 138 | self.save_freq = save_freq 139 | self.save_path = save_path 140 | self.figure_save_path = figure_save_path 141 | self.save_name = save_name 142 | self.save_figure = save_figure 143 | self.use_gpu = use_gpu 144 | 145 | self.save_result = save_result 146 | self.counter = counter 147 | self.pt = post_train(test_env, reset_mode=reset_mode, inf_print=False, render=False) 148 | torch.cuda.synchronize() 149 | 150 | self.lr_decay_epoch = lr_decay_epoch 151 | self.max_update_num = max_update_num 152 | 153 | self.mpi = mpi 154 | 155 | if self.mpi: 156 | self.comm = MPI.COMM_WORLD 157 | self.rank = self.comm.Get_rank() 158 | 159 | def training_loop(self): 160 | 161 | obs_list, ep_ret_list, ep_len_list = self.env.reset(mode=self.reset_mode), [0] * self.robot_num, [0] * self.robot_num 162 | ep_ret_list_mean = [[] for i in range(self.robot_num)] 163 | 164 | for epoch in range(self.epoch + 1): 165 | start_time = time.time() 166 | print('current epoch', epoch) 167 | 168 | if self.mpi: 169 | state_dict = self.comm.bcast(self.ac.state_dict(), root=0) 170 | self.ac.load_state_dict(state_dict) 171 | 172 | for t in range(self.steps_per_epoch): 173 | 174 | if self.render and (epoch % self.render_freq == 0 or epoch == self.epoch): 175 | self.env.render(save=self.save_figure, path=self.figure_save_path, i = t ) 176 | 177 | # if self.save_figure and epoch == 1: 178 | # self.env.render(save=True, path=self.save_path+'figure/', i=t) 179 | 180 | a_list, v_list, logp_list, abs_action_list = [], [], [], [] 181 | 182 | for i in range(self.robot_num): 183 | obs = obs_list[i] 184 | 185 | a_inc, v, logp = self.ac.step(torch.as_tensor(obs, dtype=torch.float32)) 186 | a_inc = np.round(a_inc, 2) 187 | a_list.append(a_inc) 188 | v_list.append(v) 189 | logp_list.append(logp) 190 | 191 | cur_vel = np.squeeze(self.env.ir_gym.robot_list[i].vel_omni) 192 | abs_action = self.env.ir_gym.acceler * np.round(a_inc, 2) + cur_vel 193 | # abs_action = 1.5*a_inc 194 | abs_action = np.round(abs_action, 2) 195 | abs_action_list.append(abs_action) 196 | 197 | next_obs_list, reward_list, done_list, info_list = self.env.step_ir(abs_action_list, vel_type = 'omni') 198 | 199 | # save to buffer 200 | for i in range(self.robot_num): 201 | 202 | self.buf_list[i].store(obs_list[i], a_list[i], reward_list[i], v_list[i], logp_list[i]) 203 | ep_ret_list[i] += reward_list[i] 204 | ep_len_list[i] += 1 205 | 206 | # Update obs 207 | obs_list = next_obs_list[:] 208 | 209 | epoch_ended = t == self.steps_per_epoch-1 210 | arrive_all = min(info_list) == True 211 | terminal = max(done_list) == True or 
max(ep_len_list) > self.max_ep_len 212 | 213 | if epoch_ended or arrive_all: 214 | 215 | if epoch + 1 % 300 == 0: 216 | obs_list = self.env.reset(mode=self.reset_mode) 217 | else: 218 | obs_list = self.env.reset(mode=0) 219 | 220 | for i in range(self.robot_num): 221 | 222 | if arrive_all: 223 | ep_ret_list_mean[i].append(ep_ret_list[i]) 224 | 225 | ep_ret_list[i] = 0 226 | ep_len_list[i] = 0 227 | 228 | self.buf_list[i].finish_path(0) 229 | 230 | elif terminal: 231 | 232 | for i in range(self.robot_num): 233 | if done_list[i] or ep_len_list[i] > self.max_ep_len: 234 | 235 | self.env.reset_one(i) 236 | ep_ret_list_mean[i].append(ep_ret_list[i]) 237 | ep_ret_list[i] = 0 238 | ep_len_list[i]= 0 239 | 240 | self.buf_list[i].finish_path(0) 241 | 242 | obs_list = self.env.ir_gym.env_observation() 243 | 244 | if (epoch % self.save_freq == 0) or (epoch == self.epoch): 245 | self.save_model(epoch) 246 | 247 | if self.save_result and epoch != 0: 248 | # if self.save_result: 249 | policy_model = self.save_path + self.save_name+'_'+str(epoch)+'.pt' 250 | # policy_model = self.save_path + self.save_name+'_'+'check_point_'+ str(epoch)+'.pt' 251 | result_path = self.save_path 252 | policy_name = self.save_name+'_'+str(epoch) 253 | thread = threading.Thread(target=self.pt.policy_test, args=('drl', policy_model, policy_name, result_path, '/results.txt')) 254 | thread.start() 255 | 256 | mean = [round(np.mean(r), 2) for r in ep_ret_list_mean] 257 | max_ret = [round(np.max(r), 2) for r in ep_ret_list_mean] 258 | min_ret = [round(np.min(r), 2) for r in ep_ret_list_mean] 259 | print('The reward in this epoch: ', 'min', min_ret, 'mean', mean, 'max', max_ret) 260 | ep_ret_list_mean = [[] for i in range(self.robot_num)] 261 | 262 | # update 263 | # self.update() 264 | data_list = [buf.get() for buf in self.buf_list] 265 | if self.mpi: 266 | rank_data_list = self.comm.gather(data_list, root=0) 267 | 268 | if self.rank == 0: 269 | for data_list in rank_data_list: 270 | self.update(data_list) 271 | else: 272 | self.update(data_list) 273 | 274 | # animate 275 | # if epoch == 1: 276 | # self.env.create_animate(self.save_path+'figure/') 277 | if self.mpi: 278 | if self.rank == 0: 279 | time_cost = time.time()-start_time 280 | print('time cost in one epoch', time_cost, 'estimated remain time', time_cost*(self.epoch-epoch)/3600, 'hours' ) 281 | else: 282 | time_cost = time.time()-start_time 283 | print('time cost in one epoch', time_cost, 'estimated remain time', time_cost*(self.epoch-epoch)/3600, 'hours' ) 284 | 285 | def update(self, data_list): 286 | 287 | randn = np.arange(self.robot_num) 288 | np.random.shuffle(randn) 289 | 290 | update_num = 0 291 | for r in randn: 292 | 293 | data = data_list[r] 294 | update_num += 1 295 | 296 | if update_num > self.max_update_num: 297 | continue 298 | 299 | for i in range(self.train_pi_iters): 300 | self.pi_optimizer.zero_grad() 301 | loss_pi, pi_info = self.compute_loss_pi(data) 302 | kl = pi_info['kl'] 303 | 304 | 305 | if kl > self.target_kl: 306 | print('Early stopping at step %d due to reaching max kl.'%i) 307 | break 308 | 309 | loss_pi.backward() 310 | self.pi_optimizer.step() 311 | 312 | # Value function learning 313 | for i in range(self.train_v_iters): 314 | self.vf_optimizer.zero_grad() 315 | loss_v = self.compute_loss_v(data) 316 | loss_v.backward() 317 | self.vf_optimizer.step() 318 | 319 | 320 | def compute_loss_v(self, data): 321 | obs, ret = data['obs'], data['ret'] 322 | if self.use_gpu: 323 | ret = ret.cuda() 324 | return ((self.ac.v(obs) - ret)**2).mean() 325 
| 326 | def compute_loss_pi(self, data): 327 | # Set up function for computing PPO policy loss 328 | obs, act, adv, logp_old = data['obs'], data['act'], data['adv'], data['logp'] 329 | 330 | if self.use_gpu: 331 | logp_old = logp_old.cuda() 332 | adv = adv.cuda() 333 | 334 | # Policy loss 335 | pi, logp = self.ac.pi(obs, act) 336 | ratio = torch.exp(logp - logp_old) 337 | clip_adv = torch.clamp(ratio, 1-self.clip_ratio, 1+self.clip_ratio) * adv 338 | loss_pi = -(torch.min(ratio * adv, clip_adv)).mean() 339 | 340 | # Useful extra info 341 | approx_kl = (logp_old - logp).mean().item() 342 | ent = pi.entropy().mean().item() 343 | clipped = ratio.gt(1+self.clip_ratio) | ratio.lt(1-self.clip_ratio) 344 | clipfrac = torch.as_tensor(clipped, dtype=torch.float32).mean().item() 345 | pi_info = dict(kl=approx_kl, ent=ent, cf=clipfrac) 346 | 347 | return loss_pi, pi_info 348 | 349 | def save_model(self, index=0): 350 | 351 | dir_name = self.save_path 352 | fname_model = self.save_path + self.save_name+'_{}.pt' 353 | fname_check_point = self.save_path + self.save_name+'_check_point_{}.pt' 354 | state_dict = dict(model_state=self.ac.state_dict(), pi_optimizer=self.pi_optimizer.state_dict(), 355 | vf_optimizer = self.vf_optimizer.state_dict() ) 356 | 357 | if os.path.exists(dir_name): 358 | torch.save(self.ac, fname_model.format(index)) 359 | torch.save(state_dict, fname_check_point.format(index)) 360 | else: 361 | os.makedirs(dir_name) 362 | torch.save(self.ac, fname_model.format(index)) 363 | torch.save(state_dict, fname_check_point.format(index)) 364 | 365 | 366 | 367 | 368 | 369 | 370 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/train_process.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import gym 4 | import pickle 5 | import shutil 6 | import gym_env 7 | import argparse 8 | from torch import nn 9 | from pathlib import Path 10 | from rl_rvo_nav.policy_train.multi_ppo import multi_ppo 11 | from rl_rvo_nav.policy.policy_rnn_ac import rnn_ac 12 | 13 | # path set 14 | cur_path = Path(__file__).parent 15 | world_abs_path = str(cur_path/'train_world.yaml') 16 | 17 | # default 18 | # robot_number = 4 19 | counter = 0 20 | 21 | # model_path = cur_path / 'model_save' / ('r' + str(robot_number) ) 22 | 23 | parser = argparse.ArgumentParser(description='drl rvo parameters') 24 | 25 | par_env = parser.add_argument_group('par env', 'environment parameters') 26 | par_env.add_argument('--env_name', default='mrnav-v1') 27 | par_env.add_argument('--world_path', default='train_world.yaml') 28 | par_env.add_argument('--robot_number', type=int, default=4) 29 | par_env.add_argument('--init_mode', default=3) 30 | par_env.add_argument('--reset_mode', default=3) 31 | par_env.add_argument('--mpi', default=False) 32 | 33 | par_env.add_argument('--neighbors_region', default=4) 34 | par_env.add_argument('--neighbors_num', type=int, default=5) 35 | par_env.add_argument('--reward_parameter', type=float, default=(3.0, 0.3, 0.0, 6.0, 0.3, 3.0, -0, 0), nargs='+') 36 | par_env.add_argument('--env_train', default=True) 37 | par_env.add_argument('--random_bear', default=True) 38 | par_env.add_argument('--random_radius', default=False) 39 | par_env.add_argument('--full', default=False) 40 | 41 | par_policy = parser.add_argument_group('par policy', 'policy parameters') 42 | par_policy.add_argument('--state_dim', default=6) 43 | par_policy.add_argument('--rnn_input_dim', default=8) 44 | 
par_policy.add_argument('--rnn_hidden_dim', default=256) 45 | par_policy.add_argument('--trans_input_dim', default=8) 46 | par_policy.add_argument('--trans_max_num', default=10) 47 | par_policy.add_argument('--trans_nhead', default=1) 48 | par_policy.add_argument('--trans_mode', default='attn') 49 | par_policy.add_argument('--hidden_sizes_ac', default=(256, 256)) 50 | par_policy.add_argument('--drop_p', type=float, default=0) 51 | par_policy.add_argument('--hidden_sizes_v', type=tuple, default=(256, 256)) # 16 16 52 | par_policy.add_argument('--activation', default=nn.ReLU) 53 | par_policy.add_argument('--output_activation', default=nn.Tanh) 54 | par_policy.add_argument('--output_activation_v', default=nn.Identity) 55 | par_policy.add_argument('--use_gpu', action='store_true') 56 | par_policy.add_argument('--rnn_mode', default='biGRU') # LSTM 57 | 58 | par_train = parser.add_argument_group('par train', 'train parameters') 59 | par_train.add_argument('--pi_lr', type=float, default=4e-6) 60 | par_train.add_argument('--vf_lr', type=float, default=5e-5) 61 | par_train.add_argument('--train_epoch', type=int, default=250) 62 | par_train.add_argument('--steps_per_epoch', type=int, default=500) 63 | par_train.add_argument('--max_ep_len', default=150) 64 | par_train.add_argument('--gamma', default=0.99) 65 | par_train.add_argument('--lam', default=0.97) 66 | par_train.add_argument('--clip_ratio', default=0.2) 67 | par_train.add_argument('--train_pi_iters', default=50) 68 | par_train.add_argument('--train_v_iters', default=50) 69 | par_train.add_argument('--target_kl',type=float, default=0.05) 70 | par_train.add_argument('--render', default=True) 71 | par_train.add_argument('--render_freq', default=50) 72 | par_train.add_argument('--con_train', action='store_true') 73 | par_train.add_argument('--seed', default=7) 74 | par_train.add_argument('--save_freq', default=50) 75 | par_train.add_argument('--save_figure', default=False) 76 | par_train.add_argument('--figure_save_path', default='figure') 77 | par_train.add_argument('--save_path', default=str(cur_path / 'model_save') + '/') 78 | par_train.add_argument('--save_name', default= 'r') 79 | par_train.add_argument('--load_path', default=str(cur_path / 'model_save')+ '/') 80 | par_train.add_argument('--load_name', default='r4_0/r4_0_check_point_250.pt') # '/r4_0/r4_0_check_point_250.pt' 81 | par_train.add_argument('--save_result', type=bool, default=True) 82 | par_train.add_argument('--lr_decay_epoch', type=int, default=1000) 83 | par_train.add_argument('--max_update_num', type=int, default=10) 84 | 85 | args = parser.parse_args() 86 | 87 | # decide the model path and model name 88 | model_path_check = args.save_path + args.save_name + str(args.robot_number) + '_{}' 89 | model_name_check = args.save_name + str(args.robot_number) + '_{}' 90 | while os.path.isdir(model_path_check.format(counter)): 91 | counter+=1 92 | 93 | model_abs_path = model_path_check.format(counter) + '/' 94 | model_name = model_name_check.format(counter) 95 | 96 | load_fname = args.load_path + args.load_name 97 | 98 | env = gym.make(args.env_name, world_name=args.world_path, robot_number=args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=args.env_train, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, full=args.full) 99 | 100 | test_env = gym.make(args.env_name, world_name=args.world_path, robot_number=args.robot_number, 
neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=False, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, plot=False, full=args.full) 101 | 102 | policy = rnn_ac(env.observation_space, env.action_space, args.state_dim, args.rnn_input_dim, args.rnn_hidden_dim, 103 | args.hidden_sizes_ac, args.hidden_sizes_v, args.activation, args.output_activation, 104 | args.output_activation_v, args.use_gpu, args.rnn_mode, args.drop_p) 105 | 106 | ppo = multi_ppo(env, policy, args.pi_lr, args.vf_lr, args.train_epoch, args.steps_per_epoch, args.max_ep_len, args.gamma, args.lam, args.clip_ratio, args.train_pi_iters, args.train_v_iters, args.target_kl, args.render, args.render_freq, args.con_train, args.seed, args.save_freq, args.save_figure, model_abs_path, model_name, load_fname, args.use_gpu, args.reset_mode, args.save_result, counter, test_env, args.lr_decay_epoch, args.max_update_num, args.mpi, args.figure_save_path) 107 | 108 | # save hyparameters 109 | if not os.path.exists(model_abs_path): 110 | os.makedirs(model_abs_path) 111 | 112 | f = open(model_abs_path + model_name, 'wb') 113 | pickle.dump(args, f) 114 | f.close() 115 | 116 | with open(model_abs_path+model_name+'.txt', 'w') as p: 117 | print(vars(args), file=p) 118 | p.close() 119 | 120 | shutil.copyfile( str(cur_path/'train_world.yaml'), model_abs_path+model_name+'_world.yaml') 121 | 122 | # run the training loop 123 | ppo.training_loop() 124 | 125 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/train_process_s1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import gym 4 | import pickle 5 | import shutil 6 | import gym_env 7 | import argparse 8 | from torch import nn 9 | from pathlib import Path 10 | from rl_rvo_nav.policy_train.multi_ppo import multi_ppo 11 | from rl_rvo_nav.policy.policy_rnn_ac import rnn_ac 12 | 13 | # path set 14 | cur_path = Path(__file__).parent 15 | world_abs_path = str(cur_path/'train_world.yaml') 16 | 17 | # default 18 | # robot_number = 4 19 | counter = 0 20 | 21 | # model_path = cur_path / 'model_save' / ('r' + str(robot_number) ) 22 | 23 | parser = argparse.ArgumentParser(description='drl rvo parameters') 24 | 25 | par_env = parser.add_argument_group('par env', 'environment parameters') 26 | par_env.add_argument('--env_name', default='mrnav-v1') 27 | par_env.add_argument('--world_path', default='train_world.yaml') # dynamic_obs_test.yaml; train_world.yaml 28 | par_env.add_argument('--robot_number', type=int, default=4) 29 | par_env.add_argument('--init_mode', default=3) 30 | par_env.add_argument('--reset_mode', default=3) 31 | par_env.add_argument('--mpi', default=False) 32 | 33 | par_env.add_argument('--neighbors_region', default=4) 34 | par_env.add_argument('--neighbors_num', type=int, default=5) 35 | par_env.add_argument('--reward_parameter', type=float, default=(3.0, 0.3, 0.0, 6.0, 0.3, 3.0, -0, 0), nargs='+') 36 | par_env.add_argument('--env_train', default=True) 37 | par_env.add_argument('--random_bear', default=True) 38 | par_env.add_argument('--random_radius', default=False) 39 | par_env.add_argument('--full', default=False) 40 | 41 | par_policy = parser.add_argument_group('par policy', 'policy parameters') 42 | par_policy.add_argument('--state_dim', default=6) 43 | par_policy.add_argument('--rnn_input_dim', default=8) 44 | 
par_policy.add_argument('--rnn_hidden_dim', default=256) 45 | par_policy.add_argument('--trans_input_dim', default=8) 46 | par_policy.add_argument('--trans_max_num', default=10) 47 | par_policy.add_argument('--trans_nhead', default=1) 48 | par_policy.add_argument('--trans_mode', default='attn') 49 | par_policy.add_argument('--hidden_sizes_ac', default=(256, 256)) 50 | par_policy.add_argument('--drop_p', type=float, default=0) 51 | par_policy.add_argument('--hidden_sizes_v', type=tuple, default=(256, 256)) # 16 16 52 | par_policy.add_argument('--activation', default=nn.ReLU) 53 | par_policy.add_argument('--output_activation', default=nn.Tanh) 54 | par_policy.add_argument('--output_activation_v', default=nn.Identity) 55 | par_policy.add_argument('--use_gpu', action='store_true') 56 | par_policy.add_argument('--rnn_mode', default='biGRU') # LSTM 57 | 58 | par_train = parser.add_argument_group('par train', 'train parameters') 59 | par_train.add_argument('--pi_lr', type=float, default=4e-6) 60 | par_train.add_argument('--vf_lr', type=float, default=5e-5) 61 | par_train.add_argument('--train_epoch', type=int, default=250) 62 | par_train.add_argument('--steps_per_epoch', type=int, default=500) 63 | par_train.add_argument('--max_ep_len', default=150) 64 | par_train.add_argument('--gamma', default=0.99) 65 | par_train.add_argument('--lam', default=0.97) 66 | par_train.add_argument('--clip_ratio', default=0.2) 67 | par_train.add_argument('--train_pi_iters', default=50) 68 | par_train.add_argument('--train_v_iters', default=50) 69 | par_train.add_argument('--target_kl',type=float, default=0.05) 70 | par_train.add_argument('--render', default=True) 71 | par_train.add_argument('--render_freq', default=50) 72 | par_train.add_argument('--con_train', action='store_true') 73 | par_train.add_argument('--seed', default=7) 74 | par_train.add_argument('--save_freq', default=50) 75 | par_train.add_argument('--save_figure', default=False) 76 | par_train.add_argument('--figure_save_path', default='figure') 77 | par_train.add_argument('--save_path', default=str(cur_path / 'model_save') + '/') 78 | par_train.add_argument('--save_name', default= 'r') 79 | par_train.add_argument('--load_path', default=str(cur_path / 'model_save')+ '/') 80 | par_train.add_argument('--load_name', default='r4_0/r4_0_check_point_250.pt') # '/r4_0/r4_0_check_point_250.pt' 81 | par_train.add_argument('--save_result', type=bool, default=True) 82 | par_train.add_argument('--lr_decay_epoch', type=int, default=1000) 83 | par_train.add_argument('--max_update_num', type=int, default=10) 84 | 85 | args = parser.parse_args(['--train_epoch', '250', '--use_gpu']) 86 | 87 | # decide the model path and model name 88 | model_path_check = args.save_path + args.save_name + str(args.robot_number) + '_{}' 89 | model_name_check = args.save_name + str(args.robot_number) + '_{}' 90 | while os.path.isdir(model_path_check.format(counter)): 91 | counter+=1 92 | 93 | model_abs_path = model_path_check.format(counter) + '/' 94 | model_name = model_name_check.format(counter) 95 | 96 | load_fname = args.load_path + args.load_name 97 | 98 | env = gym.make(args.env_name, world_name=args.world_path, robot_number=args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=args.env_train, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, full=args.full) 99 | 100 | test_env = gym.make(args.env_name, world_name=args.world_path, 
robot_number=args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=False, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, plot=False, full=args.full) 101 | 102 | policy = rnn_ac(env.observation_space, env.action_space, args.state_dim, args.rnn_input_dim, args.rnn_hidden_dim, 103 | args.hidden_sizes_ac, args.hidden_sizes_v, args.activation, args.output_activation, 104 | args.output_activation_v, args.use_gpu, args.rnn_mode, args.drop_p) 105 | 106 | ppo = multi_ppo(env, policy, args.pi_lr, args.vf_lr, args.train_epoch, args.steps_per_epoch, args.max_ep_len, args.gamma, args.lam, args.clip_ratio, args.train_pi_iters, args.train_v_iters, args.target_kl, args.render, args.render_freq, args.con_train, args.seed, args.save_freq, args.save_figure, model_abs_path, model_name, load_fname, args.use_gpu, args.reset_mode, args.save_result, counter, test_env, args.lr_decay_epoch, args.max_update_num, args.mpi, args.figure_save_path) 107 | 108 | # save hyparameters 109 | if not os.path.exists(model_abs_path): 110 | os.makedirs(model_abs_path) 111 | 112 | f = open(model_abs_path + model_name, 'wb') 113 | pickle.dump(args, f) 114 | f.close() 115 | 116 | with open(model_abs_path+model_name+'.txt', 'w') as p: 117 | print(vars(args), file=p) 118 | p.close() 119 | 120 | shutil.copyfile( str(cur_path/'train_world.yaml'), model_abs_path+model_name+'_world.yaml') 121 | 122 | # run the training loop 123 | ppo.training_loop() 124 | 125 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/train_process_s2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import gym 4 | import pickle 5 | import shutil 6 | import gym_env 7 | import argparse 8 | from torch import nn 9 | from pathlib import Path 10 | from rl_rvo_nav.policy_train.multi_ppo import multi_ppo 11 | from rl_rvo_nav.policy.policy_rnn_ac import rnn_ac 12 | 13 | # path set 14 | cur_path = Path(__file__).parent 15 | world_abs_path = str(cur_path/'train_world.yaml') 16 | 17 | # default 18 | # robot_number = 4 19 | counter = 0 20 | 21 | # model_path = cur_path / 'model_save' / ('r' + str(robot_number) ) 22 | 23 | parser = argparse.ArgumentParser(description='drl rvo parameters') 24 | 25 | par_env = parser.add_argument_group('par env', 'environment parameters') 26 | par_env.add_argument('--env_name', default='mrnav-v1') 27 | par_env.add_argument('--world_path', default='train_world.yaml') 28 | par_env.add_argument('--robot_number', type=int, default=4) 29 | par_env.add_argument('--init_mode', default=3) 30 | par_env.add_argument('--reset_mode', default=3) 31 | par_env.add_argument('--mpi', default=False) 32 | 33 | par_env.add_argument('--neighbors_region', default=4) 34 | par_env.add_argument('--neighbors_num', type=int, default=5) 35 | par_env.add_argument('--reward_parameter', type=float, default=(3.0, 0.3, 0.0, 6.0, 0.3, 3.0, -0, 0), nargs='+') 36 | par_env.add_argument('--env_train', default=True) 37 | par_env.add_argument('--random_bear', default=True) 38 | par_env.add_argument('--random_radius', default=False) 39 | par_env.add_argument('--full', default=False) 40 | 41 | par_policy = parser.add_argument_group('par policy', 'policy parameters') 42 | par_policy.add_argument('--state_dim', default=6) 43 | par_policy.add_argument('--rnn_input_dim', default=8) 44 | par_policy.add_argument('--rnn_hidden_dim', 
default=256) 45 | par_policy.add_argument('--trans_input_dim', default=8) 46 | par_policy.add_argument('--trans_max_num', default=10) 47 | par_policy.add_argument('--trans_nhead', default=1) 48 | par_policy.add_argument('--trans_mode', default='attn') 49 | par_policy.add_argument('--hidden_sizes_ac', default=(256, 256)) 50 | par_policy.add_argument('--drop_p', type=float, default=0) 51 | par_policy.add_argument('--hidden_sizes_v', type=tuple, default=(256, 256)) # 16 16 52 | par_policy.add_argument('--activation', default=nn.ReLU) 53 | par_policy.add_argument('--output_activation', default=nn.Tanh) 54 | par_policy.add_argument('--output_activation_v', default=nn.Identity) 55 | par_policy.add_argument('--use_gpu', action='store_true') 56 | par_policy.add_argument('--rnn_mode', default='biGRU') # LSTM 57 | 58 | par_train = parser.add_argument_group('par train', 'train parameters') 59 | par_train.add_argument('--pi_lr', type=float, default=4e-6) 60 | par_train.add_argument('--vf_lr', type=float, default=5e-5) 61 | par_train.add_argument('--train_epoch', type=int, default=250) 62 | par_train.add_argument('--steps_per_epoch', type=int, default=500) 63 | par_train.add_argument('--max_ep_len', default=150) 64 | par_train.add_argument('--gamma', default=0.99) 65 | par_train.add_argument('--lam', default=0.97) 66 | par_train.add_argument('--clip_ratio', default=0.2) 67 | par_train.add_argument('--train_pi_iters', default=50) 68 | par_train.add_argument('--train_v_iters', default=50) 69 | par_train.add_argument('--target_kl',type=float, default=0.05) 70 | par_train.add_argument('--render', default=True) 71 | par_train.add_argument('--render_freq', default=50) 72 | par_train.add_argument('--con_train', action='store_true') 73 | par_train.add_argument('--seed', default=7) 74 | par_train.add_argument('--save_freq', default=50) 75 | par_train.add_argument('--save_figure', default=False) 76 | par_train.add_argument('--figure_save_path', default='figure') 77 | par_train.add_argument('--save_path', default=str(cur_path / 'model_save') + '/') 78 | par_train.add_argument('--save_name', default= 'r') 79 | par_train.add_argument('--load_path', default=str(cur_path / 'model_save')+ '/') 80 | par_train.add_argument('--load_name', default='r4_0/r4_0_check_point_250.pt') # '/r4_0/r4_0_check_point_250.pt' 81 | par_train.add_argument('--save_result', type=bool, default=True) 82 | par_train.add_argument('--lr_decay_epoch', type=int, default=1000) 83 | par_train.add_argument('--max_update_num', type=int, default=10) 84 | 85 | args = parser.parse_args(['--train_epoch', '2000', '--robot_number', '10', '--load_name', 'r4_0/r4_0_check_point_250.pt', '--con_train', '--use_gpu']) 86 | 87 | # decide the model path and model name 88 | model_path_check = args.save_path + args.save_name + str(args.robot_number) + '_{}' 89 | model_name_check = args.save_name + str(args.robot_number) + '_{}' 90 | while os.path.isdir(model_path_check.format(counter)): 91 | counter+=1 92 | 93 | model_abs_path = model_path_check.format(counter) + '/' 94 | model_name = model_name_check.format(counter) 95 | 96 | load_fname = args.load_path + args.load_name 97 | 98 | env = gym.make(args.env_name, world_name=args.world_path, robot_number=args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=args.env_train, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, full=args.full) 99 | 100 | test_env = gym.make(args.env_name, 
world_name=args.world_path, robot_number=args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=args.init_mode, env_train=False, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, plot=False, full=args.full) 101 | 102 | policy = rnn_ac(env.observation_space, env.action_space, args.state_dim, args.rnn_input_dim, args.rnn_hidden_dim, 103 | args.hidden_sizes_ac, args.hidden_sizes_v, args.activation, args.output_activation, 104 | args.output_activation_v, args.use_gpu, args.rnn_mode, args.drop_p) 105 | 106 | ppo = multi_ppo(env, policy, args.pi_lr, args.vf_lr, args.train_epoch, args.steps_per_epoch, args.max_ep_len, args.gamma, args.lam, args.clip_ratio, args.train_pi_iters, args.train_v_iters, args.target_kl, args.render, args.render_freq, args.con_train, args.seed, args.save_freq, args.save_figure, model_abs_path, model_name, load_fname, args.use_gpu, args.reset_mode, args.save_result, counter, test_env, args.lr_decay_epoch, args.max_update_num, args.mpi, args.figure_save_path) 107 | 108 | # save hyparameters 109 | if not os.path.exists(model_abs_path): 110 | os.makedirs(model_abs_path) 111 | 112 | f = open(model_abs_path + model_name, 'wb') 113 | pickle.dump(args, f) 114 | f.close() 115 | 116 | with open(model_abs_path+model_name+'.txt', 'w') as p: 117 | print(vars(args), file=p) 118 | p.close() 119 | 120 | shutil.copyfile( str(cur_path/'train_world.yaml'), model_abs_path+model_name+'_world.yaml') 121 | 122 | # run the training loop 123 | ppo.training_loop() 124 | 125 | -------------------------------------------------------------------------------- /rl_rvo_nav/policy_train/train_world.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.1 11 | interval: 1 12 | square: [0, 0, 10, 10] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 17 | 18 | -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/policy_test_pre_train.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_env 3 | from pathlib import Path 4 | import pickle 5 | import sys 6 | from rl_rvo_nav.policy_test.post_train import post_train 7 | import argparse 8 | 9 | parser = argparse.ArgumentParser(description='policy test') 10 | parser.add_argument('--policy_type', default='drl') 11 | parser.add_argument('--model_path', default='pre_trained') 12 | parser.add_argument('--model_name', default='pre_train_check_point_1000.pt') 13 | parser.add_argument('--arg_name', default='pre_train') 14 | parser.add_argument('--world_name', default='test_world.yaml') # test_world_lines.yaml; test_world_dyna_obs.yaml 15 | parser.add_argument('--render', action='store_true') 16 | parser.add_argument('--robot_number', type=int, default='6') 17 | parser.add_argument('--num_episodes', type=int, default='100') 18 | parser.add_argument('--dis_mode', type=int, default='3') 19 | parser.add_argument('--save', action='store_true') 20 | parser.add_argument('--full', action='store_true') 21 | parser.add_argument('--show_traj', action='store_true') 22 | parser.add_argument('--once', action='store_true') 23 | 24 | policy_args = parser.parse_args() 25 
| 26 | cur_path = Path(__file__).parent 27 | model_base_path = str(cur_path) + '/' + policy_args.model_path 28 | args_path = model_base_path + '/' + policy_args.arg_name 29 | 30 | # args from train 31 | r = open(args_path, 'rb') 32 | args = pickle.load(r) 33 | 34 | if policy_args.policy_type == 'drl': 35 | # fname_model = save_path_string +'_check_point_250.pt' 36 | fname_model = model_base_path + '/' + policy_args.model_name 37 | policy_name = 'rl_rvo' 38 | 39 | env = gym.make('mrnav-v1', world_name=policy_args.world_name, robot_number=policy_args.robot_number, neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num, robot_init_mode=policy_args.dis_mode, env_train=False, random_bear=args.random_bear, random_radius=args.random_radius, reward_parameter=args.reward_parameter, goal_threshold=0.2, full=policy_args.full) 40 | 41 | policy_name = policy_name + '_' + str(policy_args.robot_number) + '_dis' + str(policy_args.dis_mode) 42 | 43 | pt = post_train(env, num_episodes=policy_args.num_episodes, reset_mode=policy_args.dis_mode, render=policy_args.render, std_factor=0.00001, acceler_vel=1.0, max_ep_len=300, neighbors_region=args.neighbors_region, neighbor_num=args.neighbors_num, args=args, save=policy_args.save, show_traj=policy_args.show_traj, figure_format='eps') 44 | 45 | pt.policy_test(policy_args.policy_type, fname_model, policy_name, result_path=str(cur_path), result_name='/result.txt', figure_save_path=cur_path / 'figure' , ani_save_path=cur_path / 'gif', policy_dict=True, once=policy_args.once) 46 | -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/pre_trained/pre_train: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/pre_trained_model/pre_trained/pre_train -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/pre_trained/pre_train.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/pre_trained_model/pre_trained/pre_train.pt -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/pre_trained/pre_train.txt: -------------------------------------------------------------------------------- 1 | {'env_name': 'MrNav-v0', 'world_path': '/home/han/ir/intelligent-robot-ext/project/drl_rvo_nav/drl_rvo_world.yaml', 'robot_num': 10, 'init_mode': 3, 'reset_mode': 3, 'circular': [5, 5, 4], 'interval': 1, 'square': [0.5, 0.5, 9.5, 9.5], 'obs_mode': 0, 'reward_mode': 1, 'mpi': False, 'neighbors_region': 4, 'neighbors_num': 5, 'reward_parameter': (3.0, 0.3, 0.0, 6.0, 0.3, 3.0, -0, 0), 'env_train': True, 'random_bear': True, 'random_radius': False, 'policy_mode': 'rnn', 'state_dim': 6, 'rnn_input_dim': 8, 'rnn_hidden_dim': 256, 'trans_input_dim': 8, 'trans_max_num': 10, 'trans_nhead': 1, 'trans_mode': 'attn', 'hidden_sizes_ac': (256, 256), 'drop_p': 0, 'hidden_sizes_v': (256, 256), 'activation': , 'output_activation': , 'output_activation_v': , 'use_gpu': True, 'rnn_mode': 'biGRU', 'pi_lr': 4e-06, 'vf_lr': 5e-05, 'train_epoch': 10000, 'steps_per_epoch': 450, 'max_ep_len': 150, 'gamma': 0.99, 'lam': 0.97, 'clip_ratio': 0.2, 'train_pi_iters': 50, 'train_v_iters': 50, 'target_kl': 0.01, 'render': True, 'render_freq': 50, 'con_train': 'T', 
'seed': 7, 'save_freq': 50, 'save_figure': False, 'save_path': '/home/han/ir/intelligent-robot-ext/project/drl_rvo_nav/model_save/r10_2', 'save_name': '/r10_2', 'load_fname': '/home/han/ir/intelligent-robot-ext/project/drl_rvo_nav/model_save/r4_4/r4_4_check_point_100.pt', 'save_result': True, 'lr_decay_epoch': 1000, 'max_update_num': 10} 2 | -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/pre_trained/pre_train_check_point_1000.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hanruihua/rl_rvo_nav/8e4bed3d11f907cf3aca83e4032c99aba18a22a8/rl_rvo_nav/pre_trained_model/pre_trained/pre_train_check_point_1000.pt -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/pre_trained/pre_train_world.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 11 3 | world_width: 11 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.1 11 | interval: 1 12 | square: [0, 0, 10, 10] 13 | circular: [5, 5, 4] -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/test_world.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.2 11 | interval: 1 12 | square: [0, 0, 9, 9] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/test_world_dyna_obs.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 10 4 | step_time: 0.1 5 | 6 | robots: 7 | robot_mode: 'diff' # omni or diff 8 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 9 | vel_max: [1.5, 1.5] 10 | radius_exp: 0.2 11 | interval: 1 12 | square: [0, 0, 9, 9] 13 | circular: [5, 5, 4] 14 | 15 | # obs_lines: 16 | # number: 2 17 | 18 | obs_cirs: 19 | number: 5 20 | dist_mode: 2 21 | obs_model: 'dynamic' # static, dynamic 22 | obs_step_mode: 'wander' # default, wander 23 | obs_state_list: [[25, 34], [35, 26]] 24 | obs_goal_list: [[45, 24], [15, 36]] 25 | obs_radius_list: [0.1, 0.1] 26 | obs_square: [2, 2, 8, 8] 27 | obs_interval: 1 28 | random_radius: False 29 | vel_max: [0.3, 0.3] -------------------------------------------------------------------------------- /rl_rvo_nav/pre_trained_model/test_world_lines.yaml: -------------------------------------------------------------------------------- 1 | world: 2 | world_height: 10 3 | world_width: 7 4 | step_time: 0.1 5 | offset_x: -1 6 | offset_y: -1 7 | 8 | 9 | robots: 10 | robot_mode: 'diff' # omni or diff 11 | radius_list: [0.2, 0.2] # first one is the default radius under other init_mode 12 | vel_max: [1.5, 1.5] 13 | radius_exp: 0.1 14 | interval: 1 15 | square: [1, 1, 7, 7] 16 | circular: [5, 5, 4] 17 | 18 | 19 | # obstacle_line_list: [ [0, 0, 0, 8], [0, 8, 5, 8],[5, 8, 5, 0], [5, 0, 0, 0]] 20 | 21 | obs_lines: 22 | obs_line_states: [ [0, 0, 0, 8], [0, 8, 5, 
8],[5, 8, 5, 0], [5, 0, 0, 0]] -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | setup( 5 | name='rl_rvo_nav', 6 | py_modules=['rl_rvo_nav'], 7 | version= '2.0', 8 | install_requires=[ 9 | 'matplotlib', 10 | 'numpy', 11 | 'scipy', 12 | 'transforms3d', 13 | 'gym==0.20.0', 14 | 'Pathlib', 15 | 'mpi4py', 16 | ], 17 | description="source code of the paper", 18 | author="Han Ruihua", 19 | ) -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pip install -e . 3 | pip install -e ./gym_env 4 | 5 | --------------------------------------------------------------------------------
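A note on the advantage computation in rl_rvo_nav/policy_train/multi_ppo.py: `discount_cumsum` evaluates the backward discounted sum by running `scipy.signal.lfilter` over the reversed array, and `finish_path` applies it twice, to the TD residuals with discount `gamma * lam` for the GAE advantages and to the rewards (with the bootstrap value appended) with discount `gamma` for the value targets. The snippet below is a small self-contained check with made-up numbers, not part of the repository, showing that the filter form matches the plain backward recursion.

```
import numpy as np
import scipy.signal

def discount_cumsum(x, discount):
    # same helper as in multi_ppo.py: y[t] = x[t] + discount * y[t+1]
    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]

rews = np.array([1.0, 0.5, 2.0])
gamma = 0.99

# explicit backward recursion for comparison
expected, running = np.zeros_like(rews), 0.0
for t in reversed(range(len(rews))):
    running = rews[t] + gamma * running
    expected[t] = running

print(discount_cumsum(rews, gamma))   # approximately [3.4552, 2.48, 2.0]
assert np.allclose(discount_cumsum(rews, gamma), expected)

# in finish_path(): adv = discount_cumsum(deltas, gamma * lam)
#                   ret = discount_cumsum(rews_with_bootstrap, gamma)[:-1]
```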
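`compute_loss_pi` in multi_ppo.py is the standard PPO clipped-surrogate objective, with the approximate KL (mean of `logp_old - logp`) used to stop the policy iterations early once it exceeds `target_kl`. The toy example below uses made-up log-probabilities and advantages rather than outputs of the actual `rnn_ac` policy; it only illustrates how the clipping bounds each sample's contribution and how the `kl`/`cf` diagnostics in `pi_info` are computed.

```
import torch

# toy batch: three actions with old/new log-probabilities and advantages (made-up numbers)
clip_ratio = 0.2
logp_old = torch.tensor([-1.0, -0.5, -2.0])
logp     = torch.tensor([-0.7, -0.8, -1.9])
adv      = torch.tensor([ 1.0, -0.5,  2.0])

ratio    = torch.exp(logp - logp_old)                                # approx [1.35, 0.74, 1.11]
clip_adv = torch.clamp(ratio, 1 - clip_ratio, 1 + clip_ratio) * adv  # ratio clipped to [0.8, 1.2]
loss_pi  = -(torch.min(ratio * adv, clip_adv)).mean()                # pessimistic (clipped) surrogate

approx_kl = (logp_old - logp).mean().item()     # early-stopping statistic compared against target_kl
clipfrac  = (ratio.gt(1 + clip_ratio) | ratio.lt(1 - clip_ratio)).float().mean().item()
print(loss_pi.item(), approx_kl, clipfrac)      # clipfrac = 2/3 here: two ratios left the clip range
```

For the first sample the ratio (about 1.35) is clipped to 1.2, and for the negative-advantage sample the elementwise minimum keeps the clipped, more pessimistic term, which is exactly the behaviour the surrogate relies on.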
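Finally, a sketch of restoring the shipped pre-trained checkpoint outside the `post_train` wrapper, following the `policy_dict=True` branch of `post_train.load_policy` and the defaults of policy_test_pre_train.py. This is an illustration under assumptions, not repository code: it assumes the working directory is rl_rvo_nav/pre_trained_model (as when running `policy_test_pre_train.py`, which remains the simplest way to evaluate the model, e.g. with `--render`), mirrors that script's default kwargs, and notes that the pickled args were saved with `use_gpu=True`, so a CUDA device may be needed unless the model and `torch.load` are redirected to the CPU.

```
import gym
import gym_env            # registers the 'mrnav-v1' environment
import pickle
import torch
from rl_rvo_nav.policy.policy_rnn_ac import rnn_ac

base = 'pre_trained/'     # shipped checkpoint directory, relative to rl_rvo_nav/pre_trained_model
args = pickle.load(open(base + 'pre_train', 'rb'))   # argparse Namespace pickled at training time

env = gym.make('mrnav-v1', world_name='test_world.yaml', robot_number=6,
               neighbors_region=args.neighbors_region, neighbors_num=args.neighbors_num,
               robot_init_mode=3, env_train=False, random_bear=args.random_bear,
               random_radius=args.random_radius, reward_parameter=args.reward_parameter,
               goal_threshold=0.2, full=False)

model = rnn_ac(env.observation_space, env.action_space, args.state_dim, args.rnn_input_dim,
               args.rnn_hidden_dim, args.hidden_sizes_ac, args.hidden_sizes_v, args.activation,
               args.output_activation, args.output_activation_v, args.use_gpu, args.rnn_mode)
check_point = torch.load(base + 'pre_train_check_point_1000.pt')
model.load_state_dict(check_point['model_state'], strict=True)   # note: args.use_gpu is True here
model.eval()

obs_list = env.reset(mode=3)
with torch.no_grad():
    # velocity increment for robot 0; post_train scales it and adds the robot's current omni velocity
    a_inc = model.act(torch.as_tensor(obs_list[0], dtype=torch.float32), 0.00001)
```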