├── LICENSE ├── README.md ├── __init__.py ├── config ├── __init__.py ├── agent │ └── sac.yaml ├── cfg.py ├── config.yaml └── env │ ├── highway-fast-continues-v0_s35_d1.yaml │ ├── intersection-continues-v0-o1.yaml │ ├── merge-continues-v0.yaml │ └── roundabout_continues_v1.yaml ├── dataset ├── __init__.py ├── load_data.py └── rs_memory.py ├── exp-highway-table.png ├── exp-highway.png ├── expert_data ├── highway-fast-continues-v0-s35-d1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── intersection-continues-v0-o1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── merge-continues-v0 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy └── roundabout-continues-v1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── framework.png ├── highway_modify ├── .github │ └── workflows │ │ ├── build.yml │ │ └── release.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── codecov.yml ├── docs │ ├── Makefile │ ├── requirements.txt │ └── source │ │ ├── actions │ │ └── index.rst │ │ ├── bibliography │ │ ├── biblio.bib │ │ └── index.rst │ │ ├── conf.py │ │ ├── dynamics │ │ ├── index.rst │ │ ├── road │ │ │ ├── lane.rst │ │ │ ├── regulation.rst │ │ │ └── road.rst │ │ └── vehicle │ │ │ ├── behavior.rst │ │ │ ├── controller.rst │ │ │ └── kinematics.rst │ │ ├── environments │ │ ├── highway.rst │ │ ├── index.rst │ │ ├── intersection.rst │ │ ├── merge.rst │ │ ├── parking.rst │ │ ├── racetrack.rst │ │ └── roundabout.rst │ │ ├── faq.rst │ │ ├── graphics │ │ └── index.rst │ │ ├── index.rst │ │ ├── installation.rst │ │ ├── make_your_own.rst │ │ ├── multi_agent.rst │ │ ├── observations │ │ └── index.rst │ │ ├── quickstart.rst │ │ ├── rewards │ │ └── index.rst │ │ └── user_guide.rst ├── highway_env │ ├── __init__.py │ ├── envs │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── abstract.py │ │ │ ├── action.py │ │ │ ├── finite_mdp.py │ │ │ ├── graphics.py │ │ │ └── observation.py │ │ ├── exit_env.py │ │ ├── highway_env.py │ │ ├── intersection_env.py │ │ ├── lane_keeping_env.py │ │ ├── merge_env.py │ │ ├── parking_env.py │ │ ├── racetrack_env.py │ │ ├── roundabout_line2_env.py │ │ ├── roundabout_line4_env.py │ │ ├── summon_env.py │ │ ├── two_way_env.py │ │ └── u_turn_env.py │ ├── interval.py │ ├── road │ │ ├── __init__.py │ │ ├── graphics.py │ │ ├── lane.py │ │ ├── regulation.py │ │ ├── road.py │ │ └── spline.py │ ├── utils.py │ └── vehicle │ │ ├── __init__.py │ │ ├── behavior.py │ │ ├── controller.py │ │ ├── dynamics.py │ │ ├── graphics.py │ │ ├── kinematics.py │ │ ├── objects.py │ │ └── uncertainty │ │ ├── __init__.py │ │ ├── estimation.py │ │ └── prediction.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests │ ├── __init__.py │ ├── envs │ ├── __init__.py │ ├── test_actions.py │ ├── test_env_preprocessors.py │ ├── test_gym.py │ └── test_time.py │ ├── graphics │ └── test_render.py │ ├── road │ └── test_road.py │ ├── test_utils.py │ └── vehicle │ ├── test_behavior.py │ ├── test_control.py │ ├── test_dynamics.py │ └── test_uncertainty.py ├── introduction.png ├── main.py ├── make_envs.py ├── model ├── __init__.py ├── agent.py ├── ego_attention.py ├── sac.py ├── sac_models.py └── sac_rs.py ├── requirements.txt ├── scripts ├── 
highway-fast-continues-v0-s35-d1.sh ├── intersection-continues-o1.sh ├── merge-v0.sh └── roundabout-v1.sh ├── utils ├── __init__.py └── util.py └── wrappers ├── __init__.py ├── atari_wrapper.py └── normalize_action_wrapper.py /README.md: -------------------------------------------------------------------------------- 1 | # Curricular Subgoal for Inverse Reinforcement Learning 2 | 3 | [![License: Apache](https://img.shields.io/badge/License-Apache-blue.svg)](LICENSE) 4 | [![arXiv](https://img.shields.io/badge/arXiv-2306.08232-b31b1b.svg)](https://arxiv.org/abs/2306.08232) 5 | 6 | 7 | Official codebase for paper [Curricular Subgoal for Inverse Reinforcement Learning](https://arxiv.org/abs/2306.08232). 8 | 9 |
10 | ![image](https://github.com/Plankson/CSIRL/blob/master/introduction.png) 11 |
12 | 13 | ## Overview 14 | 15 | **TLDR:** Our main contribution is a dedicated curricular subgoal-based IRL framework that enables multi-stage imitation based on expert demonstrations. Extensive experiments on the D4RL and autonomous driving benchmarks show that the proposed CSIRL framework yields significantly superior performance to state-of-the-art competitors, as well as better interpretability during training. Moreover, robustness analyses show that CSIRL maintains high performance even with only one expert trajectory. 16 | 17 | **Abstract:** Inverse Reinforcement Learning (IRL) aims to reconstruct the reward function from expert demonstrations to facilitate policy learning, and has demonstrated remarkable success in imitation learning. To promote expert-like behavior, existing IRL methods mainly focus on learning global reward functions to minimize the trajectory difference between the imitator and the expert. However, these global designs are still limited by redundant noise and error-propagation problems, leading to unsuitable reward assignment and thus degrading the agent's capability in complex multi-stage tasks. In this paper, we propose a novel Curricular Subgoal-based Inverse Reinforcement Learning (CSIRL) framework that explicitly disentangles one task into several local subgoals to guide agent imitation. Specifically, CSIRL first introduces the decision uncertainty of the trained agent over expert trajectories to dynamically select subgoals, which directly determine the exploration boundary of each task stage. To further acquire local reward functions for each stage, we customize a meta-imitation objective based on these curricular subgoals to train an intrinsic reward generator. Experiments on the D4RL and autonomous driving benchmarks demonstrate that the proposed method yields results superior to state-of-the-art counterparts, as well as better interpretability. 18 | 19 | ![image](https://github.com/Plankson/CSIRL/blob/master/framework.png) 20 | 21 | ## Prerequisites 22 | 23 | #### Install dependencies 24 | 25 | See the `requirements.txt` file for the list of dependencies and how to install them. 26 | 27 | #### Install highway-env 28 | Note that we make some modifications to the original [highway-env](https://github.com/eleurent/highway-env) so that it better fits realistic driving scenarios. The modified highway-env is provided in `highway_modify` and can be installed by running: 29 | 30 | ```bash 31 | cd highway_modify 32 | pip install -e . 33 | ``` 34 | 35 | 36 | ## Usage 37 | Detailed instructions to replicate the results in the paper are provided in the `scripts` directory. 38 | The general form of the training commands is: 39 | 40 | ```bash 41 | # highway-fast 42 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 43 | 44 | # merge 45 | python main.py env=merge-continues-v0 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 46 | 47 | # roundabout 48 | python main.py env=roundabout_continues_v1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 49 | 50 | # intersection 51 | python main.py env=intersection-continues-v0-o1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 52 | ``` 53 | 54 | Make sure to replace `EXPERT_DATASET_PATH` with the path to the corresponding dataset in `expert_data`, and `SEED` with the desired random seed.
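For instance, a run on the highway-fast benchmark might look like the sketch below; it reuses the default seed from `config/config.yaml` and the leading-slash path format of the trajectory defaults in `config/env/highway-fast-continues-v0_s35_d1.yaml` (the exact invocations used for the paper are in `scripts/`).

```bash
# Minimal example run (a sketch, not the exact paper command):
# train CSIRL on highway-fast using the provided expert trajectory 20.npy,
# with seed=0 as in the default config/config.yaml
python main.py env=highway-fast-continues-v0_s35_d1 \
    expert.tra=/expert_data/highway-fast-continues-v0-s35-d1/20.npy \
    seed=0
```

Each `.npy` file in `expert_data` stores a dictionary of expert trajectories with keys such as `states`, `actions`, `next_states`, `rewards`, `dones`, and `lengths`, which is the format consumed by `dataset/load_data.py`.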
55 | 56 | 57 | ![image](https://github.com/Plankson/CSIRL/blob/master/exp-highway.png) 58 | 59 | 60 | ![image](https://github.com/Plankson/CSIRL/blob/master/exp-highway-table.png) 61 | 62 | ## Citation 63 | 64 | If you find this work useful for your research, please cite our paper: 65 | 66 | ``` 67 | @article{liu2023CSIRL, 68 | title={Curricular Subgoal for Inverse Reinforcement Learning}, 69 | author={Liu, Shunyu and Qing, Yunpeng and Xu, Shuqi and Wu, Hongyan and Zhang, Jiangtao and Cong, Jingyuan and Liu, Yunfu and Song, Mingli}, 70 | journal={arXiv preprint arXiv:2306.08232}, 71 | year={2023} 72 | } 73 | ``` 74 | 75 | ## Contact 76 | 77 | Please feel free to contact me via email (, ) if you are interested in my research :) 78 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/__init__.py -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/config/__init__.py -------------------------------------------------------------------------------- /config/agent/sac.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | agent: 4 | name: sac 5 | _target_: model.sac.SAC 6 | obs_dim: ??? # to be specified later 7 | action_dim: ??? # to be specified later 8 | 9 | critic_cfg: ${q_net} 10 | actor_cfg: ${diag_gaussian_actor} 11 | rs_cfg: ${intrinsic_reward_net} 12 | init_temp: 1e-2 # use a low temp for IL 13 | 14 | alpha_lr: 3e-4 15 | alpha_betas: [0.9, 0.999] 16 | 17 | actor_lr: 3e-4 18 | actor_betas: [0.9, 0.999] 19 | actor_update_frequency: 1 20 | 21 | critic_lr: 3e-4 22 | critic_betas: [0.9, 0.999] 23 | critic_tau: 0.005 24 | critic_target_update_frequency: 1 25 | 26 | grad_lr: 0.01 27 | rs_lr: 0.001 28 | rs_betas: [0.9, 0.999] 29 | rs_update_frequency: 1 30 | # learn temperature coefficient (disabled by default) 31 | learn_temp: false 32 | 33 | # Use either value_dice actor or normal SAC actor loss 34 | vdice_actor: false 35 | 36 | q_net: 37 | _target_: model.sac_models.MultiQCritic 38 | obs_dim: ${agent.obs_dim} 39 | action_dim: ${agent.action_dim} 40 | hidden_dim: 256 41 | hidden_depth: 2 42 | q_net_num: 5 43 | 44 | intrinsic_reward_net: 45 | _target_: model.sac_models.Intrinsic_Reward_Generator 46 | input_dim: ${agent.obs_dim} 47 | hidden_dim: 256 48 | hidden_depth: 2 49 | 50 | diag_gaussian_actor: 51 | _target_: model.sac_models.DiagGaussianActor 52 | obs_dim: ${agent.obs_dim} 53 | action_dim: ${agent.action_dim} 54 | hidden_dim: 256 55 | hidden_depth: 2 56 | log_std_bounds: [-5, 2] -------------------------------------------------------------------------------- /config/cfg.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | exp_name: '' 2 | project_name: ${env.name} 3 | 4 | cuda_deterministic: False 5 | device: ??? 
# to be specified later 6 | use_rs: True 7 | gamma: 0.99 8 | seed: 0 9 | num_seed_steps: 0 # Don't need seeding for IL (Use 1000 for RL) 10 | soft_mean: True 11 | sigma: 0.2 12 | insert_subgoal_exp: False 13 | subgoal_num_actions: 1 14 | train: 15 | batch: 64 16 | use_target: False 17 | soft_update: False 18 | expert: 19 | 20 | eval: 21 | eps: 10 22 | 23 | env: 24 | replay_mem: 50000 25 | initial_mem: 1280 26 | eps_steps: 1000 27 | eval_interval: 1e3 28 | 29 | # Extra args 30 | hydra_base_dir: "" 31 | 32 | # Number of actor updates per env step 33 | num_actor_updates: 1 34 | 35 | 36 | defaults: 37 | - agent: sac 38 | - env: roundabout_continues_v1 39 | - method: simple_sac -------------------------------------------------------------------------------- /config/env/highway-fast-continues-v0_s35_d1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: highway-fast-v0 5 | action_type: continues 6 | density: 1 7 | speed: 35 8 | # learn_steps: 1e5 9 | # eval_interval: 1e3 10 | 11 | replay_mem: 1e6 12 | # initial_mem: 10000 13 | round_steps: 2e4 14 | eps_steps: 100000 15 | eval_interval: 1e3 16 | expert_data: ${expert} 17 | first_step: 5 18 | delta: 5 19 | sample_uc: 0.03 20 | l_ego_s: 0 21 | r_ego_s: 4 22 | l_pos: 1 23 | r_pos: 1 24 | g1: 700.0 25 | expert: 26 | subsample_freq: 1 27 | basic_tra: "/expert_data/highway-fast-continues-v0-s35-d1/1.npy" 28 | tra: "/expert_data/highway-fast-continues-v0-s35-d1/20.npy" 29 | 30 | agent: 31 | name: sac 32 | 33 | num_actor_updates: 1 34 | 35 | train: 36 | use_target: true 37 | soft_update: true 38 | batch: 256 39 | 40 | q_net: 41 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/intersection-continues-v0-o1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: intersection-v0 5 | action_type: continues 6 | destination: o11 7 | # learn_steps: 1e5 8 | # eval_interval: 1e3 9 | 10 | replay_mem: 1e6 11 | # initial_mem: 10000 12 | round_steps: 2e4 13 | eps_steps: 100000 14 | eval_interval: 1e3 15 | expert_data: ${expert} 16 | first_step: 5 17 | delta: 5 18 | sample_uc: 0.03 19 | l_ego_s: 0 20 | r_ego_s: 6 21 | l_pos: 1 22 | r_pos: 2 23 | g1: 100.0 24 | expert: 25 | subsample_freq: 1 26 | basic_tra: "/expert_data/intersection-continues-v0-o1/1.npy" 27 | tra: "/expert_data/intersection-continues-v0-o1/20.npy" 28 | 29 | agent: 30 | name: sac 31 | 32 | num_actor_updates: 1 33 | 34 | train: 35 | use_target: true 36 | soft_update: true 37 | batch: 256 38 | 39 | q_net: 40 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/merge-continues-v0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: merge-v0 5 | action_type: continues 6 | # learn_steps: 1e5 7 | # eval_interval: 1e3 8 | 9 | replay_mem: 1e6 10 | # initial_mem: 10000 11 | round_steps: 2e4 12 | eps_steps: 100000 13 | eval_interval: 1e3 14 | expert_data: ${expert} 15 | first_step: 5 16 | delta: 5 17 | sample_uc: 0.03 18 | l_ego_s: 0 19 | r_ego_s: 4 20 | l_pos: 1 21 | r_pos: 1 22 | g1: 500.0 23 | expert: 24 | basic_tra: "/expert_data/merge-continues-v0/1.npy" 25 | tra: "/expert_data/merge-continues-v0/20.npy" 26 | 27 | agent: 28 | name: sac 29 | 30 | num_actor_updates: 1 31 | 32 | train: 33 | use_target: true 
34 | soft_update: true 35 | batch: 256 36 | 37 | q_net: 38 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/roundabout_continues_v1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: roundabout-v1 5 | action_type: continues 6 | # learn_steps: 1e5 7 | # eval_interval: 1e3 8 | 9 | replay_mem: 1e6 10 | # initial_mem: 10000 11 | round_steps: 2e4 12 | eps_steps: 100000 13 | eval_interval: 1e3 14 | expert_data: ${expert} 15 | first_step: 5 16 | delta: 5 17 | sample_uc: 0.03 18 | l_ego_s: 0 19 | r_ego_s: 6 20 | l_pos: 1 21 | r_pos: 2 22 | g1: 100.0 23 | expert: 24 | basic_tra: "/expert_data/roundabout-continues-v1/1.npy" 25 | tra: "/expert_data/roundabout-continues-v1/20.npy" 26 | 27 | agent: 28 | name: sac 29 | 30 | num_actor_updates: 1 31 | 32 | train: 33 | use_target: true 34 | soft_update: true 35 | batch: 256 36 | 37 | q_net: 38 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/load_data.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import glob 5 | 6 | import torch 7 | import utils 8 | import time 9 | import glob 10 | import config 11 | class Dataset(): 12 | # fro this dataset, the subgoal_trajectory count is 1. 13 | def __init__(self, cur_pth, args): 14 | self.args = args 15 | self.expert_data = np.load(cur_pth+args.expert.basic_tra, allow_pickle=True).item() 16 | self.states = self.expert_data["states"] 17 | self.action = self.expert_data["actions"] 18 | self.next_states = self.expert_data["next_states"] 19 | self.all_data = np.load(cur_pth+args.expert.tra,allow_pickle=True).item() 20 | self.all_state = np.vstack([self.all_data["states"],self.states]).reshape(-1,self.states.shape[2]) 21 | self.all_action = np.vstack([self.all_data["actions"],self.action]).reshape(-1,self.action.shape[2]).clip(-1.0,1.0) 22 | # extract self state 23 | self.is_done = self.expert_data["dones"] 24 | self.tra_num = self.expert_data["dones"].shape[0] 25 | self.goals = np.zeros((self.tra_num),dtype=np.int32) ## goals: state id 26 | for i in range(self.tra_num): ## TODO: Set random goal at start 27 | # self.goals[i] = random.randint(1,args.first_step) 28 | self.goals[i] = random.randint(1,args.env.first_step) 29 | self.belongs = np.zeros(self.expert_data["rewards"].shape, dtype=np.int16) ## 30 | self.reset_belongs() 31 | self.insert_new_subgoal() 32 | 33 | def reset_belongs(self): 34 | for i in range(self.tra_num): 35 | for j in range(self.expert_data["lengths"][i]): 36 | # if j != 0: 37 | # self.belongs[i][j] = min( random.randint(max(j + 1, self.belongs[ i][j-1]), j+self.args.env.first_step), self.states[0].shape[0]-1) 38 | # else: 39 | # self.belongs[i][j] = min( random.randint(j + 1, j + self.args.env.first_step), self.states[0].shape[0] - 1) 40 | self.belongs[i][j] = min(j+1,self.states[0].shape[0]-1) 41 | def insert_new_subgoal(self,pos=None):# pos list 42 | # self.goals: tra_num 43 | for i in range(self.tra_num): 44 | if pos != None: 45 | if pos[i]< self.goals[i]: 46 | print("??? 
There is some error in subgoal setting at trajectory %d, goal: %d , new goal: %d" %(i, self.goals[i], pos[i] ) ) 47 | return False 48 | self.goals[i]=pos[i] 49 | for j in range(self.goals[i]): 50 | self.belongs[i][j] = self.goals[i] 51 | print(self.goals[i]) 52 | for j in range(self.expert_data["lengths"][i]): 53 | print(self.belongs[i][j], end=' ') 54 | return True 55 | def find_subgoal(self, state): 56 | # state: batch_num * state_dim 57 | # return: batch_num * state_dim 58 | focus_state=state[:,self.args.env.l_ego_s:self.args.env.r_ego_s+1] 59 | subgoals=np.zeros(state.shape) 60 | for i in range(state.shape[0]): 61 | id=0 62 | min_dis = float('inf') 63 | for j in range(self.states[0].shape[0]): #TODO:there is only one trajectory to get subgoal! 64 | dis=np.linalg.norm(focus_state[i]-self.states[0][j][self.args.env.l_ego_s:self.args.env.r_ego_s+1]) 65 | if dis <= min_dis: 66 | min_dis = dis 67 | id = self.belongs[0][j] 68 | subgoals[i]=self.states[0][id] 69 | return id, subgoals 70 | 71 | def sample(self,device): 72 | batch_size = 32 73 | indexes = np.random.choice(np.arange(self.expert_data["lengths"][0]), size=batch_size, replace=False) 74 | batch_state, batch_action = [self.states[0][i] for i in indexes], [self.action[0][i] for i in indexes] 75 | batch_state = np.array(batch_state) 76 | batch_action = np.array(batch_action) 77 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 78 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 79 | return batch_state, batch_action 80 | 81 | def all_sample(self,device): 82 | batch_size = 32 83 | indexes = np.random.choice(np.arange(self.all_state.shape[0]), size=batch_size, replace=False) 84 | batch_state, batch_action = [self.all_state[i] for i in indexes], [self.all_action[i] for i in indexes] 85 | batch_state = np.array(batch_state) 86 | batch_action = np.array(batch_action) 87 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 88 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 89 | return batch_state, batch_action 90 | def sqil_sample(self,device): 91 | batch_size = 32 92 | indexes = np.random.choice(np.arange(self.all_state.shape[0]), size=batch_size, replace=False) 93 | batch_state, batch_action ,batch_next_state, batch_done = [self.all_state[i] for i in indexes], [self.all_action[i] for i in indexes], [self.all_state[min(i+1,self.expert_data["lengths"][0]-1)] for i in indexes], [1.0 if i==self.expert_data["lengths"][0]-1 else 0.0 for i in indexes] 94 | batch_state = np.array(batch_state) 95 | batch_action = np.array(batch_action) 96 | batch_next_state = np.array(batch_next_state) 97 | batch_done = np.array(batch_done) 98 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 99 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 100 | batch_next_state = torch.as_tensor(batch_next_state, dtype=torch.float, device=device) 101 | batch_done = torch.as_tensor(batch_done, dtype=torch.float, device=device) 102 | return batch_state, batch_action, batch_next_state,batch_done 103 | def get_tra_num(self): 104 | return self.all_data["dones"].shape[0] 105 | 106 | def select_subgoal(self, agent, args): 107 | flag = False 108 | test_s = torch.squeeze(torch.tensor(self.states).float(), dim=0) 109 | if args.insert_subgoal_exp == True: 110 | test_a = torch.squeeze(torch.tensor(self.action).float(), dim=0) 111 | else: 112 | test_a = agent.choose_action(test_s, sample=True) 113 | test_a = 
torch.squeeze(torch.tensor(test_a).float(), dim=0) 114 | UC = agent.getUC(test_s, test_a).squeeze() 115 | target_uc = args.env.delta 116 | base_uc = UC[self.goals[0]] 117 | cnt = 1.0 118 | for i in range(self.goals[0] + 1, self.expert_data["lengths"][0]): 119 | if (args.soft_mean and base_uc * target_uc < UC[i]) or ( 120 | (not args.soft_mean) and base_uc * target_uc / cnt < UC[i]): # TODO more specific condition?: 121 | pos = np.array(i, dtype=np.int16).reshape((1, 1)) 122 | # pos += random.randint(1,args.env.next_step) 123 | flag = self.insert_new_subgoal(pos) 124 | break 125 | base_uc = base_uc * args.sigma + (1 - args.sigma) * UC[i] if args.soft_mean else base_uc + UC[i] 126 | cnt += 1.0 if args.soft_mean else 0.0 127 | print(self.goals[0]) 128 | return flag 129 | def get_reward(state): 130 | return None 131 | -------------------------------------------------------------------------------- /dataset/rs_memory.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import deque 3 | import numpy as np 4 | import random 5 | import torch 6 | 7 | 8 | class LazyFrames(object): 9 | def __init__(self, frames): 10 | """This object ensures that common frames between the observations are only stored once. 11 | It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay 12 | buffers. 13 | This object should only be converted to numpy array before being passed to the model.""" 14 | self._frames = frames 15 | self._out = None 16 | 17 | def _force(self): 18 | if self._out is None: 19 | self._out = np.concatenate(self._frames, axis=0) 20 | self._frames = None 21 | return self._out 22 | 23 | def __array__(self, dtype=None): 24 | out = self._force() 25 | if dtype is not None: 26 | out = out.astype(dtype) 27 | return out 28 | 29 | def __len__(self): 30 | return len(self._force()) 31 | 32 | def __getitem__(self, i): 33 | return self._force()[i] 34 | 35 | def count(self): 36 | frames = self._force() 37 | return frames.shape[frames.ndim - 1] 38 | 39 | def frame(self, i): 40 | return self._force()[..., i] 41 | 42 | 43 | class Memory(object): 44 | def __init__(self, memory_size: int, seed: int = 0) -> None: 45 | random.seed(seed) 46 | self.memory_size = memory_size 47 | self.buffer = deque(maxlen=self.memory_size) 48 | 49 | def add(self, experience) -> None: 50 | self.buffer.append(experience) 51 | 52 | def size(self): 53 | return len(self.buffer) 54 | 55 | def sample(self, batch_size: int, continuous: bool = True): 56 | if batch_size > len(self.buffer): 57 | batch_size = len(self.buffer) 58 | if continuous: 59 | rand = random.randint(0, len(self.buffer) - batch_size) 60 | return [self.buffer[i] for i in range(rand, rand + batch_size)] 61 | else: 62 | indexes = np.random.choice(np.arange(len(self.buffer)), size=batch_size, replace=False) 63 | return [self.buffer[i] for i in indexes] 64 | 65 | def clear(self): 66 | self.buffer.clear() 67 | 68 | 69 | def get_samples(self, batch_size, device): 70 | batch = self.sample(batch_size, False) 71 | 72 | # batch_state, batch_next_state, batch_action, batch_re_obs, batch_reward1, batch_reward2, batch_done = zip(*batch) 73 | batch_state, batch_next_state, batch_action, batch_re_obs, batch_reward1, batch_done = zip(*batch) 74 | 75 | # Scale obs for atari. 
TODO: Use flags 76 | if isinstance(batch_state[0], LazyFrames): 77 | # Use lazyframes for improved memory storage (same as original DQN) 78 | batch_state = np.array(batch_state) / 255.0 79 | if isinstance(batch_next_state[0], LazyFrames): 80 | batch_next_state = np.array(batch_next_state) / 255.0 81 | batch_state = np.array(batch_state) 82 | batch_next_state = np.array(batch_next_state) 83 | batch_action = np.array(batch_action) 84 | 85 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 86 | batch_next_state = torch.as_tensor(batch_next_state, dtype=torch.float, device=device) 87 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 88 | batch_re_obs = torch.as_tensor(batch_re_obs, dtype=torch.float, device=device) 89 | if batch_action.ndim == 1: 90 | batch_action = batch_action.unsqueeze(1) 91 | batch_reward1 = torch.as_tensor(batch_reward1, dtype=torch.float, device=device).unsqueeze(1) 92 | # batch_reward2 = torch.as_tensor(batch_reward2, dtype=torch.float, device=device).unsqueeze(1) 93 | batch_done = torch.as_tensor(batch_done, dtype=torch.float, device=device).unsqueeze(1) 94 | 95 | # return batch_state, batch_next_state, batch_action,batch_re_obs, batch_reward1, batch_reward2, batch_done 96 | return batch_state, batch_next_state, batch_action,batch_re_obs, batch_reward1, batch_done 97 | 98 | -------------------------------------------------------------------------------- /exp-highway-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/exp-highway-table.png -------------------------------------------------------------------------------- /exp-highway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/exp-highway.png -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/1.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/10.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/14.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/15.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/19.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/19.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/20.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/24.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/25.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/4.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/5.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/9.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/1.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/10.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/14.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/15.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/15.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/19.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/20.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/24.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/25.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/4.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/5.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/9.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/1.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/10.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/14.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/14.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/15.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/19.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/20.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/24.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/25.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/4.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/5.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/9.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/1.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/10.npy -------------------------------------------------------------------------------- 
/expert_data/roundabout-continues-v1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/14.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/15.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/19.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/20.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/24.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/25.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/4.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/5.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/9.npy -------------------------------------------------------------------------------- /framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/framework.png -------------------------------------------------------------------------------- /highway_modify/.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single 
version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.8 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | sudo pip install pygame 27 | pip install -e .[deploy] 28 | - name: Lint with flake8 29 | run: | 30 | pip install flake8 31 | # stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | - name: Test with pytest 36 | run: | 37 | pip install pytest 38 | pip install pytest-cov 39 | pytest --cov=./ --cov-report=xml 40 | - name: Upload coverage to Codecov 41 | uses: codecov/codecov-action@v1 42 | with: 43 | file: ./coverage.xml 44 | flags: unittests 45 | fail_ci_if_error: true 46 | -------------------------------------------------------------------------------- /highway_modify/.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: 5 | - published 6 | 7 | jobs: 8 | release: 9 | name: Deploy release to PyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout source 13 | uses: actions/checkout@v1 14 | - name: Set up Python 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.8 18 | - name: Install dependencies 19 | run: pip install wheel 20 | - name: Build package 21 | run: python setup.py sdist bdist_wheel 22 | - name: Upload package 23 | uses: pypa/gh-action-pypi-publish@master 24 | with: 25 | user: __token__ 26 | password: ${{ secrets.pypi_password }} -------------------------------------------------------------------------------- /highway_modify/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled sources 2 | *.pyc 3 | build/ 4 | dist/ 5 | 6 | # Setup 7 | **.egg* 8 | 9 | # Jupyter notebooks 10 | **.ipynb_checkpoints* 11 | 12 | # Sphinx documentation 13 | _build 14 | 15 | # Editor files 16 | .idea 17 | 18 | # Test files 19 | .pytest_cache 20 | .cache 21 | 22 | # Outputs 23 | **/out/* -------------------------------------------------------------------------------- /highway_modify/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Leurent" 5 | given-names: "Edouard" 6 | title: "An Environment for Autonomous Driving Decision-Making" 7 | version: 1.4 8 | date-released: 2018-05-01 9 | url: "https://github.com/eleurent/highway-env" 10 | -------------------------------------------------------------------------------- /highway_modify/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Edouard Leurent 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /highway_modify/codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | informational: true 6 | patch: 7 | default: 8 | informational: true 9 | -------------------------------------------------------------------------------- /highway_modify/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = ../highway-env 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | apidoc: 18 | sphinx-apidoc -o $(SOURCEDIR) -e ../highway_env 19 | 20 | http: 21 | python -mwebbrowser "http://localhost:8000/" 22 | python -m http.server 8000 23 | 24 | 25 | # Catch-all target: route all unknown targets to Sphinx using the new 26 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
27 | %: Makefile 28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | -------------------------------------------------------------------------------- /highway_modify/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pygments==2.4.1 2 | sphinx_rtd_theme 3 | sphinxcontrib-bibtex<2.0.0 4 | jupyter-sphinx 5 | gym 6 | numpy 7 | pygame 8 | matplotlib 9 | pandas 10 | 11 | # ========= 12 | # Optionals 13 | # ========= 14 | pytest 15 | scipy 16 | -------------------------------------------------------------------------------- /highway_modify/docs/source/actions/index.rst: -------------------------------------------------------------------------------- 1 | .. _actions: 2 | 3 | .. py:module:: highway_env.envs.common.action 4 | 5 | Actions 6 | ============= 7 | 8 | Similarly to :ref:`observations `, **several types of actions** can be used in every environment. They are defined in the 9 | :py:mod:`~highway_env.envs.common.action` module. 10 | Each environment comes with a *default* action type, which can be changed or customised using 11 | :ref:`environment configurations `. For instance, 12 | 13 | 14 | .. code-block:: python 15 | 16 | import gym 17 | import highway_env 18 | 19 | env = gym.make('highway-v0') 20 | env.configure({ 21 | "action": { 22 | "type": "ContinuousAction" 23 | } 24 | }) 25 | env.reset() 26 | 27 | 28 | Continuous Actions 29 | ------------------- 30 | 31 | The :py:class:`~highway_env.envs.common.action.ContinuousAction` type allows the agent to directly set the low-level 32 | controls of the :ref:`vehicle kinematics `, namely the throttle :math:`a` and steering angle :math:`\delta`. 33 | 34 | .. note:: 35 | The control of throttle and steering can be enabled or disabled through the 36 | :py:attr:`~highway_env.envs.common.action.ContinuousAction.longitudinal` and :py:attr:`~highway_env.envs.common.action.ContinuousAction.lateral` 37 | configurations, respectively. Thus, the action space can be either 1D or 2D. 38 | 39 | Discrete Actions 40 | ------------------- 41 | 42 | The :py:class:`~highway_env.envs.common.action.DiscreteAction` is a uniform quantization of the :py:class:`~highway_env.envs.common.action.ContinuousAction` above. 43 | 44 | The :py:attr:`~highway_env.envs.common.action.DiscreteAction.actions_per_axis` parameter allows to set the quantization step. Similarly to continuous actions, the longitudinal and lateral axis can be enabled or disabled separately. 45 | 46 | 47 | 48 | Discrete Meta-Actions 49 | ---------------------- 50 | 51 | The :py:class:`~highway_env.envs.common.action.DiscreteMetaAction` type adds a layer of :ref:`speed and steering controllers ` 52 | on top of the continuous low-level control, so that the ego-vehicle can automatically follow the road at a desired velocity. 53 | Then, the available **meta-actions** consist in *changing the target lane and speed* that are used as setpoints for the low-level controllers. 54 | 55 | The full corresponding action space is defined in :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.ACTIONS_ALL` 56 | 57 | .. 
code-block:: python 58 | 59 | ACTIONS_ALL = { 60 | 0: 'LANE_LEFT', 61 | 1: 'IDLE', 62 | 2: 'LANE_RIGHT', 63 | 3: 'FASTER', 64 | 4: 'SLOWER' 65 | } 66 | 67 | Some of these actions might not be always available (lane changes at the edges of the roads, or accelerating/decelrating 68 | beyond the maximum/minimum velocity), and the list of available actions can be accessed with :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.get_available_actions` method. 69 | Taking an unavailable action is equivalent to taking the ``IDLE`` action. 70 | 71 | Similarly to continuous actions, the longitudinal (speed changes) and lateral (lane changes) actions can be disabled separately 72 | through the :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.longitudinal` and :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.lateral` parameters. 73 | For instance, in the default configuration of the :ref:`intersection ` environment, only the speed is controlled by the agent, 74 | while the lateral control of the vehicle is automatically performed by a :ref:`steering controller ` to track a desired lane. 75 | 76 | 77 | Manual control 78 | ---------------- 79 | 80 | The environments can be used as a simulation: 81 | 82 | .. code-block:: python 83 | 84 | env = gym.make("highway-v0") 85 | env.configure({ 86 | "manual_control": True 87 | }) 88 | env.reset() 89 | done = False 90 | while not done: 91 | env.step(env.action_space.sample()) # with manual control, these actions are ignored 92 | 93 | The ego-vehicle is controlled by directional arrows keys, as defined in 94 | :py:class:`~highway_env.envs.common.graphics.EventHandler` 95 | 96 | API 97 | -------- 98 | 99 | .. automodule:: highway_env.envs.common.action 100 | :members: 101 | 102 | -------------------------------------------------------------------------------- /highway_modify/docs/source/bibliography/biblio.bib: -------------------------------------------------------------------------------- 1 | @article{Treiber2000, 2 | author = {Treiber, Martin and Hennecke, Ansgar and Helbing, Dirk}, 3 | journal = {Physical Review E - Statistical Physics, Plasmas, Fluids, and Related Interdisciplinary Topics}, 4 | number = {2}, 5 | pages = {1805--1824}, 6 | title = {{Congested traffic states in empirical observations and microscopic simulations}}, 7 | volume = {62}, 8 | year = {2000} 9 | } 10 | 11 | @article{Kesting2007, 12 | abstract = {A general model (minimizing overall braking induced by lane change, MOBIL) is proposed to derive lane-changing rules for discretionary and mandatory lane changes for a wide class of car-following models. Both the utility of a given lane and the risk associated with lane changes are determined In terms of longitudinal accelerations calculated with micro-scopic traffic models. This determination allows for the formulation of compact and general safety and incentive criteria for both symmetric and asymmetric passing rules. Moreover, anticipative elements and the crucial influence of velocity differences of these car-following models are automatically transferred to the lane-changing rules. Although the safety criterion prevents critical lane changes and collisions, the incentive criterion takes into account the advantages and disadvantages of other drivers associated with a lane change via the "politeness factor." The parameter allows one to vary the motivation for lane changing from purely egoistic to more cooperative driving behavior. 
This novel feature allows one first to prevent lane changes for a marginal advantage if they obstruct other drivers and second to let an aggressive driver induce the lane change of a slower driver ahead in order to no longer be obstructed. This phenomenon is common for asymmetric passing rules with a dedicated lane for passing. The model is applied to traffic simulations of cars and trucks with the Intelligent driver model as the underlying car-following model. An open system with an on-ramp is studied, and the resulting lanechanging rate is investigated as a function of the spatial coordinate as well as a function of traffic density.}, 13 | author = {Kesting, Arne and Treiber, Martin and Helbing, Dirk}, 14 | doi = {10.3141/1999-10}, 15 | isbn = {9780309104258}, 16 | issn = {03611981}, 17 | journal = {Transportation Research Record}, 18 | title = {{General lane-changing model MOBIL for car-following models}}, 19 | year = {2007} 20 | } 21 | 22 | @article{Polack2017, 23 | author = {Polack, Philip and Altch{\'{e}}, Florent and D'Andr{\'{e}}a-Novel, Brigitte}, 24 | isbn = {9781509048038}, 25 | journal = {IEEE Intelligent Vehicles Symposium}, 26 | pages = {6--8}, 27 | title = {{The Kinematic Bicycle Model : a Consistent Model for Planning Feasible Trajectories for Autonomous Vehicles ?}}, 28 | address = {Los Angeles}, 29 | year = {2017} 30 | } 31 | 32 | @article{Hren2008, 33 | author = {Hren, Jean Fran{\c{c}}ois and Munos, R{\'{e}}mi}, 34 | journal = {Lecture Notes in Computer Science}, 35 | title = {{Optimistic planning of deterministic systems}}, 36 | year = {2008} 37 | } 38 | 39 | @inproceedings{Andrychowicz2017, 40 | abstract = {Dealing with sparse rewards is one of the biggest challenges in Reinforcement Learning (RL). We present a novel technique called Hindsight Experience Replay which allows sample-efficient learning from rewards which are sparse and binary and therefore avoid the need for complicated reward engineering. It can be combined with an arbitrary off-policy RL algorithm and may be seen as a form of implicit curriculum. We demonstrate our approach on the task of manipulating objects with a robotic arm. In particular, we run experiments on three different tasks: pushing, sliding, and pick-and-place, in each case using only binary rewards indicating whether or not the task is completed. Our ablation studies show that Hindsight Experience Replay is a crucial ingredient which makes training possible in these challenging environments. We show that our policies trained on a physics simulation can be deployed on a physical robot and successfully complete the task. The video presenting our experiments is available at https://goo.gl/SMrQnI.}, 41 | archivePrefix = {arXiv}, 42 | arxivId = {1707.01495}, 43 | author = {Andrychowicz, Marcin and Wolski, Filip and Ray, Alex and Schneider, Jonas and Fong, Rachel and Welinder, Peter and McGrew, Bob and Tobin, Josh and Abbeel, Pieter and Zaremba, Wojciech}, 44 | booktitle = {Advances in Neural Information Processing Systems}, 45 | eprint = {1707.01495}, 46 | issn = {10495258}, 47 | title = {{Hindsight experience replay}}, 48 | year = {2017} 49 | } 50 | 51 | @article{Mnih2015, 52 | author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. 
and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis}, 53 | journal = {Nature}, 54 | number = {7540}, 55 | pages = {529--533}, 56 | title = {{Human-level control through deep reinforcement learning}}, 57 | volume = {518}, 58 | year = {2015} 59 | } 60 | 61 | @inproceedings{Leurent2019social, 62 | title = {Social Attention for Autonomous Decision-Making in Dense Traffic}, 63 | author = {Edouard Leurent and Jean Mercat}, 64 | year = {2019}, 65 | booktitle = {Machine Learning for Autonomous Driving Workshop at the Thirty-third Conference on Neural Information Processing Systems (NeurIPS 2019)}, 66 | address = {Montreal, Canada}, 67 | month=dec, 68 | eprint = {1911.12250}, 69 | archivePrefix = {arXiv}, 70 | primaryClass = {cs.SY} 71 | } 72 | 73 | @misc{Qi2017pointnet, 74 | title={PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation}, 75 | author={Charles R. Qi and Hao Su and Kaichun Mo and Leonidas J. Guibas}, 76 | year={2017}, 77 | eprint={1612.00593}, 78 | archivePrefix={arXiv}, 79 | primaryClass={cs.CV} 80 | } -------------------------------------------------------------------------------- /highway_modify/docs/source/bibliography/index.rst: -------------------------------------------------------------------------------- 1 | .. _bibliography: 2 | 3 | Bibliography 4 | ############ 5 | 6 | .. bibliography:: biblio.bib 7 | :encoding: latin 8 | :style: alpha 9 | :all: 10 | 11 | .. Fix to make sure bibliography appear when bibliography called in separate file 12 | .. latex+latin => latin 13 | .. :cited: => :all: see http://sphinxcontrib-bibtex.readthedocs.io/en/latest/usage.html#unresolved-citations-across-documents 14 | 15 | .. :style: alpha, plain , unsrt, and unsrtalpha -------------------------------------------------------------------------------- /highway_modify/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../..')) 18 | 19 | 20 | # -- Path setup for jupyter-sphix -------------------------------------------- 21 | # See https://jupyter-sphinx.readthedocs.io/en/latest/#configuration-options 22 | # BUT this does not seem to work with Anaconda on windows 23 | import os 24 | package_path = os.path.abspath('../..') 25 | os.environ['PYTHONPATH'] = ':'.join(filter(None, (package_path, os.environ.get('PYTHONPATH', '')))) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = 'highway-env' 31 | copyright = '2018, Edouard Leurent' 32 | author = 'Edouard Leurent' 33 | 34 | # The short X.Y version 35 | version = '' 36 | # The full version, including alpha/beta/rc tags 37 | release = '' 38 | 39 | 40 | # -- General configuration --------------------------------------------------- 41 | 42 | # If your documentation needs a minimal Sphinx version, state it here. 43 | # 44 | # needs_sphinx = '1.0' 45 | 46 | # Add any Sphinx extension module names here, as strings. They can be 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 48 | # ones. 49 | extensions = [ 50 | 'sphinx.ext.autodoc', 51 | 'sphinx.ext.coverage', 52 | 'sphinx.ext.githubpages', 53 | 'sphinx.ext.viewcode', 54 | 'sphinx.ext.autosectionlabel', 55 | 'sphinxcontrib.bibtex', 56 | 'jupyter_sphinx' 57 | ] 58 | 59 | autodoc_default_flags = ['members', 'private-members', 'undoc-members', 'special-members'] 60 | autodoc_member_order = 'bysource' 61 | 62 | # Add any paths that contain templates here, relative to this directory. 63 | templates_path = ['_templates'] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # 68 | # source_suffix = ['.rst', '.md'] 69 | source_suffix = '.rst' 70 | 71 | # The master toctree document. 72 | master_doc = 'index' 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 79 | language = None 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # This pattern also affects html_static_path and html_extra_path . 84 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = 'sphinx' 88 | 89 | 90 | # -- Options for HTML output ------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | # 95 | html_theme = 'sphinx_rtd_theme' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | # 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom static files (such as style sheets) here, 104 | # relative to this directory. They are copied after the builtin static files, 105 | # so a file named "default.css" will overwrite the builtin "default.css". 106 | html_static_path = ['_static'] 107 | 108 | # Custom sidebar templates, must be a dictionary that maps document names 109 | # to template names. 
110 | # 111 | # The default sidebars (for documents that don't match any pattern) are 112 | # defined by theme itself. Builtin themes are using these templates by 113 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 114 | # 'searchbox.html']``. 115 | # 116 | # html_sidebars = {} 117 | 118 | 119 | # -- Options for HTMLHelp output --------------------------------------------- 120 | 121 | # Output file base name for HTML help builder. 122 | htmlhelp_basename = 'highway-envdoc' 123 | 124 | 125 | # -- Options for LaTeX output ------------------------------------------------ 126 | 127 | latex_elements = { 128 | # The paper size ('letterpaper' or 'a4paper'). 129 | # 130 | # 'papersize': 'letterpaper', 131 | 132 | # The font size ('10pt', '11pt' or '12pt'). 133 | # 134 | # 'pointsize': '10pt', 135 | 136 | # Additional stuff for the LaTeX preamble. 137 | # 138 | # 'preamble': '', 139 | 140 | # Latex figure (float) alignment 141 | # 142 | # 'figure_align': 'htbp', 143 | } 144 | 145 | # Grouping the document tree into LaTeX files. List of tuples 146 | # (source start file, target name, title, 147 | # author, documentclass [howto, manual, or own class]). 148 | latex_documents = [ 149 | (master_doc, 'highway-env.tex', 'highway-env Documentation', 150 | 'Edouard Leurent', 'manual'), 151 | ] 152 | 153 | 154 | # -- Options for manual page output ------------------------------------------ 155 | 156 | # One entry per manual page. List of tuples 157 | # (source start file, name, description, authors, manual section). 158 | man_pages = [ 159 | (master_doc, 'highway-env', 'highway-env Documentation', 160 | [author], 1) 161 | ] 162 | 163 | 164 | # -- Options for Texinfo output ---------------------------------------------- 165 | 166 | # Grouping the document tree into Texinfo files. List of tuples 167 | # (source start file, target name, title, author, 168 | # dir menu entry, description, category) 169 | texinfo_documents = [ 170 | (master_doc, 'highway-env', 'highway-env Documentation', 171 | author, 'highway-env', 'One line description of project.', 172 | 'Miscellaneous'), 173 | ] 174 | 175 | 176 | # -- Extension configuration ------------------------------------------------- 177 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/index.rst: -------------------------------------------------------------------------------- 1 | .. _dynamics: 2 | 3 | Dynamics 4 | ############ 5 | 6 | The dynamics of every environment describes how vehicles move and behave through time. 7 | There are two important sections that affect these dynamics: the description of the roads, and the vehicle physics and behavioral models. 8 | 9 | Roads 10 | ======== 11 | 12 | 13 | A :py:class:`~highway_env.road.road.Road` is composed of a :py:class:`~highway_env.road.road.RoadNetwork` and a list 14 | of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | road/lane 20 | road/road 21 | road/regulation 22 | 23 | Vehicles 24 | ======== 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | 29 | vehicle/kinematics 30 | vehicle/controller 31 | vehicle/behavior -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/lane.rst: -------------------------------------------------------------------------------- 1 | .. 
_road_lane: 2 | 3 | Lane 4 | ######### 5 | 6 | The geometry of lanes is described by :py:class:`~highway_env.road.lane.AbstractLane` objects, as a parametrized center line curve, providing a local coordinate system. 7 | 8 | Conversions between the local (longitudinal, lateral) coordinates in the Frenet frame and the global :math:`x,y` coordinates are ensured by the :py:meth:`~highway_env.road.lane.AbstractLane.position` and :py:meth:`~highway_env.road.lane.AbstractLane.local_coordinates` methods. 9 | 10 | The main implementations are: 11 | 12 | - :py:class:`~highway_env.road.lane.StraightLane` 13 | - :py:class:`~highway_env.road.lane.SineLane` 14 | - :py:class:`~highway_env.road.lane.CircularLane` 15 | 16 | API 17 | *** 18 | 19 | .. automodule:: highway_env.road.lane 20 | :members: 21 | 22 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/regulation.rst: -------------------------------------------------------------------------------- 1 | .. _road_regulation: 2 | 3 | Road regulation 4 | ############### 5 | 6 | A :py:class:`~highway_env.road.regulation.RegulatedRoad` is a :py:class:`~highway_env.road.road.Road` in which vehicles take or give the right of way at an intersection based on the :py:attr:`~highway_env.road.lane.AbstractLane.priority` lane attribute. 7 | 8 | On such a road, some rules are enforced: 9 | 10 | - most of the time, vehicles behave as usual; 11 | - however, they try to predict collisions with other vehicles through the :py:meth:`~highway_env.road.regulation.RegulatedRoad.is_conflict_possible` method; 12 | - when a conflict is predicted, the right of way is arbitrated through the :py:meth:`~highway_env.road.regulation.RegulatedRoad.respect_priorities` method, and the yielding vehicle's target velocity is set to 0 until the conflict is resolved. 13 | 14 | API 15 | *** 16 | 17 | .. automodule:: highway_env.road.regulation 18 | :members: 19 | 20 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/road.rst: -------------------------------------------------------------------------------- 1 | .. _road_road: 2 | 3 | Road 4 | ######### 5 | 6 | 7 | A :py:class:`~highway_env.road.road.Road` is composed of a :py:class:`~highway_env.road.road.RoadNetwork` and a list 8 | of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 9 | 10 | The :py:class:`~highway_env.road.road.RoadNetwork` describes the topology of the road infrastructure as a graph, 11 | where edges represent lanes and nodes represent intersections. It contains a :py:attr:`~highway_env.road.road.RoadNetwork.graph` dictionary which stores the :py:class:`~highway_env.road.lane.AbstractLane` geometries by their :py:class:`~highway_env.road.road.LaneIndex`. 12 | A :py:class:`~highway_env.road.road.LaneIndex` is a tuple containing: 13 | 14 | - a string identifier of a starting position 15 | - a string identifier of an ending position 16 | - an integer giving the index of the described lane, in the (unique) road from the starting to the ending position 17 | 18 | For instance, the geometry of the second lane in the road going from the ``"lab"`` to the ``"pub"`` can be obtained by: 19 | 20 | .. code-block:: python 21 | 22 | lane = road.road_network.graph["lab"]["pub"][1] 23 | 24 | The actual positions of the lab and the pub are defined in the ``lane`` geometry object. 25 | 26 | API 27 | ******* 28 | 29 | .. 
automodule:: highway_env.road.road 30 | :members: 31 | 32 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/behavior.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_behavior: 2 | 3 | Behavior 4 | ========== 5 | 6 | .. py:module::highway_env.vehicle.behavior 7 | 8 | Other simulated vehicles follow simple and realistic behaviors that dictate how they accelerate and 9 | steer on the road. They are implemented in the :py:class:`~highway_env.vehicle.behavior.IDMVehicle` class. 10 | 11 | Longitudinal Behavior 12 | ~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | The acceleration of the vehicle is given by the *Intelligent Driver Model* (IDM) from :cite:`Treiber2000`. 15 | 16 | .. math:: 17 | \dot{v} &= a\left[1-\left(\frac{v}{v_0}\right)^\delta - \left(\frac{d^*}{d}\right)^2\right] \\ 18 | d^* &= d_0 + Tv + \frac{v\Delta v}{2\sqrt{ab}} \\ 19 | 20 | where :math:`v` is the vehicle velocity, :math:`d` is the distance to its front vehicle. 21 | The dynamics are parametrised by: 22 | 23 | - :math:`v_0` the desired velocity, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.target_velocity` 24 | - :math:`T` the desired time gap, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.TIME_WANTED` 25 | - :math:`d_0` the jam distance, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.DISTANCE_WANTED` 26 | - :math:`a,\,b` the maximum acceleration and deceleration, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.COMFORT_ACC_MAX` and :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.COMFORT_ACC_MIN` 27 | - :math:`\delta` the velocity exponent, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.DELTA` 28 | 29 | It is implemented in :py:meth:`~highway_env.vehicle.behavior.IDMVehicle.acceleration` method. 30 | 31 | Lateral Behavior 32 | ~~~~~~~~~~~~~~~~ 33 | 34 | The discrete lane change decisions are given by the *Minimizing Overall Braking Induced by Lane change* (MOBIL) model from :cite:`Kesting2007`. 35 | According to this model, a vehicle decides to change lane when: 36 | 37 | - it is **safe** (do not cut-in): 38 | 39 | .. math:: 40 | \tilde{a}_n \geq - b_\text{safe}; 41 | 42 | - there is an **incentive** (for the ego-vehicle and possibly its followers): 43 | 44 | .. math:: 45 | \underbrace{\tilde{a}_c - a_c}_{\text{ego-vehicle}} + p\left(\underbrace{\tilde{a}_n - a_n}_{\text{new follower}} + \underbrace{\tilde{a}_o - a_o}_{\text{old follower}}\right) \geq \Delta a_\text{th}, 46 | 47 | where 48 | 49 | - :math:`c` is the center (ego-) vehicle, :math:`o` is its old follower *before* the lane change, and :math:`n` is its new follower *after* the lane change 50 | - :math:`a, \tilde{a}` are the acceleration of the vehicles *before* and *after* the lane change, respectively. 51 | - :math:`p` is a politeness coefficient, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.POLITENESS` 52 | - :math:`\Delta a_\text{th}` the acceleration gain required to trigger a lane change, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.LANE_CHANGE_MIN_ACC_GAIN` 53 | - :math:`b_\text{safe}` the maximum braking imposed to a vehicle during a cut-in, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.LANE_CHANGE_MAX_BRAKING_IMPOSED` 54 | 55 | 56 | It is implemented in the :py:meth:`~highway_env.vehicle.behavior.IDMVehicle.mobil` method. 57 | 58 | .. 
note:: 59 | In the :py:class:`~highway_env.vehicle.behavior.LinearVehicle` class, the longitudinal and lateral behaviours 60 | are approximated as linear weightings of several features, such as the distance and speed difference to the leading 61 | vehicle. 62 | 63 | 64 | 65 | API 66 | *** 67 | 68 | .. automodule:: highway_env.vehicle.behavior 69 | :members: 70 | 71 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/controller.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_controller: 2 | 3 | Control 4 | ======== 5 | 6 | The :py:class:`~highway_env.vehicle.controller.ControlledVehicle` class implements a low-level controller on top of a :py:class:`~highway_env.vehicle.kinematics.Vehicle`, allowing to track a given target speed and follow a target lane. 7 | The controls are computed when calling the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.act` method. 8 | 9 | Longitudinal controller 10 | ----------------------- 11 | 12 | The longitudinal controller is a simple proportional controller: 13 | 14 | .. math:: 15 | a = K_p(v_r - v), 16 | 17 | where 18 | 19 | - :math:`a` is the vehicle acceleration (throttle); 20 | - :math:`v` is the vehicle velocity; 21 | - :math:`v_r` is the reference velocity; 22 | - :math:`K_p` is the controller proportional gain, implemented as :py:attr:`~highway_env.vehicle.controller.ControlledVehicle.KP_A`. 23 | 24 | It is implemented in the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.speed_control` method. 25 | 26 | Lateral controller 27 | ----------------------- 28 | 29 | The lateral controller is a simple proportional-derivative controller, combined with some non-linearities that invert those of the :ref:`kinematics model `. 30 | 31 | Position control 32 | ~~~~~~~~~~~~~~~~ 33 | 34 | .. math:: 35 | v_{\text{lat},r} &= -K_{p,\text{lat}} \Delta_{\text{lat}}, \\ 36 | \Delta \psi_{r} &= \arcsin \left(\frac{v_{\text{lat},r}}{v}\right), 37 | 38 | Heading control 39 | ~~~~~~~~~~~~~~~~ 40 | 41 | .. math:: 42 | \psi_r &= \psi_L + \Delta \psi_{r}, \\ 43 | \dot{\psi}_r &= K_{p,\psi} (\psi_r - \psi), \\ 44 | \delta &= \arcsin \left(\frac{1}{2} \frac{l}{v} \dot{\psi}_r\right), \\ 45 | 46 | where 47 | 48 | - :math:`\Delta_{\text{lat}}` is the lateral position of the vehicle with respect to the lane center-line; 49 | - :math:`v_{\text{lat},r}` is the lateral velocity command; 50 | - :math:`\Delta \psi_{r}` is a heading variation to apply the lateral velocity command; 51 | - :math:`\psi_L` is the lane heading (at some lookahead position to anticipate turns); 52 | - :math:`\psi_r` is the target heading to follow the lane heading and position; 53 | - :math:`\dot{\psi}_r` is the yaw rate command; 54 | - :math:`\delta` is the front wheels angle control; 55 | - :math:`K_{p,\text{lat}}` and :math:`K_{p,\psi}` are the position and heading control gains. 56 | 57 | It is implemented in the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.steering_control` method. 58 | 59 | API 60 | ---- 61 | 62 | .. automodule:: highway_env.vehicle.controller 63 | :members: 64 | 65 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/kinematics.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_kinematics: 2 | 3 | .. 
py:module::highway_env.vehicle.kinematics 4 | 5 | Kinematics 6 | ================== 7 | 8 | The vehicles kinematics are represented in the :py:class:`~highway_env.vehicle.kinematics.Vehicle` class by the *Kinematic Bicycle Model* :cite:`Polack2017`. 9 | 10 | .. math:: 11 | \dot{x}&=v\cos(\psi+\beta) \\ 12 | \dot{y}&=v\sin(\psi+\beta) \\ 13 | \dot{v}&=a \\ 14 | \dot{\psi}&=\frac{v}{l}\sin\beta \\ 15 | \beta&=\tan^{-1}(1/2\tan\delta), \\ 16 | 17 | where 18 | 19 | - :math:`(x, y)` is the vehicle position; 20 | - :math:`v` its forward speed; 21 | - :math:`\psi` its heading; 22 | - :math:`a` is the acceleration command; 23 | - :math:`\beta` is the slip angle at the center of gravity; 24 | - :math:`\delta` is the front wheel angle used as a steering command. 25 | 26 | These calculations appear in the :py:meth:`~highway_env.vehicle.kinematics.Vehicle.step` method. 27 | 28 | API 29 | *** 30 | 31 | .. automodule:: highway_env.vehicle.kinematics 32 | :members: 33 | 34 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/highway.rst: -------------------------------------------------------------------------------- 1 | .. _environments_highway: 2 | 3 | .. currentmodule:: highway_env.envs.highway_env 4 | 5 | Highway 6 | ********** 7 | 8 | In this task, the ego-vehicle is driving on a multilane highway populated with other vehicles. The agent's objective is to reach a high speed while avoiding collisions with neighbouring vehicles. Driving on the right side of the road is also rewarded. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:highway_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("highway-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "Kinematics" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction", 34 | }, 35 | "lanes_count": 4, 36 | "vehicles_count": 50, 37 | "duration": 40, # [s] 38 | "initial_spacing": 2, 39 | "collision_reward": -1, # The reward received when colliding with a vehicle. 40 | "reward_speed_range": [20, 30], # [m/s] The reward for high speed is mapped linearly from this range to [0, HighwayEnv.HIGH_SPEED_REWARD]. 41 | "simulation_frequency": 15, # [Hz] 42 | "policy_frequency": 1, # [Hz] 43 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 44 | "screen_width": 600, # [px] 45 | "screen_height": 150, # [px] 46 | "centering_position": [0.3, 0.5], 47 | "scaling": 5.5, 48 | "show_trajectories": False, 49 | "render_agent": True, 50 | "offscreen_rendering": False 51 | } 52 | 53 | More specifically, it is defined in: 54 | 55 | .. automethod:: HighwayEnv.default_config 56 | 57 | Faster variant 58 | ===================== 59 | 60 | A faster (x15 speedup) variant is also available with: 61 | 62 | .. code-block:: python 63 | 64 | env = gym.make("highway-fast-v0") 65 | 66 | 67 | The details of this variant are described `here `_. 68 | 69 | API 70 | ===== 71 | 72 | .. autoclass:: HighwayEnv 73 | :members: 74 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/index.rst: -------------------------------------------------------------------------------- 1 | .. _environments: 2 | 3 | The Environments 4 | ############ 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | highway 10 | merge 11 | roundabout 12 | parking 13 | intersection 14 | racetrack -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/intersection.rst: -------------------------------------------------------------------------------- 1 | .. _environments_intersection: 2 | 3 | .. currentmodule:: highway_env.envs.intersection_env 4 | 5 | Intersection 6 | ************ 7 | 8 | An intersection negotiation task with dense traffic. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/intersection-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:intersection_env 14 | 15 | 16 | .. warning:: 17 | 18 | It's quite hard to come up with good decentralized behaviors for other agents to avoid each other. Of course, this 19 | could be achieved by sophisticated centralized schedulers, or traffic lights, but to keep things simple a 20 | :ref:`rudimentary collision prediction ` was added in the behaviour of other vehicles. 21 | 22 | This simple system sometime fails which results in collisions, blocking the way for the ego-vehicle. 23 | I figured it was fine for my own purpose, since it did not happen too often and it's reasonable to expect 24 | the ego-vehicle to simply wait the end of episode in these situations. But I agree that it is not ideal, 25 | and I welcome any contribution on that matter. 26 | 27 | Usage 28 | ========== 29 | 30 | .. code-block:: python 31 | 32 | env = gym.make("intersection-v0") 33 | 34 | 35 | Default configuration 36 | ===================== 37 | 38 | .. code-block:: python 39 | 40 | { 41 | "observation": { 42 | "type": "Kinematics", 43 | "vehicles_count": 15, 44 | "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"], 45 | "features_range": { 46 | "x": [-100, 100], 47 | "y": [-100, 100], 48 | "vx": [-20, 20], 49 | "vy": [-20, 20], 50 | }, 51 | "absolute": True, 52 | "flatten": False, 53 | "observe_intentions": False 54 | }, 55 | "action": { 56 | "type": "DiscreteMetaAction", 57 | "longitudinal": False, 58 | "lateral": True 59 | }, 60 | "duration": 13, # [s] 61 | "destination": "o1", 62 | "initial_vehicle_count": 10, 63 | "spawn_probability": 0.6, 64 | "screen_width": 600, 65 | "screen_height": 600, 66 | "centering_position": [0.5, 0.6], 67 | "scaling": 5.5 * 1.3, 68 | "collision_reward": IntersectionEnv.COLLISION_REWARD, 69 | "normalize_reward": False 70 | } 71 | 72 | More specifically, it is defined in: 73 | 74 | .. automethod:: IntersectionEnv.default_config 75 | 76 | API 77 | ===== 78 | 79 | .. autoclass:: IntersectionEnv 80 | :members: 81 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/merge.rst: -------------------------------------------------------------------------------- 1 | .. _environments_merge: 2 | 3 | .. currentmodule:: highway_env.envs.merge_env 4 | 5 | Merge 6 | ********** 7 | 8 | In this task, the ego-vehicle starts on a main highway but soon approaches a road junction with incoming vehicles on the access ramp. The agent's objective is now to maintain a high speed while making room for the vehicles so that they can safely merge in the traffic. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/merge-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:merge_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. 
code-block:: python 19 | 20 | env = gym.make("merge-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "TimeToCollision" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction" 34 | }, 35 | "simulation_frequency": 15, # [Hz] 36 | "policy_frequency": 1, # [Hz] 37 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 38 | "screen_width": 600, # [px] 39 | "screen_height": 150, # [px] 40 | "centering_position": [0.3, 0.5], 41 | "scaling": 5.5, 42 | "show_trajectories": False, 43 | "render_agent": True, 44 | "offscreen_rendering": False 45 | } 46 | 47 | More specifically, it is defined in: 48 | 49 | .. automethod:: MergeEnv.default_config 50 | 51 | API 52 | ===== 53 | 54 | .. autoclass:: MergeEnv 55 | :members: 56 | 57 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/parking.rst: -------------------------------------------------------------------------------- 1 | .. _environments_parking: 2 | 3 | .. currentmodule:: highway_env.envs.parking_env 4 | 5 | Parking 6 | ********** 7 | 8 | A goal-conditioned continuous control task in which the ego-vehicle must park in a given space with the appropriate heading. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/parking-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:parking_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("parking-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "KinematicsGoal", 31 | "features": ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 32 | "scales": [100, 100, 5, 5, 1, 1], 33 | "normalize": False 34 | }, 35 | "action": { 36 | "type": "ContinuousAction" 37 | }, 38 | "simulation_frequency": 15, 39 | "policy_frequency": 5, 40 | "screen_width": 600, 41 | "screen_height": 300, 42 | "centering_position": [0.5, 0.5], 43 | "scaling": 7, 44 | "show_trajectories": False, 45 | "render_agent": True, 46 | "offscreen_rendering": False 47 | } 48 | 49 | More specifically, it is defined in: 50 | 51 | .. automethod:: ParkingEnv.default_config 52 | 53 | API 54 | ===== 55 | 56 | .. autoclass:: ParkingEnv 57 | :members: 58 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/racetrack.rst: -------------------------------------------------------------------------------- 1 | .. _environments_racetrack: 2 | 3 | .. currentmodule:: highway_env.envs.racetrack_env 4 | 5 | Racetrack 6 | ********** 7 | 8 | A continuous control environment, where the agent has to follow the tracks while avoiding collisions with other vehicles. 9 | 10 | Credits and many thanks to `@supperted825 `_ for the `idea and initial implementation `_. 11 | 12 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/racetrack-env.gif 13 | :width: 80% 14 | :align: center 15 | :name: fig:racetrack_env 16 | 17 | Usage 18 | ========== 19 | 20 | .. code-block:: python 21 | 22 | env = gym.make("racetrack-v0") 23 | 24 | 25 | Default configuration 26 | ===================== 27 | 28 | .. 
code-block:: python 29 | 30 | { 31 | "observation": { 32 | "type": "OccupancyGrid", 33 | "features": ['presence', 'on_road'], 34 | "grid_size": [[-18, 18], [-18, 18]], 35 | "grid_step": [3, 3], 36 | "as_image": False, 37 | "align_to_vehicle_axes": True 38 | }, 39 | "action": { 40 | "type": "ContinuousAction", 41 | "longitudinal": False, 42 | "lateral": True 43 | }, 44 | "simulation_frequency": 15, 45 | "policy_frequency": 5, 46 | "duration": 300, 47 | "collision_reward": -1, 48 | "lane_centering_cost": 4, 49 | "action_reward": -0.3, 50 | "controlled_vehicles": 1, 51 | "other_vehicles": 1, 52 | "screen_width": 600, 53 | "screen_height": 600, 54 | "centering_position": [0.5, 0.5], 55 | "scaling": 7 56 | "show_trajectories": False, 57 | "render_agent": True, 58 | "offscreen_rendering": False 59 | } 60 | 61 | More specifically, it is defined in: 62 | 63 | .. automethod:: RacetrackEnv.default_config 64 | 65 | API 66 | ===== 67 | 68 | .. autoclass:: RacetrackEnv 69 | :members: 70 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/roundabout.rst: -------------------------------------------------------------------------------- 1 | .. _environments_roundabout: 2 | 3 | .. currentmodule:: highway_env.envs.roundabout_env 4 | 5 | Roundabout 6 | ********** 7 | 8 | In this task, the ego-vehicle if approaching a roundabout with flowing traffic. It will follow its planned route automatically, but has to handle lane changes and longitudinal control to pass the roundabout as fast as possible while avoiding collisions. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/roundabout-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:roundabout_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("roundabout-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "TimeToCollision" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction" 34 | }, 35 | "incoming_vehicle_destination": None, 36 | "duration": 11 37 | "simulation_frequency": 15, # [Hz] 38 | "policy_frequency": 1, # [Hz] 39 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 40 | "screen_width": 600, # [px] 41 | "screen_height": 600, # [px] 42 | "centering_position": [0.5, 0.6], 43 | "scaling": 5.5, 44 | "show_trajectories": False, 45 | "render_agent": True, 46 | "offscreen_rendering": False 47 | } 48 | 49 | More specifically, it is defined in: 50 | 51 | .. automethod:: RoundaboutEnv.default_config 52 | 53 | API 54 | ===== 55 | 56 | .. autoclass:: RoundaboutEnv 57 | :members: 58 | -------------------------------------------------------------------------------- /highway_modify/docs/source/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | ============================= 4 | Frequently Asked Questions 5 | ============================= 6 | 7 | 8 | This is a list of Frequently Asked Questions about highway-env. Feel free to 9 | suggest new entries! 10 | 11 | I try to train an agent using the Kinematics Observation and an MLP model, but the resulting policy is not optimal. Why? 12 | I also tend to get reasonable but sub-optimal policies using this observation-model pair. 13 | In :cite:`Leurent2019social`, we argued that a possible reason is that the MLP output depends on the order of 14 | vehicles in the observation. 
Indeed, if the agent revisits a given scene but observes vehicles described in a different 15 | order, it will see it as a novel state and will not be able to reuse past information. Thus, the agent struggles to 16 | make use of its observation. 17 | 18 | This can be addressed in two ways: 19 | 20 | * - Change the *model*, to use a permutation-invariant architecture which will not be sensitive to the vehicles order, such as *e.g.* :cite:`Qi2017pointnet` or :cite:`Leurent2019social`. 21 | This example is implemented `here (DQN) `_ or `here (SB3's PPO) `_. 22 | 23 | * - Change the *observation*. For example, the :ref:`Grayscale Image` does not depend on an ordering. In this case, a CNN model is more suitable than an MLP model. 24 | This example is implemented `here (SB3's DQN) `_. 25 | 26 | 27 | My videos are too fast / have a low framerate. 28 | This is because in openai/gym, a single video frame is generated at each call of ``env.step(action)``. However, in highway-env, the policy typically runs at a low-level frequency (e.g. 1 Hz) so that a long action (*e.g.* change lane) actually corresponds to several (typically, 15) simulation frames. 29 | In order to also render these intermediate simulation frames, the following should be done: 30 | 31 | .. code-block:: python 32 | 33 | import gym 34 | import highway_env 35 | 36 | # Wrap the env by a RecordVideo wrapper 37 | env = gym.make("highway-v0") 38 | env = RecordVideo(env, video_folder="run", 39 | episode_trigger=lambda e: True) # record all episodes 40 | 41 | # Provide the video recorder to the wrapped environment 42 | # so it can send it intermediate simulation frames. 43 | env.unwrapped.set_record_video_wrapper(env) 44 | 45 | # Record a video as usual 46 | obs = env.reset() 47 | done = False: 48 | while not done: 49 | action = env.action_space.sample() 50 | obs, reward, done, info = env.step(action) 51 | env.render() 52 | env.close() -------------------------------------------------------------------------------- /highway_modify/docs/source/graphics/index.rst: -------------------------------------------------------------------------------- 1 | .. _graphics: 2 | 3 | .. py:currentmodule::highway_env.envs.common.graphics 4 | 5 | Graphics 6 | ============= 7 | 8 | Environment rendering is done with `pygame `_, which must be :ref:`installed separately `. 9 | 10 | A window is created at the first call of ``env.render()``. Its dimensions can be configured: 11 | 12 | .. code-block:: python 13 | 14 | env = gym.make("roundabout-v0") 15 | env.configure({ 16 | "screen_width": 640, 17 | "screen_height": 480 18 | }) 19 | env.reset() 20 | env.render() 21 | 22 | World surface 23 | -------------- 24 | 25 | The simulation is rendered in a :py:class:`~highway_env.envs.common.graphics.RoadSurface` pygame surface, which defines the location and zoom of the rendered location. 26 | By default, the rendered area is always centered on the ego-vehicle. 27 | Its initial scale and offset can be set with the ``"scaling"`` and ``"centering_position"`` configurations, and can also be 28 | updated during simulation using the O,L keys and K,M keys, respectively. 29 | 30 | Scene graphics 31 | --------------- 32 | 33 | - Roads are rendered in the :py:class:`~highway_env.road.graphics.RoadGraphics` class. 34 | - Vehicles are rendered in the :py:class:`~highway_env.vehicle.graphics.VehicleGraphics` class. 35 | 36 | 37 | API 38 | ----------- 39 | 40 | 41 | .. automodule:: highway_env.envs.common.graphics 42 | :members: 43 | 44 | .. 
automodule:: highway_env.road.graphics 45 | :members: 46 | 47 | .. automodule:: highway_env.vehicle.graphics 48 | :members: -------------------------------------------------------------------------------- /highway_modify/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. highway-env documentation master file, created by 2 | sphinx-quickstart on Wed Feb 28 15:51:44 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | .. |Build Status| image:: https://github.com/eleurent/highway-env/workflows/build/badge.svg 8 | :target: https://github.com/eleurent/highway-env/workflows/build/ 9 | 10 | .. |Documentation Status| image:: https://readthedocs.org/projects/highway-env/badge/?version=latest 11 | :target: https://highway-env.readthedocs.io/en/latest/ 12 | 13 | .. |Downloads| image:: https://img.shields.io/pypi/dm/highway-env 14 | :target: https://pypi.org/project/highway-env/ 15 | 16 | .. |Codacy Status| image:: https://api.codacy.com/project/badge/Grade/63847d9328f64fce9c137b03fcafcc27 17 | :target: https://app.codacy.com/manual/eleurent/highway-env?utm_source=github.com&utm_medium=referral&utm_content=eleurent/highway-env&utm_campaign=Badge_Grade_Dashboard 18 | 19 | .. |Coverage Status| image:: https://codecov.io/gh/eleurent/highway-env/branch/master/graph/badge.svg 20 | :target: https://codecov.io/gh/eleurent/highway-env 21 | 22 | .. |Contributors| image:: https://img.shields.io/github/contributors/eleurent/highway-env 23 | :target: https://github.com/eleurent/highway-env/graphs/contributors 24 | 25 | .. |Environments| image:: https://img.shields.io/github/search/eleurent/highway-env/import%20filename:*_env%20path:highway_env/envs?label=environments 26 | :target: https://highway-env.readthedocs.io/en/latest/quickstart.html#all-the-environments 27 | 28 | 29 | |Build Status| |Documentation Status| |Downloads| |Codacy Status| |Coverage Status| |Contributors| |Environments| 30 | 31 | Welcome to `highway-env `_'s documentation! 32 | ==================================================================================== 33 | 34 | This project gathers a collection of environment for *decision-making* in Autonomous Driving. 35 | 36 | The purpose of this documentation is to provide: 37 | 38 | 1. a :ref:`quick start guide ` describing the environments and their customization options; 39 | 2. a :ref:`detailed description ` of the nuts and bolts of the project, and how *you* can contribute. 40 | 41 | .. _index_how_to_cite_this_work: 42 | 43 | How to cite this work? 44 | ====================== 45 | 46 | If you use this package, please consider citing it with this piece of 47 | BibTeX: 48 | 49 | .. code:: bibtex 50 | 51 | @misc{highway-env, 52 | author = {Leurent, Edouard}, 53 | title = {An Environment for Autonomous Driving Decision-Making}, 54 | year = {2018}, 55 | publisher = {GitHub}, 56 | journal = {GitHub repository}, 57 | howpublished = {\url{https://github.com/eleurent/highway-env}}, 58 | } 59 | 60 | Documentation contents 61 | ====================== 62 | 63 | .. toctree:: 64 | :maxdepth: 2 65 | 66 | installation 67 | quickstart 68 | user_guide 69 | faq 70 | bibliography/index 71 | -------------------------------------------------------------------------------- /highway_modify/docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. 
_install: 2 | 3 | Installation 4 | ============ 5 | 6 | Prerequisites 7 | ------------- 8 | 9 | This project requires python3 (>=3.5) 10 | 11 | The graphics require the installation of `pygame `_, which itself has dependencies that must be installed manually. 12 | 13 | 14 | Ubuntu 15 | ~~~~~~ 16 | 17 | .. code-block:: bash 18 | 19 | sudo apt-get update -y 20 | sudo apt-get install -y python-dev libsdl-image1.2-dev libsdl-mixer1.2-dev 21 | libsdl-ttf2.0-dev libsdl1.2-dev libsmpeg-dev python-numpy subversion libportmidi-dev 22 | ffmpeg libswscale-dev libavformat-dev libavcodec-dev libfreetype6-dev gcc 23 | 24 | Windows 10 25 | ~~~~~~~~~~ 26 | 27 | We recommend using `Anaconda `_. 28 | 29 | 30 | Stable release 31 | --------------------- 32 | To install the latest stable version: 33 | 34 | .. code-block:: bash 35 | 36 | pip install highway-env 37 | 38 | Development version 39 | --------------------- 40 | 41 | To install the current development version: 42 | 43 | .. code-block:: bash 44 | 45 | pip install --user git+https://github.com/eleurent/highway-env 46 | -------------------------------------------------------------------------------- /highway_modify/docs/source/make_your_own.rst: -------------------------------------------------------------------------------- 1 | .. _make_your_own: 2 | 3 | Make your own environment 4 | ========================== 5 | 6 | Here are the steps required to create a new environment. 7 | 8 | .. note:: 9 | Pull requests are welcome! 10 | 11 | Set up files 12 | ------------ 13 | 14 | 1. Create a new ``your_env.py`` file in ``highway_env/envs/`` 15 | 2. Define a class YourEnv, that must inherit from :py:class:`~highway_env.envs.common.abstract.AbstractEnv` 16 | 17 | This class provides several useful functions: 18 | 19 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.default_config` method, that provides a default configuration dictionary that can be overloaded. 20 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.define_spaces` method, that gives access to a choice of observation and action types, set from the environment configuration 21 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.step` method, which executes the desired actions (at policy frequency) and simulate the environment (at simulation frequency) 22 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.render` method, which renders the environment. 23 | 24 | Create the scene 25 | ------------------ 26 | 27 | The first step is to create a :py:class:`~highway_env.road.road.RoadNetwork` that describes the geometry and topology of 28 | roads and lanes in the scene. 29 | This should be achieved in a ``YourEnv._make_road()`` method, called from ``YourEnv.reset()`` to set the ``self.road`` field. 30 | 31 | See :ref:`Roads ` for reference, and existing environments as examples. 32 | 33 | Create the vehicles 34 | ------------------ 35 | 36 | The second step is to populate your road network with vehicles. This should be achieved in a ``YourEnv._make_road()`` 37 | method, called from ``YourEnv.reset()`` to set the ``self.road.vehicles`` list of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 38 | 39 | First, define the controlled ego-vehicle by setting ``self.vehicle``. The class of controlled vehicle depends on the 40 | choice of action type, and can be accessed as ``self.action_type.vehicle_class``. 41 | Other vehicles can be created more freely, and added to the ``self.road.vehicles`` list. 
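For concreteness, here is a minimal sketch of what such a vehicle-creation step could look like. It is named ``_make_vehicles()`` here to distinguish it from the road-creation step; the helper name, spawn positions and speeds are illustrative assumptions, and the exact constructor arguments should be checked against the :py:class:`~highway_env.vehicle.kinematics.Vehicle` and :py:class:`~highway_env.vehicle.behavior.IDMVehicle` APIs.

.. code-block:: python

    from highway_env.vehicle.behavior import IDMVehicle

    def _make_vehicles(self) -> None:
        # The ego-vehicle class is determined by the chosen action type.
        ego_class = self.action_type.vehicle_class
        ego = ego_class(self.road, position=[0, 0], heading=0, speed=20)
        self.vehicle = ego
        self.road.vehicles.append(ego)

        # Other (uncontrolled) vehicles can use any behavior model, e.g. the IDM model,
        # and are simply appended to the road's vehicle list.
        for i in range(3):
            npc = IDMVehicle(self.road, position=[30 * (i + 1), 0], heading=0, speed=15)
            self.road.vehicles.append(npc)
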
42 | 43 | See :ref:`vehicle behaviors ` for reference, and existing environments as examples. 44 | 45 | Make the environment configurable 46 | ------------------------------------ 47 | 48 | To make a part of your environment configurable, overload the :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.default_config` 49 | method to define new ``{"config_key": value}`` pairs with default values. These configurations can then be accessed in your 50 | environment implementation with ``self.config["config_key"]``, and once the environment is created, it can be configured with 51 | ``env.configure({"config_key": other_value})`` followed by ``env.reset()``. 52 | 53 | Register the environment 54 | --------------------------- 55 | 56 | In ``highway_env/envs/your_env.py``, add the following lines: 57 | 58 | .. code-block:: python 59 | 60 | register( 61 | id='your-env-v0', 62 | entry_point='highway_env.envs:YourEnv', 63 | ) 64 | 65 | and import it from ``highway_env/envs/__init__.py``: 66 | 67 | .. code-block:: python 68 | 69 | from highway_env.envs.your_env import * 70 | 71 | 72 | Profit 73 | -------- 74 | That's it! 75 | You should now be able to run the environment: 76 | 77 | .. code-block:: python 78 | 79 | import gym 80 | import highway_env 81 | 82 | env = gym.make('your-env-v0') 83 | obs = env.reset() 84 | obs, reward, done, info = env.step(env.action_space.sample()) 85 | env.render() 86 | 87 | API 88 | ------- 89 | 90 | 91 | .. automodule:: highway_env.envs.common.abstract 92 | :members: 93 | :private-members: 94 | 95 | -------------------------------------------------------------------------------- /highway_modify/docs/source/multi_agent.rst: -------------------------------------------------------------------------------- 1 | .. _multi_agent: 2 | 3 | The Multi-Agent setting 4 | ========================== 5 | 6 | Most environments can be configured into a multi-agent version. Here is how: 7 | 8 | Increase the number of controlled vehicles 9 | ------------------------------------------ 10 | 11 | To that end, update the :ref:`environment configuration ` to increase ``controlled_vehicles``. 12 | 13 | .. jupyter-execute:: 14 | 15 | import gym 16 | import highway_env 17 | 18 | env = gym.make('highway-v0') 19 | env.seed(0) 20 | 21 | env.configure({"controlled_vehicles": 2}) # Two controlled vehicles 22 | env.configure({"vehicles_count": 1}) # A single other vehicle, for the sake of visualisation 23 | env.reset() 24 | 25 | from matplotlib import pyplot as plt 26 | %matplotlib inline 27 | plt.imshow(env.render(mode="rgb_array")) 28 | plt.title("Controlled vehicles are in green") 29 | plt.show() 30 | 31 | Change the action space 32 | ----------------------- 33 | 34 | Right now, since the action space has not been changed, only the first vehicle is controlled by ``env.step(action)``. 35 | In order for the environment to accept a tuple of actions, its action type must be set to :py:class:`~highway_env.envs.common.action.MultiAgentAction`. 36 | The type of actions contained in the tuple must be described by a standard :ref:`action configuration ` in the ``action_config`` field. 37 | 38 | .. 
jupyter-execute:: 39 | 40 | env.configure({ 41 | "action": { 42 | "type": "MultiAgentAction", 43 | "action_config": { 44 | "type": "DiscreteMetaAction", 45 | } 46 | } 47 | }) 48 | env.reset() 49 | 50 | _, (ax1, ax2) = plt.subplots(nrows=2) 51 | ax1.imshow(env.render(mode="rgb_array")) 52 | ax1.set_title("Initial state") 53 | 54 | # Make the first vehicle change to the left lane, and the second one to the right 55 | action_1, action_2 = 0, 2 # See highway_env.envs.common.action.DiscreteMetaAction.ACTIONS_ALL 56 | env.step((action_1, action_2)) 57 | 58 | ax2.imshow(env.render(mode="rgb_array")) 59 | ax2.set_title("After sending actions to each vehicle") 60 | plt.show() 61 | 62 | 63 | Change the observation space 64 | ----------------------------- 65 | 66 | In order to actually decide what ``action_1`` and ``action_2`` should be, both vehicles must generate their own observations. 67 | As before, since the observation space has not been changed no far, the observation only includes that of the first vehicle. 68 | 69 | In order for the environment to return a tuple of observations -- one for each agent --, its observation type must be set to :py:class:`~highway_env.envs.common.observation.MultiAgentObservation` 70 | The type of observations contained in the tuple must be described by a standard :ref:`observation configuration ` in the ``observation_config`` field. 71 | 72 | .. jupyter-execute:: 73 | 74 | env.configure({ 75 | "observation": { 76 | "type": "MultiAgentObservation", 77 | "observation_config": { 78 | "type": "Kinematics", 79 | } 80 | } 81 | }) 82 | obs = env.reset() 83 | 84 | import pprint 85 | pprint.pprint(obs) 86 | 87 | Wrapping it up 88 | -------------- 89 | 90 | Here is a pseudo-code example of how a centralized multi-agent policy could be trained: 91 | 92 | .. jupyter-execute:: 93 | 94 | # Multi-agent environment configuration 95 | env.configure({ 96 | "controlled_vehicles": 2, 97 | "observation": { 98 | "type": "MultiAgentObservation", 99 | "observation_config": { 100 | "type": "Kinematics", 101 | } 102 | }, 103 | "action": { 104 | "type": "MultiAgentAction", 105 | "action_config": { 106 | "type": "DiscreteMetaAction", 107 | } 108 | } 109 | }) 110 | 111 | # Dummy RL algorithm 112 | class Model: 113 | """ Dummy code for an RL algorithm, which predicts an action from an observation, 114 | and update its model from observed transitions.""" 115 | 116 | def predict(self, obs): 117 | return 0 118 | 119 | def update(self, obs, action, next_obs, reward, info, done): 120 | pass 121 | model = Model() 122 | 123 | # A training episode 124 | obs = env.reset() 125 | done = False 126 | while not done: 127 | # Dispatch the observations to the model to get the tuple of actions 128 | action = tuple(model.predict(obs_i) for obs_i in obs) 129 | # Execute the actions 130 | next_obs, reward, info, done = env.step(action) 131 | # Update the model with the transitions observed by each agent 132 | for obs_i, action_i, next_obs_i in zip(obs, action, next_obs): 133 | model.update(obs_i, action_i, next_obs_i, reward, info, done) 134 | obs = next_obs 135 | 136 | 137 | For example, this is supported by `eleurent/rl-agents `_'s DQN implementation, and can be run with 138 | 139 | 140 | .. code-block:: bash 141 | 142 | cd 143 | python experiments.py evaluate configs/IntersectionEnv/env_multi_agent.json \ 144 | configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json \ 145 | --train --episodes=3000 146 | 147 | .. 
figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/intersection_multi_agent.gif 148 | 149 | Video of a multi-agent episode with the trained policy. 150 | -------------------------------------------------------------------------------- /highway_modify/docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | =============== 4 | Getting Started 5 | =============== 6 | 7 | Making an environment 8 | ------------------------ 9 | 10 | Here is a quick example of how to create an environment: 11 | 12 | .. jupyter-execute:: 13 | 14 | import gym 15 | import highway_env 16 | from matplotlib import pyplot as plt 17 | %matplotlib inline 18 | 19 | env = gym.make('highway-v0') 20 | env.reset() 21 | for _ in range(3): 22 | action = env.action_type.actions_indexes["IDLE"] 23 | obs, reward, done, info = env.step(action) 24 | env.render() 25 | 26 | plt.imshow(env.render(mode="rgb_array")) 27 | plt.show() 28 | 29 | All the environments 30 | ~~~~~~~~~~~~~~~~~~~~ 31 | Here is the list of all the environments available and their descriptions: 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | 36 | environments/highway 37 | environments/merge 38 | environments/roundabout 39 | environments/parking 40 | environments/intersection 41 | environments/racetrack 42 | 43 | .. _configuration: 44 | 45 | Configuring an environment 46 | --------------------------- 47 | 48 | The :ref:`observations `, :ref:`actions `, :ref:`dynamics ` and :ref:`rewards ` 49 | of an environment are parametrized by a configuration, defined as a 50 | :py:attr:`~highway_env.envs.common.abstract.AbstractEnv.config` dictionary. 51 | After environment creation, the configuration can be accessed using the 52 | :py:attr:`~highway_env.envs.common.abstract.AbstractEnv.config` attribute. 53 | 54 | .. jupyter-execute:: 55 | 56 | import pprint 57 | 58 | env = gym.make("highway-v0") 59 | pprint.pprint(env.config) 60 | 61 | For example, the number of lanes can be changed with: 62 | 63 | .. jupyter-execute:: 64 | 65 | env.config["lanes_count"] = 2 66 | env.reset() 67 | plt.imshow(env.render(mode="rgb_array")) 68 | plt.show() 69 | 70 | .. note:: 71 | 72 | The environment must be :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.reset` for the change of configuration 73 | to be effective. 74 | 75 | 76 | Training an agent 77 | ------------------- 78 | 79 | Reinforcement Learning agents can be trained using libraries such as `eleurent/rl-agents `_, 80 | `openai/baselines `_ or `Stable Baselines3 `_. 81 | 82 | Here is an example of SB3's DQN implementation trained on ``highway-fast-v0`` with its default kinematics observation and an MLP model. 83 | 84 | .. |highway_dqn| image:: https://colab.research.google.com/assets/colab-badge.svg 85 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/sb3_highway_dqn.ipynb 86 | 87 | |highway_dqn| 88 | 89 | .. 
code-block:: python 90 | 91 | import gym 92 | import highway_env 93 | from stable_baselines3 import DQN 94 | 95 | env = gym.make("highway-fast-v0") 96 | model = DQN('MlpPolicy', env, 97 | policy_kwargs=dict(net_arch=[256, 256]), 98 | learning_rate=5e-4, 99 | buffer_size=15000, 100 | learning_starts=200, 101 | batch_size=32, 102 | gamma=0.8, 103 | train_freq=1, 104 | gradient_steps=1, 105 | target_update_interval=50, 106 | verbose=1, 107 | tensorboard_log="highway_dqn/") 108 | model.learn(int(2e4)) 109 | model.save("highway_dqn/model") 110 | 111 | # Load and test saved model 112 | model = DQN.load("highway_dqn/model") 113 | while True: 114 | done = False 115 | obs = env.reset() 116 | while not done: 117 | action, _states = model.predict(obs, deterministic=True) 118 | obs, reward, done, info = env.step(action) 119 | env.render() 120 | 121 | A full run takes about 25mn on my laptop (fps=14). The following results are obtained: 122 | 123 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway_fast_dqn.png 124 | 125 | Training curves, for 5 random seeds. 126 | 127 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway_fast_dqn.gif 128 | 129 | Video of an episode run with the trained policy. 130 | 131 | .. note:: 132 | 133 | There are several ways to get better performances. For instance, `SB3 provides only vanilla Deep Q-Learning and has no extensions such as Double-DQN, Dueling-DQN and Prioritized Experience Replay `_. 134 | However, `eleurent/rl-agents `_'s implementation of DQN does provide those extensions, which yields better results. Improvements can also be obtained by changing the observation type or the model, see the :ref:`FAQ `. 135 | 136 | 137 | Examples on Google Colab 138 | ------------------------- 139 | 140 | Several scripts and notebooks to train driving policies on `highway-env` are available `on this page `_. 141 | Here are a few of them: 142 | 143 | .. |highway_dqn_cnn| image:: https://colab.research.google.com/assets/colab-badge.svg 144 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/sb3_highway_dqn_cnn.ipynb 145 | .. |planning_hw| image:: https://colab.research.google.com/assets/colab-badge.svg 146 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/highway_planning.ipynb 147 | .. |parking_mb| image:: https://colab.research.google.com/assets/colab-badge.svg 148 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/parking_model_based.ipynb 149 | .. |parking_her| image:: https://colab.research.google.com/assets/colab-badge.svg 150 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/parking_her.ipynb 151 | .. |dqn_social| image:: https://colab.research.google.com/assets/colab-badge.svg 152 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/intersection_social_dqn.ipynb 153 | 154 | - | Highway with image observations and a CNN model |highway_dqn_cnn| 155 | | Train SB3's DQN on `highway-fast-v0` , but using :ref:`image observations ` and a CNN model for the value function. 156 | - | Trajectory Planning on Highway |planning_hw| 157 | | Plan a trajectory on `highway-v0` using the `OPD` :cite:`Hren2008` implementation from `eleurent/rl-agents `_. 
158 | - | A Model-based Reinforcement Learning tutorial on Parking |parking_mb| 159 | | A tutorial written for `RLSS 2019 `_ and demonstrating the principle of model-based reinforcement learning on the `parking-v0` task. 160 | - | Parking with Hindsight Experience Replay |parking_her| 161 | | Train a goal-conditioned `parking-v0` policy using the `HER` :cite:`Andrychowicz2017` implementation from `stable-baselines `_. 162 | - | Intersection with DQN and social attention |dqn_social| 163 | | Train an `intersection-v0` crossing policy using the social attention architecture :cite:`Leurent2019social` and the DQN implementation from `eleurent/rl-agents `_. -------------------------------------------------------------------------------- /highway_modify/docs/source/rewards/index.rst: -------------------------------------------------------------------------------- 1 | .. _rewards: 2 | 3 | Rewards 4 | ############ 5 | 6 | The reward function is defined in the :py:meth:`~highway_env.envs.common.abstract.AbstractEnv._reward` method, overloaded in every environment. 7 | 8 | .. note:: 9 | The choice of an appropriate reward function that yields realistic optimal driving behaviour is a challenging problem, that we do not address in this project. 10 | In particular, we do not wish to specify every single aspect of the expected driving behaviour inside the reward function, such as keeping a safe distance to the front vehicle. 11 | Instead, we would rather only specify a reward function as simple and straightforward as possible in order to see adequate behaviour emerge from learning. 12 | In this perspective, keeping a safe distance is optimal not for being directly rewarded but for robustness against the uncertain behaviour of the leading vehicle, which could brake at any time. 13 | 14 | Most environments 15 | ----------------- 16 | 17 | We generally focus on two features: a vehicle should 18 | 19 | - progress quickly on the road; 20 | - avoid collisions. 21 | 22 | Thus, the reward function is often composed of a velocity term and a collision term: 23 | 24 | .. math:: 25 | R(s,a) = a\frac{v - v_\min}{v_\max - v_\min} - b\,\text{collision} 26 | 27 | where :math:`v,\,v_\min,\,v_\max` are the current, minimum and maximum speed of the ego-vehicle respectively, and :math:`a,\,b` are two coefficients. 28 | 29 | 30 | .. note:: 31 | Since the rewards must be bounded, and the optimal policy is invariant by scaling and shifting rewards, we choose to normalize them in the :math:`[0, 1]` range, by convention. 32 | Normalizing rewards has also been observed to be practically beneficial in deep reinforcement learning :cite:`Mnih2015`. 33 | Note that we forbid negative rewards, since they may encourage the agent to prefer terminating an episode early (by causing a collision) rather than risking suffering a negative return if no satisfying trajectory can be found. 34 | 35 | In some environments, the weight of the collision penalty can be configured through the `collision_penalty` parameter. 36 | 37 | Goal environments 38 | ----------------- 39 | 40 | In the :ref:`Parking ` environment, however, the reward function must also specify the desired goal destination. 41 | Thus, the velocity term is replaced by a weighted p-norm between the agent state and the goal state. 42 | 43 | 44 | .. 
math:: 45 | R(s,a) = -\| s - s_g \|_{W,p}^p - b\,\text{collision} 46 | 47 | where :math:`s = [x, y, v_x, v_y, \cos\psi, \sin\psi]`, :math:`s_g = [x_g, y_g, 0, 0, \cos\psi_g, \sin\psi_g]`, and 48 | :math:`\|x\|_{W,p} = (\sum_i |W_i x_i|^p)^{1/p}`. We use a p-norm rather than an Euclidean norm in order to have a narrower spike of rewards at the goal. 49 | -------------------------------------------------------------------------------- /highway_modify/docs/source/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. _user_guide: 2 | 3 | User Guide 4 | ============ 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | observations/index 10 | actions/index 11 | dynamics/index 12 | rewards/index 13 | graphics/index 14 | multi_agent 15 | make_your_own -------------------------------------------------------------------------------- /highway_modify/highway_env/__init__.py: -------------------------------------------------------------------------------- 1 | # Hide pygame support prompt 2 | import os 3 | os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1' 4 | # Import the envs module so that envs register themselves 5 | import highway_env.envs 6 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from highway_env.envs.highway_env import * 2 | from highway_env.envs.merge_env import * 3 | from highway_env.envs.parking_env import * 4 | from highway_env.envs.summon_env import * 5 | from highway_env.envs.roundabout_line4_env import * 6 | from highway_env.envs.roundabout_line2_env import * 7 | from highway_env.envs.two_way_env import * 8 | from highway_env.envs.intersection_env import * 9 | from highway_env.envs.lane_keeping_env import * 10 | from highway_env.envs.u_turn_env import * 11 | from highway_env.envs.exit_env import * 12 | from highway_env.envs.racetrack_env import * 13 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/envs/common/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/exit_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple 3 | from gym.envs.registration import register 4 | 5 | from highway_env import utils 6 | from highway_env.envs import HighwayEnv, CircularLane, Vehicle 7 | from highway_env.envs.common.action import Action 8 | from highway_env.road.road import Road, RoadNetwork 9 | from highway_env.vehicle.controller import ControlledVehicle 10 | 11 | 12 | class ExitEnv(HighwayEnv): 13 | """ 14 | """ 15 | @classmethod 16 | def default_config(cls) -> dict: 17 | config = super().default_config() 18 | config.update({ 19 | "observation": { 20 | "type": "ExitObservation", 21 | "vehicles_count": 15, 22 | "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"], 23 | "clip": False 24 | }, 25 | "action": { 26 | "type": "DiscreteMetaAction", 27 | "target_speeds": [18, 24, 30] 28 | }, 29 | "lanes_count": 6, 30 | "collision_reward": 0, 31 | "high_speed_reward": 0.1, 32 | "right_lane_reward": 0, 33 | "goal_reward": 1, 34 | "vehicles_count": 20, 35 | 
"vehicles_density": 1.5, 36 | "controlled_vehicles": 1, 37 | "duration": 18, # [s], 38 | "simulation_frequency": 5, 39 | "scaling": 5 40 | }) 41 | return config 42 | 43 | def _reset(self) -> None: 44 | self._create_road() 45 | self._create_vehicles() 46 | 47 | def step(self, action) -> Tuple[np.ndarray, float, bool, dict]: 48 | obs, reward, terminal, info = super().step(action) 49 | info.update({"is_success": self._is_success()}) 50 | return obs, reward, terminal, info 51 | 52 | def _create_road(self, road_length=1000, exit_position=400, exit_length=100) -> None: 53 | net = RoadNetwork.straight_road_network(self.config["lanes_count"], start=0, 54 | length=exit_position, nodes_str=("0", "1")) 55 | net = RoadNetwork.straight_road_network(self.config["lanes_count"] + 1, start=exit_position, 56 | length=exit_length, nodes_str=("1", "2"), net=net) 57 | net = RoadNetwork.straight_road_network(self.config["lanes_count"], start=exit_position+exit_length, 58 | length=road_length-exit_position-exit_length, 59 | nodes_str=("2", "3"), net=net) 60 | for _from in net.graph: 61 | for _to in net.graph[_from]: 62 | for _id in range(len(net.graph[_from][_to])): 63 | net.get_lane((_from, _to, _id)).speed_limit = 26 - 3.4 * _id 64 | exit_position = np.array([exit_position + exit_length, self.config["lanes_count"] * CircularLane.DEFAULT_WIDTH]) 65 | radius = 150 66 | exit_center = exit_position + np.array([0, radius]) 67 | lane = CircularLane(center=exit_center, 68 | radius=radius, 69 | start_phase=3*np.pi/2, 70 | end_phase=2*np.pi, 71 | forbidden=True) 72 | net.add_lane("2", "exit", lane) 73 | 74 | self.road = Road(network=net, 75 | np_random=self.np_random, 76 | record_history=self.config["show_trajectories"]) 77 | 78 | def _create_vehicles(self) -> None: 79 | """Create some new random vehicles of a given type, and add them on the road.""" 80 | self.controlled_vehicles = [] 81 | for _ in range(self.config["controlled_vehicles"]): 82 | vehicle = Vehicle.create_random(self.road, 83 | speed=25, 84 | lane_from="0", 85 | lane_to="1", 86 | lane_id=0, 87 | spacing=self.config["ego_spacing"]) 88 | vehicle = self.action_type.vehicle_class(self.road, vehicle.position, vehicle.heading, vehicle.speed) 89 | self.controlled_vehicles.append(vehicle) 90 | self.road.vehicles.append(vehicle) 91 | 92 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 93 | for _ in range(self.config["vehicles_count"]): 94 | lanes = np.arange(self.config["lanes_count"]) 95 | lane_id = self.road.np_random.choice(lanes, size=1, 96 | p=lanes / lanes.sum()).astype(int)[0] 97 | lane = self.road.network.get_lane(("0", "1", lane_id)) 98 | vehicle = vehicles_type.create_random(self.road, 99 | lane_from="0", 100 | lane_to="1", 101 | lane_id=lane_id, 102 | speed=lane.speed_limit, 103 | spacing=1 / self.config["vehicles_density"], 104 | ).plan_route_to("3") 105 | vehicle.enable_lane_change = False 106 | self.road.vehicles.append(vehicle) 107 | 108 | def _reward(self, action: Action) -> float: 109 | """ 110 | The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. 
111 | :param action: the last action performed 112 | :return: the corresponding reward 113 | """ 114 | lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \ 115 | else self.vehicle.lane_index 116 | scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1]) 117 | reward = self.config["collision_reward"] * self.vehicle.crashed \ 118 | + self.config["goal_reward"] * self._is_success() \ 119 | + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \ 120 | + self.config["right_lane_reward"] * lane_index[-1] 121 | 122 | reward = utils.lmap(reward, 123 | [self.config["collision_reward"], self.config["goal_reward"]], 124 | [0, 1]) 125 | reward = np.clip(reward, 0, 1) 126 | return reward 127 | 128 | def _is_success(self): 129 | lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \ 130 | else self.vehicle.lane_index 131 | goal_reached = lane_index == ("1", "2", self.config["lanes_count"]) or lane_index == ("2", "exit", 0) 132 | return goal_reached 133 | 134 | def _is_terminal(self) -> bool: 135 | """The episode is over if the ego vehicle crashed or the time is out.""" 136 | return self.vehicle.crashed or self.steps >= self.config["duration"] 137 | 138 | 139 | # class DenseLidarExitEnv(DenseExitEnv): 140 | # @classmethod 141 | # def default_config(cls) -> dict: 142 | # return dict(super().default_config(), 143 | # observation=dict(type="LidarObservation")) 144 | 145 | 146 | 147 | 148 | register( 149 | id='exit-v0', 150 | entry_point='highway_env.envs:ExitEnv', 151 | ) 152 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/highway_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.envs.common.action import Action 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.utils import near_split 9 | from highway_env.vehicle.controller import ControlledVehicle 10 | from highway_env.vehicle.kinematics import Vehicle 11 | 12 | 13 | class HighwayEnv(AbstractEnv): 14 | """ 15 | A highway driving environment. 16 | 17 | The vehicle is driving on a straight highway with several lanes, and is rewarded for reaching a high speed, 18 | staying on the rightmost lanes and avoiding collisions. 19 | """ 20 | 21 | @classmethod 22 | def default_config(cls) -> dict: 23 | config = super().default_config() 24 | config.update({ 25 | "observation": { 26 | "type": "Kinematics", 27 | "absolute": True, 28 | "features_range": {"x": [150, 700.], "y":[-12.,12.], "vx":[-80.,80.], "vy":[-80.,80.]}, 29 | }, 30 | "finish_position": [650.0, 8.0], 31 | "action": { 32 | "type": "DiscreteMetaAction", 33 | "absolute": True, 34 | }, 35 | "lanes_count": 4, 36 | "vehicles_speed": 35, 37 | "vehicles_count": 50, 38 | "controlled_vehicles": 1, 39 | "initial_lane_id": None, 40 | "duration": 40, # [s] 41 | "ego_spacing": 2, 42 | "vehicles_density": 1, 43 | "collision_reward": -1, # The reward received when colliding with a vehicle. 44 | "right_lane_reward": 0.1, # The reward received when driving on the right-most lanes, linearly mapped to 45 | # zero for other lanes. 
46 | "high_speed_reward": 0.4, # The reward received when driving at full speed, linearly mapped to zero for 47 | # lower speeds according to config["reward_speed_range"]. 48 | "lane_change_reward": 0, # The reward received at each lane change action. 49 | "reward_speed_range": [20, 30], 50 | "offroad_terminal": True 51 | }) 52 | return config 53 | 54 | def _reset(self) -> None: 55 | self._create_road() 56 | self._create_vehicles() 57 | 58 | def _create_road(self) -> None: 59 | """Create a road composed of straight adjacent lanes.""" 60 | self.road = Road(network=RoadNetwork.straight_road_network(self.config["lanes_count"], speed_limit=25), 61 | np_random=self.np_random, record_history=self.config["show_trajectories"]) 62 | 63 | def _create_vehicles(self) -> None: 64 | """Create some new random vehicles of a given type, and add them on the road.""" 65 | other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 66 | other_per_controlled = near_split(self.config["vehicles_count"], num_bins=self.config["controlled_vehicles"]) 67 | 68 | self.controlled_vehicles = [] 69 | for others in other_per_controlled: 70 | vehicle = Vehicle.create_random( 71 | self.road, 72 | speed=self.config['vehicles_speed'], 73 | lane_id=self.config["initial_lane_id"], 74 | spacing=self.config["ego_spacing"] 75 | ) 76 | vehicle = self.action_type.vehicle_class(self.road, vehicle.position, vehicle.heading, vehicle.speed) 77 | self.controlled_vehicles.append(vehicle) 78 | self.road.vehicles.append(vehicle) 79 | 80 | for _ in range(others): 81 | vehicle = other_vehicles_type.create_random(self.road, spacing=1 / self.config["vehicles_density"]) 82 | vehicle.randomize_behavior() 83 | self.road.vehicles.append(vehicle) 84 | 85 | def _reward(self, action: Action) -> float: 86 | """ 87 | The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. 
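Concretely, the reward computed below is a weighted sum of the collision indicator, the normalized lane index and the clipped scaled forward speed, rescaled from [collision_reward, high_speed_reward + right_lane_reward] to [0, 1] and set to 0 whenever the vehicle is off-road.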
88 | :param action: the last action performed 89 | :return: the corresponding reward 90 | """ 91 | neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index) 92 | lane = self.vehicle.target_lane_index[2] if isinstance(self.vehicle, ControlledVehicle) \ 93 | else self.vehicle.lane_index[2] 94 | # Use forward speed rather than speed, see https://github.com/eleurent/highway-env/issues/268 95 | forward_speed = self.vehicle.speed * np.cos(self.vehicle.heading) 96 | scaled_speed = utils.lmap(forward_speed, self.config["reward_speed_range"], [0, 1]) 97 | reward = \ 98 | + self.config["collision_reward"] * self.vehicle.crashed \ 99 | + self.config["right_lane_reward"] * lane / max(len(neighbours) - 1, 1) \ 100 | + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) 101 | reward = utils.lmap(reward, 102 | [self.config["collision_reward"], 103 | self.config["high_speed_reward"] + self.config["right_lane_reward"]], 104 | [0, 1]) 105 | reward = 0 if not self.vehicle.on_road else reward 106 | return reward 107 | 108 | def _is_terminal(self) -> bool: 109 | """The episode is over if the ego vehicle crashed or the time is out.""" 110 | return self.vehicle.crashed or \ 111 | self.steps >= self.config["duration"] or \ 112 | (self.config["offroad_terminal"] and not self.vehicle.on_road) 113 | 114 | def _cost(self, action: int) -> float: 115 | """The cost signal is the occurrence of collision.""" 116 | return float(self.vehicle.crashed) 117 | 118 | 119 | class HighwayEnvFast(HighwayEnv): 120 | """ 121 | A variant of highway-v0 with faster execution: 122 | - lower simulation frequency 123 | - fewer vehicles in the scene (and fewer lanes, shorter episode duration) 124 | - only check collision of controlled vehicles with others 125 | """ 126 | @classmethod 127 | def default_config(cls) -> dict: 128 | cfg = super().default_config() 129 | cfg.update({ 130 | "simulation_frequency": 5, 131 | "lanes_count": 3, 132 | "vehicles_count": 20, 133 | "duration": 15, # [s] 134 | "ego_spacing": 1.5, 135 | }) 136 | return cfg 137 | 138 | def _create_vehicles(self) -> None: 139 | super()._create_vehicles() 140 | # Disable collision check for uncontrolled vehicles 141 | for vehicle in self.road.vehicles: 142 | if vehicle not in self.controlled_vehicles: 143 | vehicle.check_collisions = False 144 | def _legal_terminal(self) -> bool: 145 | if self.steps >= self.config["duration"] * self.config["policy_frequency"]: 146 | return True 147 | else : 148 | return False 149 | 150 | register( 151 | id='highway-v0', 152 | entry_point='highway_env.envs:HighwayEnv', 153 | ) 154 | 155 | register( 156 | id='highway-fast-v0', 157 | entry_point='highway_env.envs:HighwayEnvFast', 158 | ) 159 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/lane_keeping_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import copy 4 | from typing import Tuple 5 | 6 | import numpy as np 7 | from gym.envs.registration import register 8 | 9 | from highway_env.envs.common.abstract import AbstractEnv 10 | from highway_env.road.lane import LineType, SineLane, StraightLane 11 | from highway_env.road.road import Road, RoadNetwork 12 | from highway_env.vehicle.dynamics import BicycleVehicle 13 | 14 | 15 | class LaneKeepingEnv(AbstractEnv): 16 | 17 | """A lane keeping control task.""" 18 | 19 | def __init__(self, config: dict = None) -> None: 20 | super().__init__(config) 
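# State kept by the lane-keeping task: the lane currently tracked, the queue of upcoming lanes, the logged (interval) trajectories, and an optional LPV model that step() advances alongside the vehicle (see store_data below).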
21 | self.lane = None 22 | self.lanes = [] 23 | self.trajectory = [] 24 | self.interval_trajectory = [] 25 | self.lpv = None 26 | 27 | @classmethod 28 | def default_config(cls) -> dict: 29 | config = super().default_config() 30 | config.update({ 31 | "observation": { 32 | "type": "AttributesObservation", 33 | "attributes": ["state", "derivative", "reference_state"] 34 | }, 35 | "action": { 36 | "type": "ContinuousAction", 37 | "steering_range": [-np.pi / 3, np.pi / 3], 38 | "longitudinal": False, 39 | "lateral": True, 40 | "dynamical": True 41 | }, 42 | "simulation_frequency": 10, 43 | "policy_frequency": 10, 44 | "state_noise": 0.05, 45 | "derivative_noise": 0.05, 46 | "screen_width": 600, 47 | "screen_height": 250, 48 | "scaling": 7, 49 | "centering_position": [0.4, 0.5] 50 | }) 51 | return config 52 | 53 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: 54 | if self.lanes and not self.lane.on_lane(self.vehicle.position): 55 | self.lane = self.lanes.pop(0) 56 | self.store_data() 57 | if self.lpv: 58 | self.lpv.set_control(control=action.squeeze(-1), 59 | state=self.vehicle.state[[1, 2, 4, 5]]) 60 | self.lpv.step(1 / self.config["simulation_frequency"]) 61 | 62 | self.action_type.act(action) 63 | obs = self.observation_type.observe() 64 | self._simulate() 65 | 66 | info = {} 67 | reward = self._reward(action) 68 | terminal = self._is_terminal() 69 | return obs, reward, terminal, info 70 | 71 | def _reward(self, action: np.ndarray) -> float: 72 | _, lat = self.lane.local_coordinates(self.vehicle.position) 73 | return 1 - (lat/self.lane.width)**2 74 | 75 | def _is_terminal(self) -> bool: 76 | return False # not self.lane.on_lane(self.vehicle.position) 77 | 78 | def _reset(self) -> None: 79 | self._make_road() 80 | self._make_vehicles() 81 | 82 | def _make_road(self) -> None: 83 | net = RoadNetwork() 84 | lane = SineLane([0, 0], [500, 0], amplitude=5, pulsation=2*np.pi / 100, phase=0, 85 | width=10, line_types=[LineType.STRIPED, LineType.STRIPED]) 86 | net.add_lane("a", "b", lane) 87 | other_lane = StraightLane([50, 50], [115, 15], 88 | line_types=(LineType.STRIPED, LineType.STRIPED), width=10) 89 | net.add_lane("c", "d", other_lane) 90 | self.lanes = [other_lane, lane] 91 | self.lane = self.lanes.pop(0) 92 | net.add_lane("d", "a", StraightLane([115, 15], [115+20, 15+20*(15-50)/(115-50)], 93 | line_types=(LineType.NONE, LineType.STRIPED), width=10)) 94 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 95 | self.road = road 96 | 97 | def _make_vehicles(self) -> None: 98 | road = self.road 99 | ego_vehicle = self.action_type.vehicle_class( 100 | road, road.network.get_lane(("c", "d", 0)).position(50, -4), 101 | heading=road.network.get_lane(("c", "d", 0)).heading_at(0), 102 | speed=8.3) 103 | road.vehicles.append(ego_vehicle) 104 | self.vehicle = ego_vehicle 105 | 106 | @property 107 | def dynamics(self) -> BicycleVehicle: 108 | return self.vehicle 109 | 110 | @property 111 | def state(self) -> np.ndarray: 112 | if not self.vehicle: 113 | return np.zeros((4, 1)) 114 | return self.vehicle.state[[1, 2, 4, 5]] + \ 115 | self.np_random.uniform(low=-self.config["state_noise"], 116 | high=self.config["state_noise"], 117 | size=self.vehicle.state[[0, 2, 4, 5]].shape) 118 | 119 | @property 120 | def derivative(self) -> np.ndarray: 121 | if not self.vehicle: 122 | return np.zeros((4, 1)) 123 | return self.vehicle.derivative[[1, 2, 4, 5]] + \ 124 | self.np_random.uniform(low=-self.config["derivative_noise"], 125 | 
high=self.config["derivative_noise"], 126 | size=self.vehicle.derivative[[0, 2, 4, 5]].shape) 127 | 128 | @property 129 | def reference_state(self) -> np.ndarray: 130 | if not self.vehicle or not self.lane: 131 | return np.zeros((4, 1)) 132 | longi, lat = self.lane.local_coordinates(self.vehicle.position) 133 | psi_l = self.lane.heading_at(longi) 134 | state = self.vehicle.state[[1, 2, 4, 5]] 135 | return np.array([[state[0, 0] - lat], [psi_l], [0], [0]]) 136 | 137 | def store_data(self) -> None: 138 | if self.lpv: 139 | state = self.vehicle.state.copy() 140 | interval = [] 141 | for x_t in self.lpv.change_coordinates(self.lpv.x_i_t, back=True, interval=True): 142 | # lateral state to full state 143 | np.put(state, [1, 2, 4, 5], x_t) 144 | # full state to absolute coordinates 145 | interval.append(state.squeeze(-1).copy()) 146 | self.interval_trajectory.append(interval) 147 | self.trajectory.append(copy.deepcopy(self.vehicle.state)) 148 | 149 | 150 | register( 151 | id='lane-keeping-v0', 152 | entry_point='highway_env.envs:LaneKeepingEnv', 153 | max_episode_steps=200 154 | ) 155 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/merge_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.road.lane import LineType, StraightLane, SineLane 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.controller import ControlledVehicle 9 | from highway_env.vehicle.objects import Obstacle 10 | 11 | 12 | class MergeEnv(AbstractEnv): 13 | 14 | """ 15 | A highway merge negotiation environment. 16 | 17 | The ego-vehicle is driving on a highway and approached a merge, with some vehicles incoming on the access ramp. 18 | It is rewarded for maintaining a high speed and avoiding collisions, but also making room for merging 19 | vehicles. 20 | """ 21 | 22 | @classmethod 23 | def default_config(cls) -> dict: 24 | cfg = super().default_config() 25 | cfg.update({ 26 | "observation": { 27 | "type": "Kinematics", 28 | "absolute": True, 29 | "features_range": {"x": [-500., 500.], "y":[-8.,8.], "vx":[-80.,80.], "vy":[-80.,80.]}, 30 | }, 31 | "finish_position": [400.0, 4.0], 32 | "collision_reward": -1, 33 | "right_lane_reward": 0.1, 34 | "high_speed_reward": 0.2, 35 | "merging_speed_reward": -0.5, 36 | "lane_change_reward": -0.05, 37 | "simulation_frequency": 5, 38 | "vehicles_count": 20, 39 | "duration": 12, # [s] 40 | "ego_spacing": 1.5, 41 | }) 42 | return cfg 43 | 44 | def _reward(self, action: int) -> float: 45 | """ 46 | The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions 47 | 48 | But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed. 
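The penalty is proportional to how far each merging vehicle's speed falls below its target speed (see the loop over road.vehicles below), and the total is then rescaled to [0, 1].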
49 | 50 | :param action: the action performed 51 | :return: the reward of the state-action transition 52 | """ 53 | action_reward = {0: self.config["lane_change_reward"], 54 | 1: 0, 55 | 2: self.config["lane_change_reward"], 56 | 3: 0, 57 | 4: 0 } 58 | 59 | forward_speed = self.vehicle.speed * np.cos(self.vehicle.heading) 60 | scaled_speed = utils.lmap(forward_speed, [20,30], [0, 1]) 61 | reward = self.config["collision_reward"] * self.vehicle.crashed \ 62 | + self.config["right_lane_reward"] * self.vehicle.lane_index[2] / 1 \ 63 | + self.config["high_speed_reward"] * scaled_speed 64 | 65 | # Altruistic penalty 66 | for vehicle in self.road.vehicles: 67 | if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle): 68 | reward += self.config["merging_speed_reward"] * \ 69 | (vehicle.target_speed - vehicle.speed) / vehicle.target_speed 70 | 71 | return utils.lmap(reward, 72 | [self.config["collision_reward"] + self.config["merging_speed_reward"], 73 | self.config["high_speed_reward"] + self.config["right_lane_reward"]], 74 | [0, 1]) 75 | 76 | def _is_terminal(self) -> bool: 77 | """The episode is over when a collision occurs or when the access ramp has been passed.""" 78 | return self.vehicle.crashed or self.vehicle.position[0] > 370 or not self.vehicle.on_road or self.steps >= self.config["duration"] 79 | 80 | def _reset(self) -> None: 81 | self._make_road() 82 | self._make_vehicles() 83 | 84 | def _make_road(self) -> None: 85 | """ 86 | Make a road composed of a straight highway and a merging lane. 87 | 88 | :return: the road 89 | """ 90 | net = RoadNetwork() 91 | 92 | # Highway lanes 93 | ends = [150, 80, 80, 150] # Before, converging, merge, after 94 | c, s, n = LineType.CONTINUOUS_LINE, LineType.STRIPED, LineType.NONE 95 | y = [0, StraightLane.DEFAULT_WIDTH] 96 | line_type = [[c, s], [n, c]] 97 | line_type_merge = [[c, s], [n, s]] 98 | for i in range(2): 99 | net.add_lane("a", "b", StraightLane([0, y[i]], [sum(ends[:2]), y[i]], line_types=line_type[i])) 100 | net.add_lane("b", "c", StraightLane([sum(ends[:2]), y[i]], [sum(ends[:3]), y[i]], line_types=line_type_merge[i])) 101 | net.add_lane("c", "d", StraightLane([sum(ends[:3]), y[i]], [sum(ends), y[i]], line_types=line_type[i])) 102 | 103 | # Merging lane 104 | amplitude = 3.25 105 | ljk = StraightLane([0, 6.5 + 4 + 4], [ends[0], 6.5 + 4 + 4], line_types=[c, c], forbidden=True) 106 | lkb = SineLane(ljk.position(ends[0], -amplitude), ljk.position(sum(ends[:2]), -amplitude), 107 | amplitude, 2 * np.pi / (2*ends[1]), np.pi / 2, line_types=[c, c], forbidden=True) 108 | lbc = StraightLane(lkb.position(ends[1], 0), lkb.position(ends[1], 0) + [ends[2], 0], 109 | line_types=[n, c], forbidden=True) 110 | net.add_lane("j", "k", ljk) 111 | net.add_lane("k", "b", lkb) 112 | net.add_lane("b", "c", lbc) 113 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 114 | road.objects.append(Obstacle(road, lbc.position(ends[2], 0))) 115 | self.road = road 116 | 117 | def _make_vehicles(self) -> None: 118 | """ 119 | Populate a road with several vehicles on the highway and on the merging lane, as well as an ego-vehicle. 
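The ego-vehicle starts on lane ("a", "b", 1) at 30 m/s, three vehicles of the configured other_vehicles_type are spread over the highway lanes, and one merging vehicle enters the ramp at 20 m/s with a target speed of 30 m/s, which is the situation the altruistic penalty in _reward is meant to penalise.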
120 | 121 | :return: the ego-vehicle 122 | """ 123 | road = self.road 124 | ego_vehicle = self.action_type.vehicle_class(road, 125 | road.network.get_lane(("a", "b", 1)).position(30, 0), 126 | speed=30) 127 | road.vehicles.append(ego_vehicle) 128 | 129 | other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 130 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 0)).position(90, 0), speed=29)) 131 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 1)).position(70, 0), speed=31)) 132 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 0)).position(5, 0), speed=31.5)) 133 | 134 | merging_v = other_vehicles_type(road, road.network.get_lane(("j", "k", 0)).position(110, 0), speed=20) 135 | merging_v.target_speed = 30 136 | road.vehicles.append(merging_v) 137 | self.vehicle = ego_vehicle 138 | 139 | def _legal_terminal(self) -> bool: 140 | if self.steps >= self.config["duration"] * self.config["policy_frequency"]: 141 | return True 142 | else : 143 | return False 144 | 145 | register( 146 | id='merge-v0', 147 | entry_point='highway_env.envs:MergeEnv', 148 | ) 149 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/summon_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | import numpy as np 3 | 4 | from highway_env import utils 5 | from highway_env.envs import ParkingEnv 6 | from highway_env.road.lane import StraightLane, LineType 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.kinematics import Vehicle 9 | from highway_env.vehicle.objects import Landmark 10 | 11 | 12 | class SummonEnv(ParkingEnv): 13 | 14 | """ 15 | A continuous control environment. 16 | 17 | It implements a reach-type task, where the agent observes their position and speed and must 18 | control their acceleration and steering so as to reach a given goal. 19 | 20 | Credits to Vinny Ruia for the idea and initial implementation. 21 | """ 22 | 23 | @classmethod 24 | def default_config(cls) -> dict: 25 | config = super().default_config() 26 | config.update({ 27 | "vehicles_count": 10, 28 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 29 | }) 30 | return config 31 | 32 | def _create_road(self, spots: int = 15) -> None: 33 | """ 34 | Create a road composed of straight adjacent lanes. 
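Two rows of parking spots (edges "a"->"b" and "b"->"c") are laid out on either side of a set of striped middle lanes (edge "d"->"e") used by the moving vehicles; self.vehicle_starting records the spot where the ego-vehicle is later placed.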
35 | 36 | :param spots: number of parking spots 37 | """ 38 | net = RoadNetwork() 39 | 40 | width = 4.0 41 | lt = (LineType.CONTINUOUS, LineType.CONTINUOUS) 42 | x_offset = 0 43 | y_offset = 12 44 | length = 8 45 | # Parking spots 46 | for k in range(spots): 47 | x = (k - spots // 2) * (width + x_offset) - width / 2 48 | net.add_lane("a", "b", StraightLane([x, y_offset], [x, y_offset + length], 49 | width=width, line_types=lt, speed_limit=5)) 50 | net.add_lane("b", "c", StraightLane([x, -y_offset], [x, -y_offset - length], 51 | width=width, line_types=lt, speed_limit=5)) 52 | 53 | self.spots = spots 54 | self.vehicle_starting = [x, y_offset + (length / 2)] 55 | self.num_middle_lanes = 0 56 | self.x_range = (int(spots / 2) + 1) * width 57 | 58 | # Generate the middle lane for the busy parking lot 59 | for y in np.arange(-y_offset + width, y_offset, width): 60 | net.add_lane("d", "e", StraightLane([-self.x_range, y], [self.x_range, y], 61 | width=width, 62 | line_types=(LineType.STRIPED, LineType.STRIPED), 63 | speed_limit=5)) 64 | self.num_middle_lanes += 1 65 | 66 | self.road = Road(network=net, 67 | np_random=self.np_random, 68 | record_history=self.config["show_trajectories"]) 69 | 70 | def _create_vehicles(self, parked_probability: float = 0.75) -> None: 71 | """ 72 | Create some new random vehicles of a given type, and add them on the road. 73 | 74 | :param parked_probability: probability that a spot is occupied 75 | """ 76 | 77 | self.vehicle = self.action_type.vehicle_class(self.road, 78 | self.vehicle_starting, 79 | 2 * np.pi * self.np_random.rand(), 0) 80 | self.road.vehicles.append(self.vehicle) 81 | 82 | goal_position = [self.np_random.choice([-2 * self.spots - 10, 2 * self.spots + 10]), 0] 83 | self.goal = Landmark(self.road, goal_position, heading=0) 84 | self.road.objects.append(self.goal) 85 | 86 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 87 | for i in range(self.config["vehicles_count"]): 88 | is_parked = self.np_random.rand() <= parked_probability 89 | if not is_parked: 90 | # Just an effort to spread the vehicles out 91 | idx = self.np_random.randint(0, self.num_middle_lanes) 92 | longitudinal = (i * 5) - (self.x_range / 8) * self.np_random.randint(-1, 1) 93 | self.road.vehicles.append( 94 | vehicles_type.make_on_lane(self.road, ("d", "e", idx), longitudinal, speed=2)) 95 | else: 96 | lane = ("a", "b", i) if self.np_random.rand() >= 0.5 else ("b", "c", i) 97 | self.road.vehicles.append(Vehicle.make_on_lane(self.road, lane, 4, speed=0)) 98 | 99 | for v in self.road.vehicles: # Prevent early collisions 100 | if v is not self.vehicle and np.linalg.norm(v.position - self.vehicle.position) < 20: 101 | self.road.vehicles.remove(v) 102 | 103 | def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict, p: float = 0.5) -> float: 104 | """ 105 | Proximity to the goal is rewarded 106 | 107 | We use a weighted p-norm 108 | :param achieved_goal: the goal that was achieved 109 | :param desired_goal: the goal that was desired 110 | :param info: any supplementary information 111 | :param p: the Lp^p norm used in the reward. 
Use p<1 to have high kurtosis for rewards in [0, 1] 112 | :return: the corresponding reward 113 | """ 114 | return super().compute_reward(achieved_goal, desired_goal, info, p) + \ 115 | self.config["collision_reward"] * self.vehicle.crashed 116 | 117 | 118 | class SummonEnvActionRepeat(SummonEnv): 119 | def __init__(self): 120 | super().__init__() 121 | self.configure({"policy_frequency": 1}) 122 | 123 | 124 | register( 125 | id='summon-v0', 126 | entry_point='highway_env.envs:SummonEnv', 127 | max_episode_steps=100 128 | ) 129 | 130 | register( 131 | id='summon-ActionRepeat-v0', 132 | entry_point='highway_env.envs:SummonEnvActionRepeat', 133 | max_episode_steps=20 134 | ) 135 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/two_way_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.road.lane import LineType, StraightLane 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.controller import MDPVehicle 9 | 10 | 11 | class TwoWayEnv(AbstractEnv): 12 | 13 | """ 14 | A risk management task: the agent is driving on a two-way lane with icoming traffic. 15 | 16 | It must balance making progress by overtaking and ensuring safety. 17 | 18 | These conflicting objectives are implemented by a reward signal and a constraint signal, 19 | in the CMDP/BMDP framework. 20 | """ 21 | 22 | @classmethod 23 | def default_config(cls) -> dict: 24 | config = super().default_config() 25 | config.update({ 26 | "observation": { 27 | "type": "TimeToCollision", 28 | "horizon": 5 29 | }, 30 | "action": { 31 | "type": "DiscreteMetaAction", 32 | }, 33 | "collision_reward": 0, 34 | "left_lane_constraint": 1, 35 | "left_lane_reward": 0.2, 36 | "high_speed_reward": 0.8, 37 | }) 38 | return config 39 | 40 | def _reward(self, action: int) -> float: 41 | """ 42 | The vehicle is rewarded for driving with high speed 43 | :param action: the action performed 44 | :return: the reward of the state-action transition 45 | """ 46 | neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index) 47 | 48 | reward = self.config["high_speed_reward"] * self.vehicle.speed_index / (self.vehicle.target_speeds.size - 1) \ 49 | + self.config["left_lane_reward"] \ 50 | * (len(neighbours) - 1 - self.vehicle.target_lane_index[2]) / (len(neighbours) - 1) 51 | return reward 52 | 53 | def _is_terminal(self) -> bool: 54 | """The episode is over if the ego vehicle crashed or the time is out.""" 55 | return self.vehicle.crashed 56 | 57 | def _cost(self, action: int) -> float: 58 | """The constraint signal is the time spent driving on the opposite lane, and occurrence of collisions.""" 59 | return float(self.vehicle.crashed) + float(self.vehicle.lane_index[2] == 0)/15 60 | 61 | def _reset(self) -> np.ndarray: 62 | self._make_road() 63 | self._make_vehicles() 64 | 65 | def _make_road(self, length=800): 66 | """ 67 | Make a road composed of a two-way road. 
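The network has two lanes in the ego direction (edge "a"->"b") and a single opposite lane (edge "b"->"a") carrying the oncoming traffic that the agent must overtake into.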
68 | 69 | :return: the road 70 | """ 71 | net = RoadNetwork() 72 | 73 | # Lanes 74 | net.add_lane("a", "b", StraightLane([0, 0], [length, 0], 75 | line_types=(LineType.CONTINUOUS_LINE, LineType.STRIPED))) 76 | net.add_lane("a", "b", StraightLane([0, StraightLane.DEFAULT_WIDTH], [length, StraightLane.DEFAULT_WIDTH], 77 | line_types=(LineType.NONE, LineType.CONTINUOUS_LINE))) 78 | net.add_lane("b", "a", StraightLane([length, 0], [0, 0], 79 | line_types=(LineType.NONE, LineType.NONE))) 80 | 81 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 82 | self.road = road 83 | 84 | def _make_vehicles(self) -> None: 85 | """ 86 | Populate a road with several vehicles on the road 87 | 88 | :return: the ego-vehicle 89 | """ 90 | road = self.road 91 | ego_vehicle = self.action_type.vehicle_class(road, 92 | road.network.get_lane(("a", "b", 1)).position(30, 0), 93 | speed=30) 94 | road.vehicles.append(ego_vehicle) 95 | self.vehicle = ego_vehicle 96 | 97 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 98 | for i in range(3): 99 | self.road.vehicles.append( 100 | vehicles_type(road, 101 | position=road.network.get_lane(("a", "b", 1)) 102 | .position(70+40*i + 10*self.np_random.randn(), 0), 103 | heading=road.network.get_lane(("a", "b", 1)).heading_at(70+40*i), 104 | speed=24 + 2*self.np_random.randn(), 105 | enable_lane_change=False) 106 | ) 107 | for i in range(2): 108 | v = vehicles_type(road, 109 | position=road.network.get_lane(("b", "a", 0)) 110 | .position(200+100*i + 10*self.np_random.randn(), 0), 111 | heading=road.network.get_lane(("b", "a", 0)).heading_at(200+100*i), 112 | speed=20 + 5*self.np_random.randn(), 113 | enable_lane_change=False) 114 | v.target_lane_index = ("b", "a", 0) 115 | self.road.vehicles.append(v) 116 | 117 | 118 | register( 119 | id='two-way-v0', 120 | entry_point='highway_env.envs:TwoWayEnv', 121 | max_episode_steps=15 122 | ) 123 | -------------------------------------------------------------------------------- /highway_modify/highway_env/road/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/road/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/road/regulation.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | 5 | from highway_env import utils 6 | from highway_env.road.road import Road, RoadNetwork 7 | from highway_env.vehicle.controller import ControlledVehicle, MDPVehicle 8 | from highway_env.vehicle.kinematics import Vehicle, Obstacle 9 | 10 | 11 | class RegulatedRoad(Road): 12 | YIELDING_COLOR: Tuple[float, float, float] = None 13 | REGULATION_FREQUENCY: int = 2 14 | YIELD_DURATION: float = 0. 
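# Every 1 / REGULATION_FREQUENCY seconds of simulated time, enforce_road_rules() predicts constant-speed trajectories for each pair of vehicles and, when a conflict is found, makes the lower-priority (or trailing) vehicle yield by setting its target speed to 0 for YIELD_DURATION seconds.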
15 | 16 | def __init__(self, network: RoadNetwork = None, vehicles: List[Vehicle] = None, obstacles: List[Obstacle] = None, 17 | np_random: np.random.RandomState = None, record_history: bool = False) -> None: 18 | super().__init__(network, vehicles, obstacles, np_random, record_history) 19 | self.steps = 0 20 | 21 | def step(self, dt: float) -> None: 22 | self.steps += 1 23 | if self.steps % int(1 / dt / self.REGULATION_FREQUENCY) == 0: 24 | self.enforce_road_rules() 25 | return super().step(dt) 26 | 27 | def enforce_road_rules(self) -> None: 28 | """Find conflicts and resolve them by assigning yielding vehicles and stopping them.""" 29 | 30 | # Unfreeze previous yielding vehicles 31 | for v in self.vehicles: 32 | if getattr(v, "is_yielding", False): 33 | if v.yield_timer >= self.YIELD_DURATION * self.REGULATION_FREQUENCY: 34 | v.target_speed = v.lane.speed_limit 35 | delattr(v, "color") 36 | v.is_yielding = False 37 | else: 38 | v.yield_timer += 1 39 | 40 | # Find new conflicts and resolve them 41 | for i in range(len(self.vehicles) - 1): 42 | for j in range(i+1, len(self.vehicles)): 43 | if self.is_conflict_possible(self.vehicles[i], self.vehicles[j]): 44 | yielding_vehicle = self.respect_priorities(self.vehicles[i], self.vehicles[j]) 45 | if yielding_vehicle is not None and \ 46 | isinstance(yielding_vehicle, ControlledVehicle) and \ 47 | not isinstance(yielding_vehicle, MDPVehicle): 48 | yielding_vehicle.color = self.YIELDING_COLOR 49 | yielding_vehicle.target_speed = 0 50 | yielding_vehicle.is_yielding = True 51 | yielding_vehicle.yield_timer = 0 52 | 53 | @staticmethod 54 | def respect_priorities(v1: Vehicle, v2: Vehicle) -> Vehicle: 55 | """ 56 | Resolve a conflict between two vehicles by determining who should yield 57 | 58 | :param v1: first vehicle 59 | :param v2: second vehicle 60 | :return: the yielding vehicle 61 | """ 62 | if v1.lane.priority > v2.lane.priority: 63 | return v2 64 | elif v1.lane.priority < v2.lane.priority: 65 | return v1 66 | else: # The vehicle behind should yield 67 | return v1 if v1.front_distance_to(v2) > v2.front_distance_to(v1) else v2 68 | 69 | @staticmethod 70 | def is_conflict_possible(v1: ControlledVehicle, v2: ControlledVehicle, horizon: int = 3, step: float = 0.25) -> bool: 71 | times = np.arange(step, horizon, step) 72 | positions_1, headings_1 = v1.predict_trajectory_constant_speed(times) 73 | positions_2, headings_2 = v2.predict_trajectory_constant_speed(times) 74 | 75 | for position_1, heading_1, position_2, heading_2 in zip(positions_1, headings_1, positions_2, headings_2): 76 | # Fast spherical pre-check 77 | if np.linalg.norm(position_2 - position_1) > v1.LENGTH: 78 | continue 79 | 80 | # Accurate rectangular check 81 | if utils.rotated_rectangles_intersect((position_1, 1.5*v1.LENGTH, 0.9*v1.WIDTH, heading_1), 82 | (position_2, 1.5*v2.LENGTH, 0.9*v2.WIDTH, heading_2)): 83 | return True 84 | -------------------------------------------------------------------------------- /highway_modify/highway_env/road/spline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import interpolate 3 | from typing import List, Tuple 4 | 5 | 6 | class LinearSpline2D: 7 | """ 8 | Piece-wise linear curve fitted to a list of points. 
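The curve is parametrised by cumulated arc length: interp1d lookups return positions and finite-difference tangents, and poses sampled every PARAM_CURVE_SAMPLE_DISTANCE metres support the Cartesian/Frenet conversions below.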
9 | """ 10 | 11 | PARAM_CURVE_SAMPLE_DISTANCE: int = 1 # curve samples are placed 1m apart 12 | 13 | def __init__(self, points: List[Tuple[float, float]]): 14 | x_values = np.array([pt[0] for pt in points]) 15 | y_values = np.array([pt[1] for pt in points]) 16 | x_values_diff = np.diff(x_values) 17 | x_values_diff = np.hstack((x_values_diff, x_values_diff[-1])) 18 | y_values_diff = np.diff(y_values) 19 | y_values_diff = np.hstack((y_values_diff, y_values_diff[-1])) 20 | arc_length_cumulated = np.hstack( 21 | (0, np.cumsum(np.sqrt(x_values_diff[:-1] ** 2 + y_values_diff[:-1] ** 2))) 22 | ) 23 | self.length = arc_length_cumulated[-1] 24 | self.x_curve = interpolate.interp1d( 25 | arc_length_cumulated, x_values, fill_value="extrapolate" 26 | ) 27 | self.y_curve = interpolate.interp1d( 28 | arc_length_cumulated, y_values, fill_value="extrapolate" 29 | ) 30 | self.dx_curve = interpolate.interp1d( 31 | arc_length_cumulated, x_values_diff, fill_value="extrapolate" 32 | ) 33 | self.dy_curve = interpolate.interp1d( 34 | arc_length_cumulated, y_values_diff, fill_value="extrapolate" 35 | ) 36 | 37 | (self.s_samples, self.poses) = self.sample_curve( 38 | self.x_curve, self.y_curve, self.length, self.PARAM_CURVE_SAMPLE_DISTANCE 39 | ) 40 | 41 | def __call__(self, lon: float) -> Tuple[float, float]: 42 | return self.x_curve(lon), self.y_curve(lon) 43 | 44 | def get_dx_dy(self, lon: float) -> Tuple[float, float]: 45 | idx_pose = self._get_idx_segment_for_lon(lon) 46 | pose = self.poses[idx_pose] 47 | return pose.normal 48 | 49 | def cartesian_to_frenet(self, position: Tuple[float, float]) -> Tuple[float, float]: 50 | """ 51 | Transform the point in Cartesian coordinates into Frenet coordinates of the curve 52 | """ 53 | 54 | pose = self.poses[-1] 55 | projection = pose.project_onto_normal(position) 56 | if projection >= 0: 57 | lon = self.s_samples[-1] + projection 58 | lat = pose.project_onto_orthonormal(position) 59 | return lon, lat 60 | 61 | for idx in list(range(len(self.s_samples) - 1))[::-1]: 62 | pose = self.poses[idx] 63 | projection = pose.project_onto_normal(position) 64 | if projection >= 0: 65 | if projection < pose.distance_to_origin(position): 66 | lon = self.s_samples[idx] + projection 67 | lat = pose.project_onto_orthonormal(position) 68 | return lon, lat 69 | else: 70 | ValueError("No valid projection could be found") 71 | pose = self.poses[0] 72 | lon = pose.project_onto_normal(position) 73 | lat = pose.project_onto_orthonormal(position) 74 | return lon, lat 75 | 76 | def frenet_to_cartesian(self, lon: float, lat: float) -> Tuple[float, float]: 77 | """ 78 | Convert the point from Frenet coordinates of the curve into Cartesian coordinates 79 | """ 80 | idx_segment = self._get_idx_segment_for_lon(lon) 81 | s = lon - self.s_samples[idx_segment] 82 | pose = self.poses[idx_segment] 83 | point = pose.position + s * pose.normal 84 | point += lat * pose.orthonormal 85 | return point 86 | 87 | def _get_idx_segment_for_lon(self, lon: float) -> int: 88 | """ 89 | Returns the index of the curve pose that corresponds to the longitudinal coordinate 90 | """ 91 | idx_smaller = np.argwhere(lon < self.s_samples) 92 | if len(idx_smaller) == 0: 93 | return len(self.s_samples) - 1 94 | if idx_smaller[0] == 0: 95 | return 0 96 | return int(idx_smaller[0]) - 1 97 | 98 | @staticmethod 99 | def sample_curve(x_curve, y_curve, length: float, CURVE_SAMPLE_DISTANCE=1): 100 | """ 101 | Create samples of the curve that are CURVE_SAMPLE_DISTANCE apart. 
These samples are used for Frenet to Cartesian 102 | conversion and vice versa 103 | """ 104 | num_samples = np.floor(length / CURVE_SAMPLE_DISTANCE) 105 | s_values = np.hstack( 106 | (CURVE_SAMPLE_DISTANCE * np.arange(0, int(num_samples) + 1), length) 107 | ) 108 | x_values = x_curve(s_values) 109 | y_values = y_curve(s_values) 110 | dx_values = np.diff(x_values) 111 | dx_values = np.hstack((dx_values, dx_values[-1])) 112 | dy_values = np.diff(y_values) 113 | dy_values = np.hstack((dy_values, dy_values[-1])) 114 | 115 | poses = [ 116 | CurvePose(x, y, dx, dy) 117 | for x, y, dx, dy in zip(x_values, y_values, dx_values, dy_values) 118 | ] 119 | 120 | return s_values, poses 121 | 122 | 123 | class CurvePose: 124 | """ 125 | Sample pose on a curve that is used for Frenet to Cartesian conversion 126 | """ 127 | 128 | def __init__(self, x: float, y: float, dx: float, dy: float): 129 | self.length = np.sqrt(dx**2 + dy**2) 130 | self.position = np.array([x, y]).flatten() 131 | self.normal = np.array([dx, dy]).flatten() / self.length 132 | self.orthonormal = np.array([-self.normal[1], self.normal[0]]).flatten() 133 | 134 | def distance_to_origin(self, point: Tuple[float, float]) -> float: 135 | """ 136 | Compute the distance between the point [x, y] and the pose origin 137 | """ 138 | return np.sqrt(np.sum((self.position - point) ** 2)) 139 | 140 | def project_onto_normal(self, point: Tuple[float, float]) -> float: 141 | """ 142 | Compute the longitudinal distance from pose origin to point by projecting the point onto the normal vector of the pose 143 | """ 144 | return self.normal.dot(point - self.position) 145 | 146 | def project_onto_orthonormal(self, point: Tuple[float, float]) -> float: 147 | """ 148 | Compute the lateral distance from pose origin to point by projecting the point onto the orthonormal vector of the pose 149 | """ 150 | return self.orthonormal.dot(point - self.position) 151 | -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/vehicle/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/uncertainty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/vehicle/uncertainty/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/uncertainty/estimation.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union 2 | 3 | import numpy as np 4 | 5 | from highway_env.road.road import Road, LaneIndex, Route 6 | from highway_env.utils import confidence_polytope, is_consistent_dataset, Vector 7 | from highway_env.vehicle.behavior import LinearVehicle 8 | from highway_env.vehicle.uncertainty.prediction import IntervalVehicle, Polytope 9 | 10 | 11 | class RegressionVehicle(IntervalVehicle): 12 | 13 | """Estimator for the parameter of a LinearVehicle.""" 14 | 15 | def longitudinal_matrix_polytope(self) -> Polytope: 16 | return self.polytope_from_estimation(self.data["longitudinal"], self.theta_a_i, self.longitudinal_structure) 17 | 18 | def 
lateral_matrix_polytope(self) -> Polytope: 19 | return self.polytope_from_estimation(self.data["lateral"], self.theta_b_i, self.lateral_structure) 20 | 21 | def polytope_from_estimation(self, data: dict, parameter_box: np.ndarray, structure: Callable[[], Polytope])\ 22 | -> Polytope: 23 | if not data: 24 | return self.parameter_box_to_polytope(parameter_box, structure) 25 | theta_n_lambda, d_theta, _, _ = confidence_polytope(data, parameter_box=parameter_box) 26 | a, phi = structure() 27 | a0 = a + np.tensordot(theta_n_lambda, phi, axes=[0, 0]) 28 | da = [np.tensordot(d_theta_k, phi, axes=[0, 0]) for d_theta_k in d_theta] 29 | return a0, da 30 | 31 | 32 | class MultipleModelVehicle(LinearVehicle): 33 | def __init__(self, road: Road, 34 | position: Vector, 35 | heading: float = 0, 36 | speed: float = 0, 37 | target_lane_index: LaneIndex = None, 38 | target_speed: float = None, 39 | route: Route = None, 40 | enable_lane_change: bool = True, 41 | timer: bool = None, 42 | data: dict = None) -> None: 43 | super().__init__(road, position, heading, speed, target_lane_index, target_speed, route, 44 | enable_lane_change, timer, data) 45 | if not self.data: 46 | self.data = [] 47 | 48 | def act(self, action: Union[dict, str] = None) -> None: 49 | if self.collecting_data: 50 | self.update_possible_routes() 51 | super().act(action) 52 | 53 | def collect_data(self) -> None: 54 | """Collect the features for each possible route, and true observed outputs.""" 55 | for route, data in self.data: 56 | self.add_features(data, route[0], output_lane=self.target_lane_index) 57 | 58 | def update_possible_routes(self) -> None: 59 | """ 60 | Update a list of possible routes that this vehicle could be following. 61 | 62 | - Add routes at the next intersection 63 | - Step the current lane in each route 64 | - Reject inconsistent routes 65 | """ 66 | 67 | for route in self.get_routes_at_intersection(): # Candidates 68 | # Unknown lane -> first lane 69 | for i, lane_index in enumerate(route): 70 | route[i] = lane_index if lane_index[2] is not None else (lane_index[0], lane_index[1], 0) 71 | # Is this route already considered, or a suffix of a route already considered ? 72 | for known_route, _ in self.data: 73 | if known_route == route: 74 | break 75 | elif len(known_route) < len(route) and route[:len(known_route)] == known_route: 76 | self.data = [(r, d) if r != known_route else (route, d) for r, d in self.data] 77 | break 78 | else: 79 | self.data.append((route.copy(), {})) # Add it 80 | 81 | # Step the lane being followed in each possible route 82 | for route, _ in self.data: 83 | if self.road.network.get_lane(route[0]).after_end(self.position): 84 | route.pop(0) 85 | 86 | # Reject inconsistent hypotheses 87 | for route, data in self.data.copy(): 88 | if data: 89 | if not is_consistent_dataset(data["lateral"], parameter_box=LinearVehicle.STEERING_RANGE): 90 | self.data.remove((route, data)) 91 | 92 | def assume_model_is_valid(self, index: int) -> "LinearVehicle": 93 | """ 94 | Get a copy of this vehicle behaving according to one of its possible routes. 
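If no route data has been collected yet, a plain copy of this vehicle is returned; otherwise the index is clamped to the number of stored hypotheses and the corresponding (route, data) pair is used to build a RegressionVehicle that follows that route.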
95 | 96 | :param index: index of the route to consider 97 | :return: a copy of the vehicle 98 | """ 99 | if not self.data: 100 | return self.create_from(self) 101 | index = min(index, len(self.data)-1) 102 | route, data = self.data[index] 103 | vehicle = RegressionVehicle.create_from(self) 104 | vehicle.target_lane_index = route[0] 105 | vehicle.route = route 106 | vehicle.data = data 107 | return vehicle 108 | -------------------------------------------------------------------------------- /highway_modify/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /highway_modify/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name=highway-env 3 | version=1.5 4 | author=Edouard Leurent 5 | author_email=eleurent@gmail.com 6 | description=An environment for simulated highway driving tasks. 7 | long_description=file:README.md 8 | long_description_content_type=text/markdown 9 | url=https://github.com/eleurent/highway-env 10 | license=MIT 11 | classifiers= 12 | Development Status :: 5 - Production/Stable 13 | Programming Language :: Python 14 | Programming Language :: Python :: 3 :: Only 15 | Programming Language :: Python :: 3.8 16 | License :: OSI Approved :: MIT License 17 | 18 | 19 | [options] 20 | setup_requires= 21 | pytest-runner 22 | install_requires= 23 | gym 24 | numpy 25 | pygame>=2.0.2 26 | matplotlib 27 | pandas 28 | scipy 29 | packages=find: 30 | tests_require= 31 | pytest 32 | 33 | [options.extras_require] 34 | deploy = pytest-runner; sphinx<1.7.3; sphinx_rtd_theme 35 | 36 | [options.packages.find] 37 | exclude = 38 | tests 39 | docs 40 | scripts 41 | 42 | 43 | [aliases] 44 | test=pytest 45 | -------------------------------------------------------------------------------- /highway_modify/setup.py: -------------------------------------------------------------------------------- 1 | # Following PEP 517/518, this file should not not needed and replaced instead by the setup.cfg file and pyproject.toml. 
2 | # Unfortunately it is still required py the pip editable mode `pip install -e` 3 | # See https://stackoverflow.com/a/60885212 4 | 5 | from setuptools import setup 6 | 7 | if __name__ == "__main__": 8 | setup() -------------------------------------------------------------------------------- /highway_modify/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/tests/__init__.py -------------------------------------------------------------------------------- /highway_modify/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/tests/envs/__init__.py -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_actions.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import pytest 3 | 4 | import highway_env 5 | 6 | action_configs = [ 7 | {"type": "ContinuousAction"}, 8 | {"type": "DiscreteAction"}, 9 | {"type": "DiscreteMetaAction"}, 10 | ] 11 | 12 | 13 | @pytest.mark.parametrize("action_config", action_configs) 14 | def test_action_type(action_config): 15 | env = gym.make("highway-v0") 16 | env.configure({"action": action_config}) 17 | env.reset() 18 | for _ in range(3): 19 | action = env.action_space.sample() 20 | obs, _, _, _ = env.step(action) 21 | assert env.action_space.contains(action) 22 | assert env.observation_space.contains(obs) 23 | env.close() -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_env_preprocessors.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import highway_env 4 | 5 | 6 | def test_preprocessors(): 7 | env = gym.make('highway-v0') 8 | env = env.simplify() 9 | env = env.change_vehicles("highway_env.vehicle.behavior.IDMVehicle") 10 | env = env.set_preferred_lane(0) 11 | env = env.set_route_at_intersection("random") 12 | env = env.set_vehicle_field(("crashed", False)) 13 | env = env.call_vehicle_method(("plan_route_to", "1")) 14 | env = env.randomize_behavior() 15 | 16 | env.reset() 17 | for _ in range(3): 18 | action = env.action_space.sample() 19 | obs, reward, _, _ = env.step(action) 20 | env.close() 21 | 22 | assert env.observation_space.contains(obs) 23 | assert 0 <= reward <= 1 24 | 25 | -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_gym.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import pytest 3 | 4 | import highway_env 5 | 6 | envs = [ 7 | "highway-v0", 8 | "merge-v0", 9 | "roundabout-v0", 10 | "intersection-v0", 11 | "intersection-v1", 12 | "parking-v0", 13 | "summon-v0", 14 | "two-way-v0", 15 | "lane-keeping-v0", 16 | "racetrack-v0", 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize("env_spec", envs) 21 | def test_env_step(env_spec): 22 | env = gym.make(env_spec) 23 | 24 | env.reset() 25 | for _ in range(3): 26 | action = env.action_space.sample() 27 | obs, _, _, _ = env.step(action) 28 | env.close() 29 | 30 | assert env.observation_space.contains(obs) 31 | 32 | -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_time.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | import timeit 3 | import gym 4 | 5 | import highway_env 6 | 7 | 8 | def wrapper(func, *args, **kwargs): 9 | def wrapped(): 10 | return func(*args, **kwargs) 11 | return wrapped 12 | 13 | 14 | def time_env(env_name, steps=20): 15 | env = gym.make(env_name) 16 | env.reset() 17 | for _ in range(steps): 18 | _, _, done, _ = env.step(env.action_space.sample()) 19 | env.reset() if done else _ 20 | env.close() 21 | 22 | 23 | def test_running_time(repeat=1): 24 | for env_name, steps in [ 25 | ("highway-v0", 10), 26 | ("highway-fast-v0", 10), 27 | ("parking-v0", 20) 28 | ]: 29 | env_time = wrapper(time_env, env_name, steps) 30 | time_spent = timeit.timeit(env_time, number=repeat) / repeat 31 | env = gym.make(env_name) 32 | time_simulated = steps / env.unwrapped.config["policy_frequency"] 33 | real_time_ratio = time_simulated / time_spent 34 | print("Real time ratio for {}: {}".format(env_name, real_time_ratio)) 35 | assert real_time_ratio > 0.5 # let's not be too ambitious for now 36 | 37 | 38 | if __name__ == "__main__": 39 | test_running_time() 40 | -------------------------------------------------------------------------------- /highway_modify/tests/graphics/test_render.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import pytest 4 | 5 | import highway_env 6 | 7 | envs = ["highway-v0", "merge-v0"] 8 | 9 | 10 | @pytest.mark.parametrize("env_spec", envs) 11 | def test_render(env_spec): 12 | env = gym.make(env_spec) 13 | env.configure({"offscreen_rendering": True}) 14 | img = env.render(mode="rgb_array") 15 | env.close() 16 | assert isinstance(img, np.ndarray) 17 | assert img.shape == (env.config["screen_height"], env.config["screen_width"], 3) # (H,W,C) 18 | 19 | 20 | @pytest.mark.parametrize("env_spec", envs) 21 | def test_obs_grayscale(env_spec, stack_size=4): 22 | env = gym.make(env_spec) 23 | env.configure({ 24 | "offscreen_rendering": True, 25 | "observation": { 26 | "type": "GrayscaleObservation", 27 | "observation_shape": (env.config["screen_width"], env.config["screen_height"]), 28 | "stack_size": stack_size, 29 | "weights": [0.2989, 0.5870, 0.1140], 30 | } 31 | }) 32 | obs = env.reset() 33 | env.close() 34 | assert isinstance(obs, np.ndarray) 35 | assert obs.shape == (stack_size, env.config["screen_width"], env.config["screen_height"]) 36 | -------------------------------------------------------------------------------- /highway_modify/tests/road/test_road.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from highway_env.road.lane import StraightLane, CircularLane, PolyLane 5 | from highway_env.road.road import Road, RoadNetwork 6 | from highway_env.vehicle.controller import ControlledVehicle 7 | 8 | 9 | @pytest.fixture 10 | def net() -> RoadNetwork: 11 | # Diamond 12 | net = RoadNetwork() 13 | net.add_lane(0, 1, StraightLane([0, 0], [10, 0])) 14 | net.add_lane(1, 2, StraightLane([10, 0], [5, 5])) 15 | net.add_lane(2, 0, StraightLane([5, 5], [0, 0])) 16 | net.add_lane(1, 3, StraightLane([10, 0], [5, -5])) 17 | net.add_lane(3, 0, StraightLane([5, -5], [0, 0])) 18 | print(net.graph) 19 | 20 | return net 21 | 22 | 23 | def test_network(net): 24 | # Road 25 | road = Road(network=net) 26 | v = ControlledVehicle(road, [5, 0], heading=0, target_speed=2) 27 | road.vehicles.append(v) 28 | assert v.lane_index == (0, 1, 0) 29 | 30 | # 
Lane changes 31 | dt = 1/15 32 | lane_index = v.target_lane_index 33 | lane_changes = 0 34 | for _ in range(int(20/dt)): 35 | road.act() 36 | road.step(dt) 37 | if lane_index != v.target_lane_index: 38 | lane_index = v.target_lane_index 39 | lane_changes += 1 40 | assert lane_changes >= 3 41 | 42 | 43 | def test_network_to_from_config(net): 44 | config_dict = net.to_config() 45 | net_2 = RoadNetwork.from_config(config_dict) 46 | assert len(net.graph) == len(net_2.graph) 47 | 48 | 49 | def test_polylane(): 50 | lane = CircularLane( 51 | center=[0, 0], 52 | radius=10, 53 | start_phase=0, 54 | end_phase=3.14, 55 | ) 56 | 57 | num_samples = int(lane.length / 5) 58 | sampled_centreline = [ 59 | lane.position(longitudinal=lon, lateral=0) 60 | for lon in np.linspace(0, lane.length, num_samples) 61 | ] 62 | sampled_left_boundary = [ 63 | lane.position(longitudinal=lon, lateral=0.5 * lane.width_at(longitudinal=lon)) 64 | for lon in np.linspace(0, lane.length, num_samples) 65 | ] 66 | sampled_right_boundary = [ 67 | lane.position(longitudinal=lon, lateral=-0.5 * lane.width_at(longitudinal=lon)) 68 | for lon in np.linspace(0, lane.length, num_samples) 69 | ] 70 | polylane = PolyLane( 71 | lane_points=sampled_centreline, 72 | left_boundary_points=sampled_left_boundary, 73 | right_boundary_points=sampled_right_boundary, 74 | ) 75 | 76 | # sample boundaries from both lanes and assert equal 77 | 78 | num_samples = int(lane.length / 3) 79 | # original lane 80 | sampled_centreline = [ 81 | lane.position(longitudinal=lon, lateral=0) 82 | for lon in np.linspace(0, lane.length, num_samples) 83 | ] 84 | sampled_left_boundary = [ 85 | lane.position(longitudinal=lon, lateral=0.5 * lane.width_at(longitudinal=lon)) 86 | for lon in np.linspace(0, lane.length, num_samples) 87 | ] 88 | sampled_right_boundary = [ 89 | lane.position(longitudinal=lon, lateral=-0.5 * lane.width_at(longitudinal=lon)) 90 | for lon in np.linspace(0, lane.length, num_samples) 91 | ] 92 | 93 | # polylane 94 | polylane_sampled_centreline = [ 95 | polylane.position(longitudinal=lon, lateral=0) 96 | for lon in np.linspace(0, polylane.length, num_samples) 97 | ] 98 | polylane_sampled_left_boundary = [ 99 | polylane.position( 100 | longitudinal=lon, lateral=0.5 * polylane.width_at(longitudinal=lon) 101 | ) 102 | for lon in np.linspace(0, polylane.length, num_samples) 103 | ] 104 | polylane_sampled_right_boundary = [ 105 | polylane.position( 106 | longitudinal=lon, lateral=-0.5 * polylane.width_at(longitudinal=lon) 107 | ) 108 | for lon in np.linspace(0, polylane.length, num_samples) 109 | ] 110 | 111 | # assert equal (very coarse because of coarse sampling) 112 | assert all( 113 | np.linalg.norm( 114 | np.array(sampled_centreline) - np.array(polylane_sampled_centreline), axis=1 115 | ) 116 | < 0.7 117 | ) 118 | assert all( 119 | np.linalg.norm( 120 | np.array(sampled_left_boundary) - np.array(polylane_sampled_left_boundary), 121 | axis=1, 122 | ) 123 | < 0.7 124 | ) 125 | assert all( 126 | np.linalg.norm( 127 | np.array(sampled_right_boundary) 128 | - np.array(polylane_sampled_right_boundary), 129 | axis=1, 130 | ) 131 | < 0.7 132 | ) 133 | -------------------------------------------------------------------------------- /highway_modify/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from highway_env.utils import rotated_rectangles_intersect 4 | 5 | 6 | def test_rotated_rectangles_intersect(): 7 | assert rotated_rectangles_intersect(([12.86076812, 28.60182391], 
5.0, 2.0, -0.4675779906495494), 8 | ([9.67753944, 28.90585412], 5.0, 2.0, -0.3417019364473201)) 9 | assert rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([0, 1], 2, 1, 0)) 10 | assert not rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([0, 2.1], 2, 1, 0)) 11 | assert not rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([1, 1.1], 2, 1, 0)) 12 | assert rotated_rectangles_intersect(([0, 0], 2, 1, np.pi/4), ([1, 1.1], 2, 1, 0)) 13 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_behavior.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.vehicle.objects import Obstacle 4 | from highway_env.road.road import Road, RoadNetwork 5 | from highway_env.vehicle.behavior import IDMVehicle, LinearVehicle 6 | 7 | FPS = 15 8 | vehicle_types = [IDMVehicle, LinearVehicle] 9 | 10 | 11 | @pytest.mark.parametrize("vehicle_type", vehicle_types) 12 | def test_stop_before_obstacle(vehicle_type): 13 | road = Road(RoadNetwork.straight_road_network(lanes=1)) 14 | vehicle = vehicle_type(road=road, position=[0, 0], speed=20, heading=0) 15 | obstacle = Obstacle(road=road, position=[80, 0]) 16 | road.vehicles.append(vehicle) 17 | road.objects.append(obstacle) 18 | for _ in range(10 * FPS): 19 | road.act() 20 | road.step(dt=1/FPS) 21 | assert not vehicle.crashed 22 | assert vehicle.position[0] == pytest.approx(obstacle.position[0] - vehicle_type.DISTANCE_WANTED, abs=1) 23 | assert vehicle.position[1] == pytest.approx(0) 24 | assert vehicle.speed == pytest.approx(0, abs=1) 25 | assert vehicle.heading == pytest.approx(0) 26 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_control.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.road.lane import StraightLane 4 | from highway_env.road.road import Road, RoadNetwork 5 | from highway_env.vehicle.controller import ControlledVehicle 6 | 7 | FPS = 15 8 | 9 | 10 | def test_step(): 11 | v = ControlledVehicle(road=None, position=[0, 0], speed=20, heading=0) 12 | for _ in range(2 * FPS): 13 | v.step(dt=1/FPS) 14 | assert v.position[0] == pytest.approx(40) 15 | assert v.position[1] == pytest.approx(0) 16 | assert v.speed == pytest.approx(20) 17 | assert v.heading == pytest.approx(0) 18 | 19 | 20 | def test_lane_change(): 21 | road = Road(RoadNetwork.straight_road_network(2)) 22 | v = ControlledVehicle(road=road, position=road.network.get_lane(("0", "1", 0)).position(0, 0), speed=20, heading=0) 23 | v.act('LANE_RIGHT') 24 | for _ in range(3 * FPS): 25 | v.act() 26 | v.step(dt=1/FPS) 27 | assert v.speed == pytest.approx(20) 28 | assert v.position[1] == pytest.approx(StraightLane.DEFAULT_WIDTH, abs=StraightLane.DEFAULT_WIDTH/4) 29 | assert v.lane_index[2] == 1 30 | 31 | 32 | def test_speed_control(): 33 | road = Road(RoadNetwork.straight_road_network(1)) 34 | v = ControlledVehicle(road=road, position=road.network.get_lane(("0", "1", 0)).position(0, 0), speed=20, heading=0) 35 | v.act('FASTER') 36 | for _ in range(int(3 * v.TAU_ACC * FPS)): 37 | v.act() 38 | v.step(dt=1/FPS) 39 | assert v.speed == pytest.approx(20 + v.DELTA_SPEED, abs=0.5) 40 | assert v.position[1] == pytest.approx(0) 41 | assert v.lane_index[2] == 0 42 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_dynamics.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.road.road import Road, RoadNetwork 4 | from highway_env.vehicle.kinematics import Vehicle 5 | from highway_env.vehicle.objects import Obstacle, Landmark 6 | 7 | FPS = 15 8 | 9 | 10 | def test_step(): 11 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 12 | for _ in range(2*FPS): 13 | v.step(dt=1/FPS) 14 | assert v.position[0] == pytest.approx(40) 15 | assert v.position[1] == pytest.approx(0) 16 | assert v.speed == pytest.approx(20) 17 | assert v.heading == pytest.approx(0) 18 | 19 | 20 | def test_act(): 21 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 22 | v.act({'acceleration': 1, 'steering': 0}) 23 | for _ in range(1 * FPS): 24 | v.step(dt=1/FPS) 25 | assert v.speed == pytest.approx(21) 26 | 27 | v.act({'acceleration': 0, 'steering': 0.5}) 28 | for _ in range(1 * FPS): 29 | v.step(dt=1/FPS) 30 | assert v.speed == pytest.approx(21) 31 | assert v.position[1] > 0 32 | 33 | 34 | def test_brake(): 35 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 36 | for _ in range(10 * FPS): 37 | v.act({'acceleration': min(max(-1 * v.speed, -6), 6), 'steering': 0}) 38 | v.step(dt=1/FPS) 39 | assert v.speed == pytest.approx(0, abs=0.01) 40 | 41 | 42 | def test_front(): 43 | r = Road(RoadNetwork.straight_road_network(1)) 44 | v1 = Vehicle(road=r, position=[0, 0], speed=20) 45 | v2 = Vehicle(road=r, position=[10, 0], speed=10) 46 | r.vehicles.extend([v1, v2]) 47 | 48 | assert v1.lane_distance_to(v2) == pytest.approx(10) 49 | assert v2.lane_distance_to(v1) == pytest.approx(-10) 50 | 51 | 52 | def test_collision(): 53 | # Collision between two vehicles 54 | r = Road(RoadNetwork.straight_road_network(1)) 55 | v1 = Vehicle(road=r, position=[0, 0], speed=10) 56 | v2 = Vehicle(road=r, position=[4, 0], speed=20) 57 | v1.handle_collisions(v2) 58 | 59 | assert v1.crashed and v2.crashed 60 | # Collision between a vehicle and an obstacle 61 | v3 = Vehicle(road=r, position=[20, 0], speed=10) 62 | o = Obstacle(road=r, position=[23, 0]) 63 | v3.handle_collisions(o) 64 | 65 | assert v3.crashed and o.crashed 66 | # Collision between a vehicle and a landmark 67 | v4 = Vehicle(road=r, position=[40, 0], speed=10) 68 | l = Landmark(road=r, position=[43, 0]) 69 | v4.handle_collisions(l) 70 | 71 | assert v4.crashed is False 72 | assert l.hit 73 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_uncertainty.py: -------------------------------------------------------------------------------- 1 | from highway_env.road.road import Road, RoadNetwork 2 | from highway_env.vehicle.uncertainty.prediction import IntervalVehicle 3 | 4 | FPS = 15 5 | 6 | 7 | def test_partial(): 8 | road = Road(RoadNetwork.straight_road_network()) 9 | v = IntervalVehicle(road, position=[0, 0], speed=20, heading=0) 10 | for _ in range(2 * FPS): 11 | v.step(dt=1/FPS, mode="partial") 12 | assert v.interval.position[0, 0] <= v.position[0] <= v.interval.position[1, 0] 13 | assert v.interval.position[0, 1] <= v.position[1] <= v.interval.position[1, 1] 14 | assert v.interval.heading[0] <= v.heading <= v.interval.heading[1] 15 | 16 | 17 | def test_predictor(): 18 | road = Road(RoadNetwork.straight_road_network()) 19 | v = IntervalVehicle(road, position=[0, 0], speed=20, heading=0) 20 | for _ in range(2 * FPS): 21 | v.step(dt=1/FPS, mode="predictor") 22 | assert v.interval.position[0, 0] <= v.position[0] <= v.interval.position[1, 0] 23 
| assert v.interval.position[0, 1] <= v.position[1] <= v.interval.position[1, 1] 24 | assert v.interval.heading[0] <= v.heading <= v.interval.heading[1] 25 | -------------------------------------------------------------------------------- /introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/introduction.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import hydra 4 | import types 5 | import torch 6 | import os 7 | import random 8 | import gym 9 | import torch.nn.functional as F 10 | import utils.util as util 11 | import itertools 12 | import numpy as np 13 | from tensorboardX import SummaryWriter 14 | from itertools import count 15 | from make_envs import make_env 16 | from omegaconf import DictConfig, OmegaConf 17 | from dataset.rs_memory import Memory 18 | from dataset.load_data import Dataset 19 | from torch.autograd import Variable 20 | from model.sac_rs import SAC_RS 21 | torch.set_num_threads(2) 22 | cur_pth = os.getcwd() 23 | def get_args(cfg: DictConfig): 24 | # cfg.device = "cpu" 25 | cfg.device = "cuda:0" if torch.cuda.is_available() else "cpu" 26 | cfg.hydra_base_dir = os.getcwd() 27 | print(OmegaConf.to_yaml(cfg)) 28 | return cfg 29 | 30 | 31 | def make_agent(env, args): 32 | obs_dim = env.observation_space.shape[0] 33 | action_dim = env.action_space.shape[0] 34 | action_range = [ 35 | float(env.action_space.low.min()), 36 | float(env.action_space.high.max()) 37 | ] 38 | args.agent.obs_dim = obs_dim 39 | args.agent.action_dim = action_dim 40 | agent = SAC_RS(obs_dim, action_dim, action_range, args.train.batch, args) 41 | return agent 42 | 43 | 44 | 45 | def get_re_obs(obs): 46 | re_obs = np.array(obs) 47 | sz = re_obs.shape 48 | for i in range(1,sz[0]): 49 | re_obs[i]=re_obs[i]-re_obs[0] 50 | return re_obs 51 | 52 | def save(agent,args,cnt): 53 | output_dir=f'{args.env.name}' 54 | if not os.path.exists(output_dir): 55 | os.mkdir(output_dir) 56 | agent.save(f'{output_dir}/{args.agent.name}_{cnt}') 57 | print("saved successfully!") 58 | 59 | @hydra.main(config_path="config", config_name="config") 60 | def main(cfg: DictConfig): 61 | args = get_args(cfg) 62 | random.seed(args.seed) 63 | np.random.seed(args.seed) 64 | torch.manual_seed(args.seed) 65 | env_args=args.env 66 | env = make_env(args) 67 | eval_env = make_env(args) 68 | env.seed(args.seed) 69 | eval_env.seed(args.seed + 10) 70 | print(cur_pth) 71 | dataset_0=Dataset(cur_pth, args) 72 | g1 = int(env_args.g1) 73 | REPLAY_MEMORY = int(env_args.replay_mem) # total buffer size 74 | INITIAL_MEMORY = int(env_args.initial_mem) # buffer size that can start learning 75 | EPISODE_STEPS = int(env_args.eps_steps) # maximum epoch_step number 76 | ROUND_LEARN_STEPS = int(env_args.round_steps) 77 | LEARN_STEPS = ROUND_LEARN_STEPS*dataset_0.expert_data["lengths"][0] # maximum learning_step number 78 | agent = make_agent(env, args) 79 | online_memory_replay = Memory(REPLAY_MEMORY//2, args.seed+1) 80 | learn_step = 0 81 | all_step = 0 82 | sg_count = 0 83 | writer = SummaryWriter(log_dir="./logs") 84 | output_dir=f'./data/{args.env.name}/CSIRL/{dataset_0.get_tra_num()}' 85 | if not os.path.exists(output_dir): 86 | os.makedirs(output_dir) 87 | output_dir = output_dir + f'/{args.seed}.pkl' 88 | test_reward = [] 89 | test_step = [] 90 | for _1 in count(): 91 
| sg_count += 1 92 | save(agent, args, sg_count) 93 | print("| subgoal count %d |" %(sg_count)) 94 | online_memory_replay.clear() 95 | begin_learn = False 96 | goal_learn_step = 0 97 | for __ in count(): 98 | if goal_learn_step > ROUND_LEARN_STEPS: 99 | break 100 | state = env.reset() 101 | episode_reward = 0 102 | done = False 103 | #print(_) 104 | train_reward = -999.9 105 | for episode_step in range(EPISODE_STEPS): 106 | # env.render() 107 | if learn_step % args.env.eval_interval == 1 and begin_learn == True: 108 | eval_returns, eval_timesteps = util.evaluate(agent, eval_env, num_episodes=args.eval.eps) 109 | returns = np.mean(eval_returns) 110 | writer.add_scalar('eval/episode_reward', returns, learn_step) 111 | test_step.append(learn_step) 112 | test_reward.append(returns) 113 | print("| test | steps: %2d | episode_reward: %.3f |" %(learn_step,returns)) 114 | record_data = {"steps": test_step, "rewards": test_reward} 115 | torch.save(record_data, output_dir) 116 | if all_step < args.num_seed_steps: 117 | # Seed replay buffer with random actions 118 | action = env.action_space.sample() 119 | else: 120 | with util.eval_mode(agent): 121 | action = agent.choose_action(state, sample=True) 122 | next_state, reward, done, _ = env.step(action) 123 | train_reward = max(train_reward, -_["dis"]) 124 | re_obs = get_re_obs(state) 125 | reward1= util.get_matching_reward(state, next_state, dataset_0, agent.get_reward(torch.tensor(re_obs)), g1, args) 126 | done_no_lim = done 127 | if str(env.__class__.__name__).find('TimeLimit') >= 0 and episode_step + 1 == env._max_episode_steps: 128 | done_no_lim = 0 129 | online_memory_replay.add((state,next_state, action, re_obs, reward1, done_no_lim)) 130 | if online_memory_replay.size() > INITIAL_MEMORY: 131 | if begin_learn is False: 132 | print('Learn begins!') 133 | begin_learn = True 134 | 135 | goal_learn_step += 1 136 | learn_step += 1 137 | agent.update(online_memory_replay, dataset_0, writer, learn_step) 138 | if learn_step == LEARN_STEPS: 139 | print('Finished!') 140 | writer.close() 141 | record_data = {"steps":test_step, "rewards": test_reward} 142 | print(output_dir) 143 | torch.save(record_data,output_dir) 144 | return 145 | if done: 146 | break 147 | state = next_state 148 | if begin_learn: 149 | writer.add_scalar('train/reward',train_reward,learn_step) 150 | print("\n| train | steps: %2d | episode_reward: %.3f |" %(learn_step,train_reward)) 151 | eval_returns, eval_timesteps = util.evaluate(agent, eval_env, num_episodes=args.eval.eps) 152 | returns = np.mean(eval_returns) 153 | writer.add_scalar('eval/episode_reward', returns, learn_step) 154 | test_step.append(learn_step) 155 | test_reward.append(returns) 156 | print("| test | steps: %2d | episode_reward: %.3f |" %(learn_step,returns)) 157 | dataset_0.select_subgoal(agent, args) 158 | 159 | writer.close() 160 | if __name__ == "__main__": 161 | main() -------------------------------------------------------------------------------- /make_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import highway_env 3 | from stable_baselines3.common.atari_wrappers import AtariWrapper 4 | from stable_baselines3.common.monitor import Monitor 5 | 6 | from wrappers.atari_wrapper import ScaledFloatFrame, FrameStack, PyTorchFrame 7 | from wrappers.normalize_action_wrapper import check_and_normalize_box_actions 8 | import envs 9 | import numpy as np 10 | 11 | # Register all custom envs 12 | envs.register_custom_envs() 13 | 14 | 15 | def make_atari(env): 16 
| env = AtariWrapper(env) 17 | env = PyTorchFrame(env) 18 | env = FrameStack(env, 4) 19 | return env 20 | 21 | 22 | def is_atari(env_name): 23 | return env_name in ['PongNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'SpaceInvadersNoFrameskip-v4'] 24 | 25 | 26 | def is_highway(env_name): 27 | return env_name in ['highway-fast-v0'] 28 | 29 | def is_merge(env_name): 30 | return env_name in ['merge-v0'] 31 | 32 | def is_roundabout(env_name): 33 | return env_name in ['roundabout-v0','roundabout-v1'] 34 | 35 | def is_intersection(env_name): 36 | return env_name in ['intersection-v0'] 37 | 38 | def is_mujoco(env_name): 39 | return env_name in ['antmaze-umaze-v0'] 40 | 41 | class HighwayObs(gym.ObservationWrapper): 42 | def __init__(self, env): 43 | super(HighwayObs, self).__init__(env) 44 | shape = self.observation_space.shape 45 | self.observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(shape[0] * shape[1],), dtype=np.float32) 46 | 47 | def observation(self, observation): 48 | return observation.flatten() 49 | 50 | def make_env(args, monitor=True): 51 | print(args.env.name) 52 | env = gym.make(args.env.name) 53 | 54 | if monitor: 55 | env = Monitor(env, "gym") 56 | 57 | if is_atari(args.env.name): 58 | env = make_atari(env) 59 | 60 | if is_highway(args.env.name): 61 | env = HighwayObs(env) 62 | if args.env.action_type == 'continues': 63 | env_config = { 64 | "action": { 65 | "type": "ContinuousAction" 66 | }, 67 | "is_record": False, 68 | "total_time": 0.2, 69 | "simulation_frequency": 1, 70 | "duration": 150, 71 | "vehicles_speed": args.env.speed, 72 | "vehicles_density": args.env.density 73 | } 74 | env.configure(env_config) 75 | env.reset() 76 | if is_merge(args.env.name): 77 | env = HighwayObs(env) 78 | if args.env.action_type == 'continues': 79 | env_config = { 80 | "action": { 81 | "type": "ContinuousAction" 82 | }, 83 | "is_record": False, 84 | "total_time": 0.2, 85 | "simulation_frequency": 1, 86 | "duration": 60 87 | } 88 | env.configure(env_config) 89 | env.reset() 90 | if is_roundabout(args.env.name): 91 | env = HighwayObs(env) 92 | if args.env.action_type == 'continues': 93 | env_config = { 94 | "action": { 95 | "type": "ContinuousAction" 96 | }, 97 | "is_record": False, 98 | "total_time": 0.2, 99 | "simulation_frequency": 1, 100 | "duration": 55 101 | } 102 | env.configure(env_config) 103 | env.reset() 104 | if is_intersection(args.env.name): 105 | env = HighwayObs(env) 106 | if args.env.action_type == 'continues': 107 | env_config = { 108 | "action": { 109 | "type": "ContinuousAction" 110 | }, 111 | "is_record": False, 112 | "total_time": 0.2, 113 | "destination": args.env.destination, 114 | "finish_position":None, 115 | "simulation_frequency": 1, 116 | "duration": 65 117 | } 118 | if args.env.destination=="o11": 119 | env_config["finish_position"]=[-45.0, -2.0] 120 | elif args.env.destination=="o21": 121 | env_config["finish_position"]=[2.0, -45.0] 122 | else: 123 | env_config["finish_position"]=[45.0, 6.0] 124 | env.configure(env_config) 125 | env.reset() 126 | # Normalize box actions to [-1, 1] 127 | env = check_and_normalize_box_actions(env) 128 | return env 129 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/model/__init__.py -------------------------------------------------------------------------------- /model/agent.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/model/agent.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.5.0 2 | alabaster==0.7.8 3 | antlr4-python3-runtime==4.8 4 | asn1crypto==0.24.0 5 | atari-py==0.2.6 6 | atomicwrites==1.2.1 7 | attrs==18.2.0 8 | autobahn==17.10.1 9 | Automat==0.6.0 10 | Babel==2.4.0 11 | bleach==2.1.2 12 | blinker==1.4 13 | box2d-py==2.3.8 14 | cachetools==4.2.4 15 | carla==0.9.13 16 | cbor==1.0.0 17 | certifi==2018.8.24 18 | cffi==1.15.1 19 | chardet==3.0.4 20 | charset-normalizer==2.0.12 21 | click==8.0.4 22 | cloud-init==22.2 23 | cloudpickle==1.3.0 24 | colorama==0.3.7 25 | command-not-found==0.3 26 | configobj==5.0.6 27 | constantly==15.1.0 28 | cryptography==2.1.4 29 | cycler==0.10.0 30 | Cython==0.29.32 31 | dataclasses==0.8 32 | decorator==4.1.2 33 | distro-info===0.18ubuntu0.18.04.1 34 | docker-pycreds==0.4.0 35 | docopt==0.6.2 36 | docutils==0.14 37 | entrypoints==0.2.3.post1 38 | enum34==1.1.6 39 | future==0.16.0 40 | gitdb==4.0.9 41 | GitPython==3.1.18 42 | glfw==2.5.5 43 | google-auth==1.35.0 44 | google-auth-oauthlib==0.4.6 45 | grpcio==1.48.2 46 | gym==0.17.1 47 | gym-minigrid==1.0.2 48 | html5lib==0.999999999 49 | httplib2==0.9.2 50 | hydra-core==1.0.6 51 | hyperlink==17.3.1 52 | idna==2.7 53 | imageio==2.4.1 54 | imagesize==0.7.1 55 | importlib-metadata==4.8.3 56 | importlib-resources==5.4.0 57 | incremental==16.10.1 58 | ipykernel==4.8.2 59 | ipython==5.5.0 60 | ipython_genutils==0.2.0 61 | ipywidgets==6.0.0 62 | Jinja2==2.10 63 | jsonpatch==1.16 64 | jsonpickle==0.9.6 65 | jsonpointer==1.10 66 | jsonschema==2.6.0 67 | jupyter-client==5.2.2 68 | jupyter-core==4.4.0 69 | keras==2.10.0 70 | keyring==10.6.0 71 | keyrings.alt==3.0 72 | kiwisolver==1.0.1 73 | language-selector==0.1 74 | lockfile==0.12.2 75 | lz4==0.10.1 76 | Markdown==3.3.7 77 | MarkupSafe==1.0 78 | matplotlib==3.0.0 79 | mistune==0.8.3 80 | mock==2.0.0 81 | more-itertools==4.3.0 82 | mpi4py==2.0.0 83 | mpyq==0.2.5 84 | mujoco-maze==0.2.0 85 | mujoco-py==2.0.2.0 86 | munch==2.3.2 87 | nbconvert==5.3.1 88 | nbformat==4.4.0 89 | netifaces==0.10.4 90 | notebook==5.2.2 91 | numpy==1.19.5 92 | numpydoc==0.7.0 93 | oauthlib==3.2.2 94 | olefile==0.45.1 95 | omegaconf==2.0.6 96 | opencv-python==4.5.1.48 97 | PAM==0.4.2 98 | pandas==1.1.5 99 | pandocfilters==1.4.2 100 | pathlib2==2.3.2 101 | pathtools==0.1.2 102 | pbr==4.3.0 103 | pexpect==4.2.1 104 | pickleshare==0.7.4 105 | Pillow==5.3.0 106 | pluggy==0.7.1 107 | portpicker==1.2.0 108 | probscale==0.2.3 109 | promise==2.3 110 | prompt-toolkit==1.0.15 111 | protobuf==3.19.6 112 | psutil==5.9.4 113 | py==1.6.0 114 | py-ubjson==0.8.5 115 | pyasn1==0.4.2 116 | pyasn1-modules==0.2.1 117 | pycparser==2.21 118 | pycrypto==2.6.1 119 | pygame==1.9.4 120 | pyglet==1.5.0 121 | Pygments==2.2.0 122 | PyGObject==3.26.1 123 | PyJWT==1.5.3 124 | PyNaCl==1.1.2 125 | pyOpenSSL==17.5.0 126 | pyparsing==2.2.2 127 | PySC2==2.0.2 128 | pyserial==3.4 129 | pytest==3.8.2 130 | python-apt==1.6.5+ubuntu0.6 131 | python-dateutil==2.7.3 132 | python-debian==0.1.32 133 | python-snappy==0.5 134 | PyTrie==0.2 135 | pytz==2018.3 136 | pyxdg==0.25 137 | PyYAML==6.0 138 | pyzmq==16.0.2 139 | qrcode==5.3 140 | requests==2.27.1 141 | requests-oauthlib==1.3.1 142 | requests-unixsocket==0.1.5 143 | roman==2.0.0 
144 | rsa==4.9 145 | s2clientprotocol==4.6.1.68195.0 146 | sacred==0.7.2 147 | scipy==1.1.0 148 | SecretStorage==2.3.1 149 | sentry-sdk==1.11.1 150 | service-identity==16.0.0 151 | setproctitle==1.2.3 152 | shortuuid==1.0.11 153 | simplegeneric==0.8.1 154 | six==1.11.0 155 | sk-video==1.1.10 156 | smmap==5.0.0 157 | snakeviz==1.0.0 158 | sos==4.3 159 | Sphinx==1.6.7 160 | ssh-import-id==5.7 161 | stable-baselines3==1.0 162 | systemd-python==234 163 | tensorboard==2.4.0 164 | tensorboard-logger==0.1.0 165 | tensorboard-plugin-wit==1.8.1 166 | tensorboardX==2.1 167 | terminado==0.7 168 | testpath==0.3.1 169 | torch==1.7.1 170 | torchvision==0.8.2 171 | tornado==5.1.1 172 | tqdm==4.42.1 173 | traitlets==4.3.2 174 | Twisted==17.9.0 175 | txaio==2.8.1 176 | typing_extensions==4.1.1 177 | u-msgpack-python==2.1 178 | ubuntu-advantage-tools==27.0 179 | ufw==0.36 180 | unattended-upgrades==0.1 181 | urllib3==1.26.12 182 | wandb==0.13.6 183 | wcwidth==0.1.7 184 | webencodings==0.5 185 | websocket-client==0.53.0 186 | Werkzeug==2.0.3 187 | whichcraft==0.5.2 188 | wrapt==1.10.11 189 | wsaccel==0.6.2 190 | zipp==3.6.0 191 | zope.interface==4.3.2 192 | -------------------------------------------------------------------------------- /scripts/highway-fast-continues-v0-s35-d1.sh: -------------------------------------------------------------------------------- 1 | 2 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=0 3 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=1 4 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=2 5 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=3 6 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=4 7 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=5 8 | -------------------------------------------------------------------------------- /scripts/intersection-continues-o1.sh: -------------------------------------------------------------------------------- 1 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=0 2 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=1 3 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=2 4 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=3 5 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=4 6 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=5 7 | 8 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=0 9 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=1 10 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=2 11 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=3 12 | 
python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=4 13 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=5 14 | 15 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=0 16 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=1 17 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=2 18 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=3 19 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=4 20 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=5 21 | 22 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=0 23 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=1 24 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=2 25 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=3 26 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=4 27 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=5 28 | 29 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=0 30 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=1 31 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=2 32 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=3 33 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=4 34 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=5 35 | 36 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=0 37 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=1 38 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=2 39 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=3 40 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=4 41 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=5 42 | -------------------------------------------------------------------------------- /scripts/merge-v0.sh: -------------------------------------------------------------------------------- 1 | 2 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=0 3 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=1 4 | 
python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=2 5 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=3 6 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=4 7 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=5 8 | -------------------------------------------------------------------------------- /scripts/roundabout-v1.sh: -------------------------------------------------------------------------------- 1 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=0 2 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=1 3 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=2 4 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=3 5 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=4 6 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=5 7 | 8 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=0 9 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=1 10 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=2 11 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=3 12 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=4 13 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=5 14 | 15 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=0 16 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=1 17 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=2 18 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=3 19 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=4 20 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=5 21 | 22 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=0 23 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=1 24 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=2 25 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=3 26 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=4 27 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=5 28 | 29 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=0 30 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=1 31 | python main.py env=roundabout_continues_v1 
expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=2 32 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=3 33 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=4 34 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=5 35 | 36 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=0 37 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=1 38 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=2 39 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=3 40 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=4 41 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=5 42 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/utils/__init__.py -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import glob 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from torchvision.utils import make_grid, save_image 8 | 9 | def write_to_file(dname, dmap, cmap, itr): 10 | fid = open(dname + '-results.log', 'a+') 11 | string_to_write = str(itr) 12 | for item in dmap: 13 | string_to_write += ' ' + '%.2f' % item 14 | string_to_write += ' ' + '%.2f' % cmap 15 | fid.write(string_to_write + '\n') 16 | fid.close() 17 | 18 | 19 | def get_labels(seq_len, n_subgoals): 20 | # Equi-partition labels 21 | stops = np.array(range(1, n_subgoals + 1)).astype('float32') / n_subgoals 22 | labels = np.zeros((seq_len, len(stops)), dtype=float) 23 | prev_idx = 0 24 | for i, stop in enumerate(stops): 25 | idx = int(seq_len * stop) 26 | labels[prev_idx:idx, i] = 1. 
27 | prev_idx = idx 28 | return labels 29 | 30 | 31 | def dist(a, b): 32 | return np.sum(np.abs(a - b)) 33 | class eval_mode(object): 34 | def __init__(self, *models): 35 | self.models = models 36 | 37 | def __enter__(self): 38 | self.prev_states = [] 39 | for model in self.models: 40 | self.prev_states.append(model.training) 41 | model.train(False) 42 | 43 | def __exit__(self, *args): 44 | for model, state in zip(self.models, self.prev_states): 45 | model.train(state) 46 | return False 47 | 48 | def get_matching_reward(s, next_s, tra_dataset, reward_w, g1, args): 49 | _s = np.expand_dims(s,axis=0) 50 | id, sg = tra_dataset.find_subgoal(_s) 51 | sg = np.squeeze(sg, axis=0) 52 | h1 = np.linalg.norm(s[args.env.l_pos:args.env.r_pos+1]-sg[args.env.l_pos:args.env.r_pos+1]) 53 | h2 = np.linalg.norm(next_s[args.env.l_pos:args.env.r_pos+1]-sg[args.env.l_pos:args.env.r_pos+1]) 54 | reward_m = ( h1 - h2 ) * g1 55 | return reward_m 56 | 57 | 58 | def evaluate(actor, env, num_episodes=10, vis=True): 59 | total_timesteps = [] 60 | total_returns = [] 61 | 62 | while len(total_returns) < num_episodes: 63 | state = env.reset() 64 | done = False 65 | info={} 66 | ret = -999.9 67 | with eval_mode(actor): 68 | while not done: 69 | action = actor.choose_action(state, sample=False) 70 | next_state, reward, done, info = env.step(action) 71 | state = next_state 72 | ret = max(ret, -info['dis']) 73 | total_returns.append(ret) 74 | return total_returns, total_timesteps 75 | -------------------------------------------------------------------------------- /wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/wrappers/__init__.py -------------------------------------------------------------------------------- /wrappers/atari_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | from collections import deque 5 | from gym import spaces 6 | 7 | 8 | class FrameStack(gym.Wrapper): 9 | def __init__(self, env, k): 10 | """Stack k last frames. 11 | Returns lazy array, which is much more memory efficient. 12 | Expects inputs to be of shape num_channels x height x width. 13 | """ 14 | gym.Wrapper.__init__(self, env) 15 | self.k = k 16 | self.frames = deque([], maxlen=k) 17 | shp = env.observation_space.shape 18 | self.observation_space = spaces.Box(low=0, high=255, shape=( 19 | shp[0] * k, shp[1], shp[2]), dtype=np.uint8) 20 | 21 | def reset(self): 22 | ob = self.env.reset() 23 | for _ in range(self.k): 24 | self.frames.append(ob) 25 | return self._get_ob() 26 | 27 | def step(self, action): 28 | ob, reward, done, info = self.env.step(action) 29 | self.frames.append(ob) 30 | return self._get_ob(), reward, done, info 31 | 32 | def _get_ob(self): 33 | assert len(self.frames) == self.k 34 | return LazyFrames(list(self.frames)) 35 | 36 | 37 | class ScaledFloatFrame(gym.ObservationWrapper): 38 | def __init__(self, env): 39 | gym.ObservationWrapper.__init__(self, env) 40 | self.observation_space = gym.spaces.Box( 41 | low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) 42 | 43 | def observation(self, observation): 44 | # careful! This undoes the memory optimization, use 45 | # with smaller replay buffers only. 
46 | return np.array(observation).astype(np.float32) / 255.0 47 | 48 | 49 | class LazyFrames(object): 50 | def __init__(self, frames): 51 | """This object ensures that common frames between the observations are only stored once. 52 | It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay 53 | buffers. 54 | This object should only be converted to numpy array before being passed to the model.""" 55 | self._frames = frames 56 | self._out = None 57 | 58 | def _force(self): 59 | if self._out is None: 60 | self._out = np.concatenate(self._frames, axis=0) 61 | self._frames = None 62 | return self._out 63 | 64 | def __array__(self, dtype=None): 65 | out = self._force() 66 | if dtype is not None: 67 | out = out.astype(dtype) 68 | return out 69 | 70 | def __len__(self): 71 | return len(self._force()) 72 | 73 | def __getitem__(self, i): 74 | return self._force()[i] 75 | 76 | def count(self): 77 | frames = self._force() 78 | return frames.shape[frames.ndim - 1] 79 | 80 | def frame(self, i): 81 | return self._force()[..., i] 82 | 83 | 84 | class PyTorchFrame(gym.ObservationWrapper): 85 | """Image shape to num_channels x height x width""" 86 | 87 | def __init__(self, env): 88 | super(PyTorchFrame, self).__init__(env) 89 | shape = self.observation_space.shape 90 | self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=( 91 | shape[-1], shape[0], shape[1]), dtype=np.uint8) 92 | 93 | def observation(self, observation): 94 | return np.rollaxis(observation, 2) 95 | -------------------------------------------------------------------------------- /wrappers/normalize_action_wrapper.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Wrapper to normalize gym.spaces.Box actions in [-1, 1].""" 17 | 18 | import gym 19 | from gym import spaces 20 | import numpy as np 21 | 22 | 23 | class NormalizeBoxActionWrapper(gym.ActionWrapper): 24 | """Rescale the action space of the environment.""" 25 | 26 | def __init__(self, env): 27 | if not isinstance(env.action_space, spaces.Box): 28 | raise ValueError('env %s does not use spaces.Box.' 
% str(env)) 29 | super(NormalizeBoxActionWrapper, self).__init__(env) 30 | # self._max_episode_steps = env.max_episode_steps 31 | 32 | def action(self, action): 33 | # rescale the action 34 | low, high = self.env.action_space.low, self.env.action_space.high 35 | scaled_action = low + (action + 1.0) * (high - low) / 2.0 36 | scaled_action = np.clip(scaled_action, low, high) 37 | 38 | return scaled_action 39 | 40 | def reverse_action(self, scaled_action): 41 | low, high = self.env.action_space.low, self.env.action_space.high 42 | action = (scaled_action - low) * 2.0 / (high - low) - 1.0 43 | return action 44 | 45 | 46 | def check_and_normalize_box_actions(env): 47 | """Wrap env to normalize actions if [low, high] != [-1, 1].""" 48 | if isinstance(env.action_space, spaces.Box): 49 | low, high = env.action_space.low, env.action_space.high 50 | if (np.abs(low + np.ones_like(low)).max() > 1e-6 or 51 | np.abs(high - np.ones_like(high)).max() > 1e-6): 52 | print('--> Normalizing environment actions.') 53 | return NormalizeBoxActionWrapper(env) 54 | 55 | # Environment does not need to be normalized. 56 | return env 57 | --------------------------------------------------------------------------------
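
Note on the action normalization in wrappers/normalize_action_wrapper.py above: the snippet below is a minimal, self-contained sketch (not part of the repository) of the rescaling that NormalizeBoxActionWrapper.action() and reverse_action() perform, written with plain numpy on a hypothetical two-dimensional action range; the `low`/`high` values are illustrative assumptions, not taken from any of the environments configured here.

import numpy as np

low = np.array([-5.0, -0.3])   # assumed action_space.low (illustrative only)
high = np.array([5.0, 0.3])    # assumed action_space.high (illustrative only)

def to_env_action(normalized_action):
    # Affine map from the agent's [-1, 1] range to the env's [low, high] box,
    # followed by a clip, mirroring NormalizeBoxActionWrapper.action().
    scaled = low + (normalized_action + 1.0) * (high - low) / 2.0
    return np.clip(scaled, low, high)

def to_normalized_action(env_action):
    # Inverse affine map, mirroring reverse_action().
    return (env_action - low) * 2.0 / (high - low) - 1.0

a = np.array([0.5, -1.0])
print(to_env_action(a))                        # -> [ 2.5 -0.3]
print(to_normalized_action(to_env_action(a)))  # -> [ 0.5 -1. ]

The clip after the affine map mirrors the wrapper and guards against agent outputs that stray slightly outside [-1, 1], which is why check_and_normalize_box_actions only installs the wrapper when the environment's box is not already [-1, 1].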