├── LICENSE ├── README.md ├── __init__.py ├── config ├── __init__.py ├── agent │ └── sac.yaml ├── cfg.py ├── config.yaml └── env │ ├── highway-fast-continues-v0_s35_d1.yaml │ ├── intersection-continues-v0-o1.yaml │ ├── merge-continues-v0.yaml │ └── roundabout_continues_v1.yaml ├── dataset ├── __init__.py ├── load_data.py └── rs_memory.py ├── exp-highway-table.png ├── exp-highway.png ├── expert_data ├── highway-fast-continues-v0-s35-d1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── intersection-continues-v0-o1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── merge-continues-v0 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy └── roundabout-continues-v1 │ ├── 1.npy │ ├── 10.npy │ ├── 14.npy │ ├── 15.npy │ ├── 19.npy │ ├── 20.npy │ ├── 24.npy │ ├── 25.npy │ ├── 4.npy │ ├── 5.npy │ └── 9.npy ├── framework.png ├── highway_modify ├── .github │ └── workflows │ │ ├── build.yml │ │ └── release.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── codecov.yml ├── docs │ ├── Makefile │ ├── requirements.txt │ └── source │ │ ├── actions │ │ └── index.rst │ │ ├── bibliography │ │ ├── biblio.bib │ │ └── index.rst │ │ ├── conf.py │ │ ├── dynamics │ │ ├── index.rst │ │ ├── road │ │ │ ├── lane.rst │ │ │ ├── regulation.rst │ │ │ └── road.rst │ │ └── vehicle │ │ │ ├── behavior.rst │ │ │ ├── controller.rst │ │ │ └── kinematics.rst │ │ ├── environments │ │ ├── highway.rst │ │ ├── index.rst │ │ ├── intersection.rst │ │ ├── merge.rst │ │ ├── parking.rst │ │ ├── racetrack.rst │ │ └── roundabout.rst │ │ ├── faq.rst │ │ ├── graphics │ │ └── index.rst │ │ ├── index.rst │ │ ├── installation.rst │ │ ├── make_your_own.rst │ │ ├── multi_agent.rst │ │ ├── observations │ │ └── index.rst │ │ ├── quickstart.rst │ │ ├── rewards │ │ └── index.rst │ │ └── user_guide.rst ├── highway_env │ ├── __init__.py │ ├── envs │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── abstract.py │ │ │ ├── action.py │ │ │ ├── finite_mdp.py │ │ │ ├── graphics.py │ │ │ └── observation.py │ │ ├── exit_env.py │ │ ├── highway_env.py │ │ ├── intersection_env.py │ │ ├── lane_keeping_env.py │ │ ├── merge_env.py │ │ ├── parking_env.py │ │ ├── racetrack_env.py │ │ ├── roundabout_line2_env.py │ │ ├── roundabout_line4_env.py │ │ ├── summon_env.py │ │ ├── two_way_env.py │ │ └── u_turn_env.py │ ├── interval.py │ ├── road │ │ ├── __init__.py │ │ ├── graphics.py │ │ ├── lane.py │ │ ├── regulation.py │ │ ├── road.py │ │ └── spline.py │ ├── utils.py │ └── vehicle │ │ ├── __init__.py │ │ ├── behavior.py │ │ ├── controller.py │ │ ├── dynamics.py │ │ ├── graphics.py │ │ ├── kinematics.py │ │ ├── objects.py │ │ └── uncertainty │ │ ├── __init__.py │ │ ├── estimation.py │ │ └── prediction.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests │ ├── __init__.py │ ├── envs │ ├── __init__.py │ ├── test_actions.py │ ├── test_env_preprocessors.py │ ├── test_gym.py │ └── test_time.py │ ├── graphics │ └── test_render.py │ ├── road │ └── test_road.py │ ├── test_utils.py │ └── vehicle │ ├── test_behavior.py │ ├── test_control.py │ ├── test_dynamics.py │ └── test_uncertainty.py ├── introduction.png ├── main.py ├── make_envs.py ├── model ├── __init__.py ├── agent.py ├── ego_attention.py ├── sac.py ├── sac_models.py └── sac_rs.py ├── requirements.txt ├── scripts ├── 
highway-fast-continues-v0-s35-d1.sh ├── intersection-continues-o1.sh ├── merge-v0.sh └── roundabout-v1.sh ├── utils ├── __init__.py └── util.py └── wrappers ├── __init__.py ├── atari_wrapper.py └── normalize_action_wrapper.py /README.md: -------------------------------------------------------------------------------- 1 | # Curricular Subgoal for Inverse Reinforcement Learning 2 | 3 | [![License: Apache](https://img.shields.io/badge/License-Apache-blue.svg)](LICENSE) 4 | [![arXiv](https://img.shields.io/badge/arXiv-2306.08232-b31b1b.svg)](https://arxiv.org/abs/2306.08232) 5 | 6 | 7 | Official codebase for paper [Curricular Subgoal for Inverse Reinforcement Learning](https://arxiv.org/abs/2306.08232). 8 | 9 |
10 | ![image](https://github.com/Plankson/CSIRL/blob/master/introduction.png) 11 |
12 | 13 | ## Overview 14 | 15 | **TLDR:** Our main contribution is a dedicated curricular subgoal-based IRL framework that enables multi-stage imitation based on expert demonstrations. Extensive experiments on the D4RL and autonomous driving benchmarks show that the proposed CSIRL framework yields significantly superior performance to state-of-the-art competitors, as well as better interpretability during training. Moreover, robustness analyses show that CSIRL maintains high performance even with only one expert trajectory. 16 | 17 | **Abstract:** Inverse Reinforcement Learning (IRL) aims to reconstruct the reward function from expert demonstrations to facilitate policy learning, and has demonstrated remarkable success in imitation learning. To promote expert-like behavior, existing IRL methods mainly focus on learning global reward functions to minimize the trajectory difference between the imitator and the expert. However, these global designs are still limited by redundant noise and error-propagation problems, leading to unsuitable reward assignment and thus degrading the agent's capability in complex multi-stage tasks. In this paper, we propose a novel Curricular Subgoal-based Inverse Reinforcement Learning (CSIRL) framework that explicitly disentangles one task into several local subgoals to guide agent imitation. Specifically, CSIRL first introduces the decision uncertainty of the trained agent over expert trajectories to dynamically select subgoals, which directly determine the exploration boundary of each task stage. To further acquire local reward functions for each stage, we customize a meta-imitation objective based on these curricular subgoals to train an intrinsic reward generator. Experiments on the D4RL and autonomous driving benchmarks demonstrate that the proposed method yields results superior to state-of-the-art counterparts, as well as better interpretability. 18 | 19 | ![image](https://github.com/Plankson/CSIRL/blob/master/framework.png) 20 | 21 | ## Prerequisites 22 | 23 | #### Install dependencies 24 | 25 | See the `requirements.txt` file for the list of dependencies and how to install them. 26 | 27 | #### Install highway-env 28 | Note that we make some modifications to the original [highway-env](https://github.com/eleurent/highway-env) so that it better fits realistic driving scenarios. The modified highway-env is provided in `highway_modify` and can be installed by running: 29 | 30 | ```bash 31 | cd highway_modify 32 | pip install -e . 33 | ``` 34 | 35 | 36 | ## Usage 37 | Detailed instructions to replicate the results in the paper are provided in the `scripts` directory. 38 | The general form of the training commands is: 39 | 40 | ```bash 41 | # highway-fast 42 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 43 | 44 | # merge 45 | python main.py env=merge-continues-v0 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 46 | 47 | # roundabout 48 | python main.py env=roundabout_continues_v1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 49 | 50 | # intersection 51 | python main.py env=intersection-continues-v0-o1 expert.tra=<EXPERT_DATASET_PATH> seed=<SEED> 52 | ``` 53 | 54 | Make sure to replace `EXPERT_DATASET_PATH` with the path to the corresponding dataset in `expert_data`, and `SEED` with the desired random seed.
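For instance, a run on the highway-fast benchmark might look like the sketch below; it reuses the default seed from `config/config.yaml` and the leading-slash path format of the trajectory defaults in `config/env/highway-fast-continues-v0_s35_d1.yaml` (the exact invocations used for the paper are in `scripts/`).

```bash
# Minimal example run (a sketch, not the exact paper command):
# train CSIRL on highway-fast using the provided expert trajectory 20.npy,
# with seed=0 as in the default config/config.yaml
python main.py env=highway-fast-continues-v0_s35_d1 \
    expert.tra=/expert_data/highway-fast-continues-v0-s35-d1/20.npy \
    seed=0
```

Each `.npy` file in `expert_data` stores a dictionary of expert trajectories with keys such as `states`, `actions`, `next_states`, `rewards`, `dones`, and `lengths`, which is the format consumed by `dataset/load_data.py`.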
55 | 56 | 57 | ![image](https://github.com/Plankson/CSIRL/blob/master/exp-highway.png) 58 | 59 | 60 | ![image](https://github.com/Plankson/CSIRL/blob/master/exp-highway-table.png) 61 | 62 | ## Citation 63 | 64 | If you find this work useful for your research, please cite our paper: 65 | 66 | ``` 67 | @article{liu2023CSIRL, 68 | title={Curricular Subgoal for Inverse Reinforcement Learning}, 69 | author={Liu, Shunyu and Qing, Yunpeng and Xu, Shuqi and Wu, Hongyan and Zhang, Jiangtao and Cong, Jingyuan and Liu, Yunfu and Song, Mingli}, 70 | journal={arXiv preprint arXiv:2306.08232}, 71 | year={2023} 72 | } 73 | ``` 74 | 75 | ## Contact 76 | 77 | Please feel free to contact me via email (, ) if you are interested in my research :) 78 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/__init__.py -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/config/__init__.py -------------------------------------------------------------------------------- /config/agent/sac.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | agent: 4 | name: sac 5 | _target_: model.sac.SAC 6 | obs_dim: ??? # to be specified later 7 | action_dim: ??? # to be specified later 8 | 9 | critic_cfg: ${q_net} 10 | actor_cfg: ${diag_gaussian_actor} 11 | rs_cfg: ${intrinsic_reward_net} 12 | init_temp: 1e-2 # use a low temp for IL 13 | 14 | alpha_lr: 3e-4 15 | alpha_betas: [0.9, 0.999] 16 | 17 | actor_lr: 3e-4 18 | actor_betas: [0.9, 0.999] 19 | actor_update_frequency: 1 20 | 21 | critic_lr: 3e-4 22 | critic_betas: [0.9, 0.999] 23 | critic_tau: 0.005 24 | critic_target_update_frequency: 1 25 | 26 | grad_lr: 0.01 27 | rs_lr: 0.001 28 | rs_betas: [0.9, 0.999] 29 | rs_update_frequency: 1 30 | # learn temperature coefficient (disabled by default) 31 | learn_temp: false 32 | 33 | # Use either value_dice actor or normal SAC actor loss 34 | vdice_actor: false 35 | 36 | q_net: 37 | _target_: model.sac_models.MultiQCritic 38 | obs_dim: ${agent.obs_dim} 39 | action_dim: ${agent.action_dim} 40 | hidden_dim: 256 41 | hidden_depth: 2 42 | q_net_num: 5 43 | 44 | intrinsic_reward_net: 45 | _target_: model.sac_models.Intrinsic_Reward_Generator 46 | input_dim: ${agent.obs_dim} 47 | hidden_dim: 256 48 | hidden_depth: 2 49 | 50 | diag_gaussian_actor: 51 | _target_: model.sac_models.DiagGaussianActor 52 | obs_dim: ${agent.obs_dim} 53 | action_dim: ${agent.action_dim} 54 | hidden_dim: 256 55 | hidden_depth: 2 56 | log_std_bounds: [-5, 2] -------------------------------------------------------------------------------- /config/cfg.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | exp_name: '' 2 | project_name: ${env.name} 3 | 4 | cuda_deterministic: False 5 | device: ??? 
# to be specified later 6 | use_rs: True 7 | gamma: 0.99 8 | seed: 0 9 | num_seed_steps: 0 # Don't need seeding for IL (Use 1000 for RL) 10 | soft_mean: True 11 | sigma: 0.2 12 | insert_subgoal_exp: False 13 | subgoal_num_actions: 1 14 | train: 15 | batch: 64 16 | use_target: False 17 | soft_update: False 18 | expert: 19 | 20 | eval: 21 | eps: 10 22 | 23 | env: 24 | replay_mem: 50000 25 | initial_mem: 1280 26 | eps_steps: 1000 27 | eval_interval: 1e3 28 | 29 | # Extra args 30 | hydra_base_dir: "" 31 | 32 | # Number of actor updates per env step 33 | num_actor_updates: 1 34 | 35 | 36 | defaults: 37 | - agent: sac 38 | - env: roundabout_continues_v1 39 | - method: simple_sac -------------------------------------------------------------------------------- /config/env/highway-fast-continues-v0_s35_d1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: highway-fast-v0 5 | action_type: continues 6 | density: 1 7 | speed: 35 8 | # learn_steps: 1e5 9 | # eval_interval: 1e3 10 | 11 | replay_mem: 1e6 12 | # initial_mem: 10000 13 | round_steps: 2e4 14 | eps_steps: 100000 15 | eval_interval: 1e3 16 | expert_data: ${expert} 17 | first_step: 5 18 | delta: 5 19 | sample_uc: 0.03 20 | l_ego_s: 0 21 | r_ego_s: 4 22 | l_pos: 1 23 | r_pos: 1 24 | g1: 700.0 25 | expert: 26 | subsample_freq: 1 27 | basic_tra: "/expert_data/highway-fast-continues-v0-s35-d1/1.npy" 28 | tra: "/expert_data/highway-fast-continues-v0-s35-d1/20.npy" 29 | 30 | agent: 31 | name: sac 32 | 33 | num_actor_updates: 1 34 | 35 | train: 36 | use_target: true 37 | soft_update: true 38 | batch: 256 39 | 40 | q_net: 41 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/intersection-continues-v0-o1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: intersection-v0 5 | action_type: continues 6 | destination: o11 7 | # learn_steps: 1e5 8 | # eval_interval: 1e3 9 | 10 | replay_mem: 1e6 11 | # initial_mem: 10000 12 | round_steps: 2e4 13 | eps_steps: 100000 14 | eval_interval: 1e3 15 | expert_data: ${expert} 16 | first_step: 5 17 | delta: 5 18 | sample_uc: 0.03 19 | l_ego_s: 0 20 | r_ego_s: 6 21 | l_pos: 1 22 | r_pos: 2 23 | g1: 100.0 24 | expert: 25 | subsample_freq: 1 26 | basic_tra: "/expert_data/intersection-continues-v0-o1/1.npy" 27 | tra: "/expert_data/intersection-continues-v0-o1/20.npy" 28 | 29 | agent: 30 | name: sac 31 | 32 | num_actor_updates: 1 33 | 34 | train: 35 | use_target: true 36 | soft_update: true 37 | batch: 256 38 | 39 | q_net: 40 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/merge-continues-v0.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: merge-v0 5 | action_type: continues 6 | # learn_steps: 1e5 7 | # eval_interval: 1e3 8 | 9 | replay_mem: 1e6 10 | # initial_mem: 10000 11 | round_steps: 2e4 12 | eps_steps: 100000 13 | eval_interval: 1e3 14 | expert_data: ${expert} 15 | first_step: 5 16 | delta: 5 17 | sample_uc: 0.03 18 | l_ego_s: 0 19 | r_ego_s: 4 20 | l_pos: 1 21 | r_pos: 1 22 | g1: 500.0 23 | expert: 24 | basic_tra: "/expert_data/merge-continues-v0/1.npy" 25 | tra: "/expert_data/merge-continues-v0/20.npy" 26 | 27 | agent: 28 | name: sac 29 | 30 | num_actor_updates: 1 31 | 32 | train: 33 | use_target: true 
34 | soft_update: true 35 | batch: 256 36 | 37 | q_net: 38 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /config/env/roundabout_continues_v1.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | env: 4 | name: roundabout-v1 5 | action_type: continues 6 | # learn_steps: 1e5 7 | # eval_interval: 1e3 8 | 9 | replay_mem: 1e6 10 | # initial_mem: 10000 11 | round_steps: 2e4 12 | eps_steps: 100000 13 | eval_interval: 1e3 14 | expert_data: ${expert} 15 | first_step: 5 16 | delta: 5 17 | sample_uc: 0.03 18 | l_ego_s: 0 19 | r_ego_s: 6 20 | l_pos: 1 21 | r_pos: 2 22 | g1: 100.0 23 | expert: 24 | basic_tra: "/expert_data/roundabout-continues-v1/1.npy" 25 | tra: "/expert_data/roundabout-continues-v1/20.npy" 26 | 27 | agent: 28 | name: sac 29 | 30 | num_actor_updates: 1 31 | 32 | train: 33 | use_target: true 34 | soft_update: true 35 | batch: 256 36 | 37 | q_net: 38 | _target_: model.sac_models.MultiQCritic -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/load_data.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import glob 5 | 6 | import torch 7 | import utils 8 | import time 9 | import glob 10 | import config 11 | class Dataset(): 12 | # fro this dataset, the subgoal_trajectory count is 1. 13 | def __init__(self, cur_pth, args): 14 | self.args = args 15 | self.expert_data = np.load(cur_pth+args.expert.basic_tra, allow_pickle=True).item() 16 | self.states = self.expert_data["states"] 17 | self.action = self.expert_data["actions"] 18 | self.next_states = self.expert_data["next_states"] 19 | self.all_data = np.load(cur_pth+args.expert.tra,allow_pickle=True).item() 20 | self.all_state = np.vstack([self.all_data["states"],self.states]).reshape(-1,self.states.shape[2]) 21 | self.all_action = np.vstack([self.all_data["actions"],self.action]).reshape(-1,self.action.shape[2]).clip(-1.0,1.0) 22 | # extract self state 23 | self.is_done = self.expert_data["dones"] 24 | self.tra_num = self.expert_data["dones"].shape[0] 25 | self.goals = np.zeros((self.tra_num),dtype=np.int32) ## goals: state id 26 | for i in range(self.tra_num): ## TODO: Set random goal at start 27 | # self.goals[i] = random.randint(1,args.first_step) 28 | self.goals[i] = random.randint(1,args.env.first_step) 29 | self.belongs = np.zeros(self.expert_data["rewards"].shape, dtype=np.int16) ## 30 | self.reset_belongs() 31 | self.insert_new_subgoal() 32 | 33 | def reset_belongs(self): 34 | for i in range(self.tra_num): 35 | for j in range(self.expert_data["lengths"][i]): 36 | # if j != 0: 37 | # self.belongs[i][j] = min( random.randint(max(j + 1, self.belongs[ i][j-1]), j+self.args.env.first_step), self.states[0].shape[0]-1) 38 | # else: 39 | # self.belongs[i][j] = min( random.randint(j + 1, j + self.args.env.first_step), self.states[0].shape[0] - 1) 40 | self.belongs[i][j] = min(j+1,self.states[0].shape[0]-1) 41 | def insert_new_subgoal(self,pos=None):# pos list 42 | # self.goals: tra_num 43 | for i in range(self.tra_num): 44 | if pos != None: 45 | if pos[i]< self.goals[i]: 46 | print("??? 
There is some error in subgoal setting at trajectory %d, goal: %d , new goal: %d" %(i, self.goals[i], pos[i] ) ) 47 | return False 48 | self.goals[i]=pos[i] 49 | for j in range(self.goals[i]): 50 | self.belongs[i][j] = self.goals[i] 51 | print(self.goals[i]) 52 | for j in range(self.expert_data["lengths"][i]): 53 | print(self.belongs[i][j], end=' ') 54 | return True 55 | def find_subgoal(self, state): 56 | # state: batch_num * state_dim 57 | # return: batch_num * state_dim 58 | focus_state=state[:,self.args.env.l_ego_s:self.args.env.r_ego_s+1] 59 | subgoals=np.zeros(state.shape) 60 | for i in range(state.shape[0]): 61 | id=0 62 | min_dis = float('inf') 63 | for j in range(self.states[0].shape[0]): #TODO:there is only one trajectory to get subgoal! 64 | dis=np.linalg.norm(focus_state[i]-self.states[0][j][self.args.env.l_ego_s:self.args.env.r_ego_s+1]) 65 | if dis <= min_dis: 66 | min_dis = dis 67 | id = self.belongs[0][j] 68 | subgoals[i]=self.states[0][id] 69 | return id, subgoals 70 | 71 | def sample(self,device): 72 | batch_size = 32 73 | indexes = np.random.choice(np.arange(self.expert_data["lengths"][0]), size=batch_size, replace=False) 74 | batch_state, batch_action = [self.states[0][i] for i in indexes], [self.action[0][i] for i in indexes] 75 | batch_state = np.array(batch_state) 76 | batch_action = np.array(batch_action) 77 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 78 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 79 | return batch_state, batch_action 80 | 81 | def all_sample(self,device): 82 | batch_size = 32 83 | indexes = np.random.choice(np.arange(self.all_state.shape[0]), size=batch_size, replace=False) 84 | batch_state, batch_action = [self.all_state[i] for i in indexes], [self.all_action[i] for i in indexes] 85 | batch_state = np.array(batch_state) 86 | batch_action = np.array(batch_action) 87 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 88 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 89 | return batch_state, batch_action 90 | def sqil_sample(self,device): 91 | batch_size = 32 92 | indexes = np.random.choice(np.arange(self.all_state.shape[0]), size=batch_size, replace=False) 93 | batch_state, batch_action ,batch_next_state, batch_done = [self.all_state[i] for i in indexes], [self.all_action[i] for i in indexes], [self.all_state[min(i+1,self.expert_data["lengths"][0]-1)] for i in indexes], [1.0 if i==self.expert_data["lengths"][0]-1 else 0.0 for i in indexes] 94 | batch_state = np.array(batch_state) 95 | batch_action = np.array(batch_action) 96 | batch_next_state = np.array(batch_next_state) 97 | batch_done = np.array(batch_done) 98 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 99 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 100 | batch_next_state = torch.as_tensor(batch_next_state, dtype=torch.float, device=device) 101 | batch_done = torch.as_tensor(batch_done, dtype=torch.float, device=device) 102 | return batch_state, batch_action, batch_next_state,batch_done 103 | def get_tra_num(self): 104 | return self.all_data["dones"].shape[0] 105 | 106 | def select_subgoal(self, agent, args): 107 | flag = False 108 | test_s = torch.squeeze(torch.tensor(self.states).float(), dim=0) 109 | if args.insert_subgoal_exp == True: 110 | test_a = torch.squeeze(torch.tensor(self.action).float(), dim=0) 111 | else: 112 | test_a = agent.choose_action(test_s, sample=True) 113 | test_a = 
torch.squeeze(torch.tensor(test_a).float(), dim=0) 114 | UC = agent.getUC(test_s, test_a).squeeze() 115 | target_uc = args.env.delta 116 | base_uc = UC[self.goals[0]] 117 | cnt = 1.0 118 | for i in range(self.goals[0] + 1, self.expert_data["lengths"][0]): 119 | if (args.soft_mean and base_uc * target_uc < UC[i]) or ( 120 | (not args.soft_mean) and base_uc * target_uc / cnt < UC[i]): # TODO more specific condition?: 121 | pos = np.array(i, dtype=np.int16).reshape((1, 1)) 122 | # pos += random.randint(1,args.env.next_step) 123 | flag = self.insert_new_subgoal(pos) 124 | break 125 | base_uc = base_uc * args.sigma + (1 - args.sigma) * UC[i] if args.soft_mean else base_uc + UC[i] 126 | cnt += 1.0 if args.soft_mean else 0.0 127 | print(self.goals[0]) 128 | return flag 129 | def get_reward(state): 130 | return None 131 | -------------------------------------------------------------------------------- /dataset/rs_memory.py: -------------------------------------------------------------------------------- 1 | 2 | from collections import deque 3 | import numpy as np 4 | import random 5 | import torch 6 | 7 | 8 | class LazyFrames(object): 9 | def __init__(self, frames): 10 | """This object ensures that common frames between the observations are only stored once. 11 | It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay 12 | buffers. 13 | This object should only be converted to numpy array before being passed to the model.""" 14 | self._frames = frames 15 | self._out = None 16 | 17 | def _force(self): 18 | if self._out is None: 19 | self._out = np.concatenate(self._frames, axis=0) 20 | self._frames = None 21 | return self._out 22 | 23 | def __array__(self, dtype=None): 24 | out = self._force() 25 | if dtype is not None: 26 | out = out.astype(dtype) 27 | return out 28 | 29 | def __len__(self): 30 | return len(self._force()) 31 | 32 | def __getitem__(self, i): 33 | return self._force()[i] 34 | 35 | def count(self): 36 | frames = self._force() 37 | return frames.shape[frames.ndim - 1] 38 | 39 | def frame(self, i): 40 | return self._force()[..., i] 41 | 42 | 43 | class Memory(object): 44 | def __init__(self, memory_size: int, seed: int = 0) -> None: 45 | random.seed(seed) 46 | self.memory_size = memory_size 47 | self.buffer = deque(maxlen=self.memory_size) 48 | 49 | def add(self, experience) -> None: 50 | self.buffer.append(experience) 51 | 52 | def size(self): 53 | return len(self.buffer) 54 | 55 | def sample(self, batch_size: int, continuous: bool = True): 56 | if batch_size > len(self.buffer): 57 | batch_size = len(self.buffer) 58 | if continuous: 59 | rand = random.randint(0, len(self.buffer) - batch_size) 60 | return [self.buffer[i] for i in range(rand, rand + batch_size)] 61 | else: 62 | indexes = np.random.choice(np.arange(len(self.buffer)), size=batch_size, replace=False) 63 | return [self.buffer[i] for i in indexes] 64 | 65 | def clear(self): 66 | self.buffer.clear() 67 | 68 | 69 | def get_samples(self, batch_size, device): 70 | batch = self.sample(batch_size, False) 71 | 72 | # batch_state, batch_next_state, batch_action, batch_re_obs, batch_reward1, batch_reward2, batch_done = zip(*batch) 73 | batch_state, batch_next_state, batch_action, batch_re_obs, batch_reward1, batch_done = zip(*batch) 74 | 75 | # Scale obs for atari. 
TODO: Use flags 76 | if isinstance(batch_state[0], LazyFrames): 77 | # Use lazyframes for improved memory storage (same as original DQN) 78 | batch_state = np.array(batch_state) / 255.0 79 | if isinstance(batch_next_state[0], LazyFrames): 80 | batch_next_state = np.array(batch_next_state) / 255.0 81 | batch_state = np.array(batch_state) 82 | batch_next_state = np.array(batch_next_state) 83 | batch_action = np.array(batch_action) 84 | 85 | batch_state = torch.as_tensor(batch_state, dtype=torch.float, device=device) 86 | batch_next_state = torch.as_tensor(batch_next_state, dtype=torch.float, device=device) 87 | batch_action = torch.as_tensor(batch_action, dtype=torch.float, device=device) 88 | batch_re_obs = torch.as_tensor(batch_re_obs, dtype=torch.float, device=device) 89 | if batch_action.ndim == 1: 90 | batch_action = batch_action.unsqueeze(1) 91 | batch_reward1 = torch.as_tensor(batch_reward1, dtype=torch.float, device=device).unsqueeze(1) 92 | # batch_reward2 = torch.as_tensor(batch_reward2, dtype=torch.float, device=device).unsqueeze(1) 93 | batch_done = torch.as_tensor(batch_done, dtype=torch.float, device=device).unsqueeze(1) 94 | 95 | # return batch_state, batch_next_state, batch_action,batch_re_obs, batch_reward1, batch_reward2, batch_done 96 | return batch_state, batch_next_state, batch_action,batch_re_obs, batch_reward1, batch_done 97 | 98 | -------------------------------------------------------------------------------- /exp-highway-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/exp-highway-table.png -------------------------------------------------------------------------------- /exp-highway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/exp-highway.png -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/1.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/10.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/14.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/15.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/19.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/19.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/20.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/24.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/25.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/4.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/5.npy -------------------------------------------------------------------------------- /expert_data/highway-fast-continues-v0-s35-d1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/highway-fast-continues-v0-s35-d1/9.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/1.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/10.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/14.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/15.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/15.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/19.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/20.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/24.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/25.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/4.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/5.npy -------------------------------------------------------------------------------- /expert_data/intersection-continues-v0-o1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/intersection-continues-v0-o1/9.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/1.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/10.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/14.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/14.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/15.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/19.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/20.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/24.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/25.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/4.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/5.npy -------------------------------------------------------------------------------- /expert_data/merge-continues-v0/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/merge-continues-v0/9.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/1.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/10.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/10.npy -------------------------------------------------------------------------------- 
/expert_data/roundabout-continues-v1/14.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/14.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/15.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/15.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/19.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/19.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/20.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/24.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/24.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/25.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/25.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/4.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/5.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/5.npy -------------------------------------------------------------------------------- /expert_data/roundabout-continues-v1/9.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/expert_data/roundabout-continues-v1/9.npy -------------------------------------------------------------------------------- /framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/framework.png -------------------------------------------------------------------------------- /highway_modify/.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single 
version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.8 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | sudo pip install pygame 27 | pip install -e .[deploy] 28 | - name: Lint with flake8 29 | run: | 30 | pip install flake8 31 | # stop the build if there are Python syntax errors or undefined names 32 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 33 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 35 | - name: Test with pytest 36 | run: | 37 | pip install pytest 38 | pip install pytest-cov 39 | pytest --cov=./ --cov-report=xml 40 | - name: Upload coverage to Codecov 41 | uses: codecov/codecov-action@v1 42 | with: 43 | file: ./coverage.xml 44 | flags: unittests 45 | fail_ci_if_error: true 46 | -------------------------------------------------------------------------------- /highway_modify/.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: 5 | - published 6 | 7 | jobs: 8 | release: 9 | name: Deploy release to PyPI 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout source 13 | uses: actions/checkout@v1 14 | - name: Set up Python 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.8 18 | - name: Install dependencies 19 | run: pip install wheel 20 | - name: Build package 21 | run: python setup.py sdist bdist_wheel 22 | - name: Upload package 23 | uses: pypa/gh-action-pypi-publish@master 24 | with: 25 | user: __token__ 26 | password: ${{ secrets.pypi_password }} -------------------------------------------------------------------------------- /highway_modify/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled sources 2 | *.pyc 3 | build/ 4 | dist/ 5 | 6 | # Setup 7 | **.egg* 8 | 9 | # Jupyter notebooks 10 | **.ipynb_checkpoints* 11 | 12 | # Sphinx documentation 13 | _build 14 | 15 | # Editor files 16 | .idea 17 | 18 | # Test files 19 | .pytest_cache 20 | .cache 21 | 22 | # Outputs 23 | **/out/* -------------------------------------------------------------------------------- /highway_modify/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Leurent" 5 | given-names: "Edouard" 6 | title: "An Environment for Autonomous Driving Decision-Making" 7 | version: 1.4 8 | date-released: 2018-05-01 9 | url: "https://github.com/eleurent/highway-env" 10 | -------------------------------------------------------------------------------- /highway_modify/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Edouard Leurent 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /highway_modify/codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | informational: true 6 | patch: 7 | default: 8 | informational: true 9 | -------------------------------------------------------------------------------- /highway_modify/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = ../highway-env 8 | SOURCEDIR = source 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | apidoc: 18 | sphinx-apidoc -o $(SOURCEDIR) -e ../highway_env 19 | 20 | http: 21 | python -mwebbrowser "http://localhost:8000/" 22 | python -m http.server 8000 23 | 24 | 25 | # Catch-all target: route all unknown targets to Sphinx using the new 26 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
27 | %: Makefile 28 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 29 | -------------------------------------------------------------------------------- /highway_modify/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pygments==2.4.1 2 | sphinx_rtd_theme 3 | sphinxcontrib-bibtex<2.0.0 4 | jupyter-sphinx 5 | gym 6 | numpy 7 | pygame 8 | matplotlib 9 | pandas 10 | 11 | # ========= 12 | # Optionals 13 | # ========= 14 | pytest 15 | scipy 16 | -------------------------------------------------------------------------------- /highway_modify/docs/source/actions/index.rst: -------------------------------------------------------------------------------- 1 | .. _actions: 2 | 3 | .. py:module:: highway_env.envs.common.action 4 | 5 | Actions 6 | ============= 7 | 8 | Similarly to :ref:`observations `, **several types of actions** can be used in every environment. They are defined in the 9 | :py:mod:`~highway_env.envs.common.action` module. 10 | Each environment comes with a *default* action type, which can be changed or customised using 11 | :ref:`environment configurations `. For instance, 12 | 13 | 14 | .. code-block:: python 15 | 16 | import gym 17 | import highway_env 18 | 19 | env = gym.make('highway-v0') 20 | env.configure({ 21 | "action": { 22 | "type": "ContinuousAction" 23 | } 24 | }) 25 | env.reset() 26 | 27 | 28 | Continuous Actions 29 | ------------------- 30 | 31 | The :py:class:`~highway_env.envs.common.action.ContinuousAction` type allows the agent to directly set the low-level 32 | controls of the :ref:`vehicle kinematics `, namely the throttle :math:`a` and steering angle :math:`\delta`. 33 | 34 | .. note:: 35 | The control of throttle and steering can be enabled or disabled through the 36 | :py:attr:`~highway_env.envs.common.action.ContinuousAction.longitudinal` and :py:attr:`~highway_env.envs.common.action.ContinuousAction.lateral` 37 | configurations, respectively. Thus, the action space can be either 1D or 2D. 38 | 39 | Discrete Actions 40 | ------------------- 41 | 42 | The :py:class:`~highway_env.envs.common.action.DiscreteAction` is a uniform quantization of the :py:class:`~highway_env.envs.common.action.ContinuousAction` above. 43 | 44 | The :py:attr:`~highway_env.envs.common.action.DiscreteAction.actions_per_axis` parameter allows to set the quantization step. Similarly to continuous actions, the longitudinal and lateral axis can be enabled or disabled separately. 45 | 46 | 47 | 48 | Discrete Meta-Actions 49 | ---------------------- 50 | 51 | The :py:class:`~highway_env.envs.common.action.DiscreteMetaAction` type adds a layer of :ref:`speed and steering controllers ` 52 | on top of the continuous low-level control, so that the ego-vehicle can automatically follow the road at a desired velocity. 53 | Then, the available **meta-actions** consist in *changing the target lane and speed* that are used as setpoints for the low-level controllers. 54 | 55 | The full corresponding action space is defined in :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.ACTIONS_ALL` 56 | 57 | .. 
code-block:: python 58 | 59 | ACTIONS_ALL = { 60 | 0: 'LANE_LEFT', 61 | 1: 'IDLE', 62 | 2: 'LANE_RIGHT', 63 | 3: 'FASTER', 64 | 4: 'SLOWER' 65 | } 66 | 67 | Some of these actions might not be always available (lane changes at the edges of the roads, or accelerating/decelrating 68 | beyond the maximum/minimum velocity), and the list of available actions can be accessed with :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.get_available_actions` method. 69 | Taking an unavailable action is equivalent to taking the ``IDLE`` action. 70 | 71 | Similarly to continuous actions, the longitudinal (speed changes) and lateral (lane changes) actions can be disabled separately 72 | through the :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.longitudinal` and :py:attr:`~highway_env.envs.common.action.DiscreteMetaAction.lateral` parameters. 73 | For instance, in the default configuration of the :ref:`intersection ` environment, only the speed is controlled by the agent, 74 | while the lateral control of the vehicle is automatically performed by a :ref:`steering controller ` to track a desired lane. 75 | 76 | 77 | Manual control 78 | ---------------- 79 | 80 | The environments can be used as a simulation: 81 | 82 | .. code-block:: python 83 | 84 | env = gym.make("highway-v0") 85 | env.configure({ 86 | "manual_control": True 87 | }) 88 | env.reset() 89 | done = False 90 | while not done: 91 | env.step(env.action_space.sample()) # with manual control, these actions are ignored 92 | 93 | The ego-vehicle is controlled by directional arrows keys, as defined in 94 | :py:class:`~highway_env.envs.common.graphics.EventHandler` 95 | 96 | API 97 | -------- 98 | 99 | .. automodule:: highway_env.envs.common.action 100 | :members: 101 | 102 | -------------------------------------------------------------------------------- /highway_modify/docs/source/bibliography/biblio.bib: -------------------------------------------------------------------------------- 1 | @article{Treiber2000, 2 | author = {Treiber, Martin and Hennecke, Ansgar and Helbing, Dirk}, 3 | journal = {Physical Review E - Statistical Physics, Plasmas, Fluids, and Related Interdisciplinary Topics}, 4 | number = {2}, 5 | pages = {1805--1824}, 6 | title = {{Congested traffic states in empirical observations and microscopic simulations}}, 7 | volume = {62}, 8 | year = {2000} 9 | } 10 | 11 | @article{Kesting2007, 12 | abstract = {A general model (minimizing overall braking induced by lane change, MOBIL) is proposed to derive lane-changing rules for discretionary and mandatory lane changes for a wide class of car-following models. Both the utility of a given lane and the risk associated with lane changes are determined In terms of longitudinal accelerations calculated with micro-scopic traffic models. This determination allows for the formulation of compact and general safety and incentive criteria for both symmetric and asymmetric passing rules. Moreover, anticipative elements and the crucial influence of velocity differences of these car-following models are automatically transferred to the lane-changing rules. Although the safety criterion prevents critical lane changes and collisions, the incentive criterion takes into account the advantages and disadvantages of other drivers associated with a lane change via the "politeness factor." The parameter allows one to vary the motivation for lane changing from purely egoistic to more cooperative driving behavior. 
This novel feature allows one first to prevent lane changes for a marginal advantage if they obstruct other drivers and second to let an aggressive driver induce the lane change of a slower driver ahead in order to no longer be obstructed. This phenomenon is common for asymmetric passing rules with a dedicated lane for passing. The model is applied to traffic simulations of cars and trucks with the Intelligent driver model as the underlying car-following model. An open system with an on-ramp is studied, and the resulting lanechanging rate is investigated as a function of the spatial coordinate as well as a function of traffic density.}, 13 | author = {Kesting, Arne and Treiber, Martin and Helbing, Dirk}, 14 | doi = {10.3141/1999-10}, 15 | isbn = {9780309104258}, 16 | issn = {03611981}, 17 | journal = {Transportation Research Record}, 18 | title = {{General lane-changing model MOBIL for car-following models}}, 19 | year = {2007} 20 | } 21 | 22 | @article{Polack2017, 23 | author = {Polack, Philip and Altch{\'{e}}, Florent and D'Andr{\'{e}}a-Novel, Brigitte}, 24 | isbn = {9781509048038}, 25 | journal = {IEEE Intelligent Vehicles Symposium}, 26 | pages = {6--8}, 27 | title = {{The Kinematic Bicycle Model : a Consistent Model for Planning Feasible Trajectories for Autonomous Vehicles ?}}, 28 | address = {Los Angeles}, 29 | year = {2017} 30 | } 31 | 32 | @article{Hren2008, 33 | author = {Hren, Jean Fran{\c{c}}ois and Munos, R{\'{e}}mi}, 34 | journal = {Lecture Notes in Computer Science}, 35 | title = {{Optimistic planning of deterministic systems}}, 36 | year = {2008} 37 | } 38 | 39 | @inproceedings{Andrychowicz2017, 40 | abstract = {Dealing with sparse rewards is one of the biggest challenges in Reinforcement Learning (RL). We present a novel technique called Hindsight Experience Replay which allows sample-efficient learning from rewards which are sparse and binary and therefore avoid the need for complicated reward engineering. It can be combined with an arbitrary off-policy RL algorithm and may be seen as a form of implicit curriculum. We demonstrate our approach on the task of manipulating objects with a robotic arm. In particular, we run experiments on three different tasks: pushing, sliding, and pick-and-place, in each case using only binary rewards indicating whether or not the task is completed. Our ablation studies show that Hindsight Experience Replay is a crucial ingredient which makes training possible in these challenging environments. We show that our policies trained on a physics simulation can be deployed on a physical robot and successfully complete the task. The video presenting our experiments is available at https://goo.gl/SMrQnI.}, 41 | archivePrefix = {arXiv}, 42 | arxivId = {1707.01495}, 43 | author = {Andrychowicz, Marcin and Wolski, Filip and Ray, Alex and Schneider, Jonas and Fong, Rachel and Welinder, Peter and McGrew, Bob and Tobin, Josh and Abbeel, Pieter and Zaremba, Wojciech}, 44 | booktitle = {Advances in Neural Information Processing Systems}, 45 | eprint = {1707.01495}, 46 | issn = {10495258}, 47 | title = {{Hindsight experience replay}}, 48 | year = {2017} 49 | } 50 | 51 | @article{Mnih2015, 52 | author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. 
and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis}, 53 | journal = {Nature}, 54 | number = {7540}, 55 | pages = {529--533}, 56 | title = {{Human-level control through deep reinforcement learning}}, 57 | volume = {518}, 58 | year = {2015} 59 | } 60 | 61 | @inproceedings{Leurent2019social, 62 | title = {Social Attention for Autonomous Decision-Making in Dense Traffic}, 63 | author = {Edouard Leurent and Jean Mercat}, 64 | year = {2019}, 65 | booktitle = {Machine Learning for Autonomous Driving Workshop at the Thirty-third Conference on Neural Information Processing Systems (NeurIPS 2019)}, 66 | address = {Montreal, Canada}, 67 | month=dec, 68 | eprint = {1911.12250}, 69 | archivePrefix = {arXiv}, 70 | primaryClass = {cs.SY} 71 | } 72 | 73 | @misc{Qi2017pointnet, 74 | title={PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation}, 75 | author={Charles R. Qi and Hao Su and Kaichun Mo and Leonidas J. Guibas}, 76 | year={2017}, 77 | eprint={1612.00593}, 78 | archivePrefix={arXiv}, 79 | primaryClass={cs.CV} 80 | } -------------------------------------------------------------------------------- /highway_modify/docs/source/bibliography/index.rst: -------------------------------------------------------------------------------- 1 | .. _bibliography: 2 | 3 | Bibliography 4 | ############ 5 | 6 | .. bibliography:: biblio.bib 7 | :encoding: latin 8 | :style: alpha 9 | :all: 10 | 11 | .. Fix to make sure bibliography appear when bibliography called in separate file 12 | .. latex+latin => latin 13 | .. :cited: => :all: see http://sphinxcontrib-bibtex.readthedocs.io/en/latest/usage.html#unresolved-citations-across-documents 14 | 15 | .. :style: alpha, plain , unsrt, and unsrtalpha -------------------------------------------------------------------------------- /highway_modify/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../..')) 18 | 19 | 20 | # -- Path setup for jupyter-sphix -------------------------------------------- 21 | # See https://jupyter-sphinx.readthedocs.io/en/latest/#configuration-options 22 | # BUT this does not seem to work with Anaconda on windows 23 | import os 24 | package_path = os.path.abspath('../..') 25 | os.environ['PYTHONPATH'] = ':'.join(filter(None, (package_path, os.environ.get('PYTHONPATH', '')))) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = 'highway-env' 31 | copyright = '2018, Edouard Leurent' 32 | author = 'Edouard Leurent' 33 | 34 | # The short X.Y version 35 | version = '' 36 | # The full version, including alpha/beta/rc tags 37 | release = '' 38 | 39 | 40 | # -- General configuration --------------------------------------------------- 41 | 42 | # If your documentation needs a minimal Sphinx version, state it here. 43 | # 44 | # needs_sphinx = '1.0' 45 | 46 | # Add any Sphinx extension module names here, as strings. They can be 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 48 | # ones. 49 | extensions = [ 50 | 'sphinx.ext.autodoc', 51 | 'sphinx.ext.coverage', 52 | 'sphinx.ext.githubpages', 53 | 'sphinx.ext.viewcode', 54 | 'sphinx.ext.autosectionlabel', 55 | 'sphinxcontrib.bibtex', 56 | 'jupyter_sphinx' 57 | ] 58 | 59 | autodoc_default_flags = ['members', 'private-members', 'undoc-members', 'special-members'] 60 | autodoc_member_order = 'bysource' 61 | 62 | # Add any paths that contain templates here, relative to this directory. 63 | templates_path = ['_templates'] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # 68 | # source_suffix = ['.rst', '.md'] 69 | source_suffix = '.rst' 70 | 71 | # The master toctree document. 72 | master_doc = 'index' 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 79 | language = None 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # This pattern also affects html_static_path and html_extra_path . 84 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = 'sphinx' 88 | 89 | 90 | # -- Options for HTML output ------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | # 95 | html_theme = 'sphinx_rtd_theme' 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | # 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom static files (such as style sheets) here, 104 | # relative to this directory. They are copied after the builtin static files, 105 | # so a file named "default.css" will overwrite the builtin "default.css". 106 | html_static_path = ['_static'] 107 | 108 | # Custom sidebar templates, must be a dictionary that maps document names 109 | # to template names. 
110 | # 111 | # The default sidebars (for documents that don't match any pattern) are 112 | # defined by theme itself. Builtin themes are using these templates by 113 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 114 | # 'searchbox.html']``. 115 | # 116 | # html_sidebars = {} 117 | 118 | 119 | # -- Options for HTMLHelp output --------------------------------------------- 120 | 121 | # Output file base name for HTML help builder. 122 | htmlhelp_basename = 'highway-envdoc' 123 | 124 | 125 | # -- Options for LaTeX output ------------------------------------------------ 126 | 127 | latex_elements = { 128 | # The paper size ('letterpaper' or 'a4paper'). 129 | # 130 | # 'papersize': 'letterpaper', 131 | 132 | # The font size ('10pt', '11pt' or '12pt'). 133 | # 134 | # 'pointsize': '10pt', 135 | 136 | # Additional stuff for the LaTeX preamble. 137 | # 138 | # 'preamble': '', 139 | 140 | # Latex figure (float) alignment 141 | # 142 | # 'figure_align': 'htbp', 143 | } 144 | 145 | # Grouping the document tree into LaTeX files. List of tuples 146 | # (source start file, target name, title, 147 | # author, documentclass [howto, manual, or own class]). 148 | latex_documents = [ 149 | (master_doc, 'highway-env.tex', 'highway-env Documentation', 150 | 'Edouard Leurent', 'manual'), 151 | ] 152 | 153 | 154 | # -- Options for manual page output ------------------------------------------ 155 | 156 | # One entry per manual page. List of tuples 157 | # (source start file, name, description, authors, manual section). 158 | man_pages = [ 159 | (master_doc, 'highway-env', 'highway-env Documentation', 160 | [author], 1) 161 | ] 162 | 163 | 164 | # -- Options for Texinfo output ---------------------------------------------- 165 | 166 | # Grouping the document tree into Texinfo files. List of tuples 167 | # (source start file, target name, title, author, 168 | # dir menu entry, description, category) 169 | texinfo_documents = [ 170 | (master_doc, 'highway-env', 'highway-env Documentation', 171 | author, 'highway-env', 'One line description of project.', 172 | 'Miscellaneous'), 173 | ] 174 | 175 | 176 | # -- Extension configuration ------------------------------------------------- 177 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/index.rst: -------------------------------------------------------------------------------- 1 | .. _dynamics: 2 | 3 | Dynamics 4 | ############ 5 | 6 | The dynamics of every environment describes how vehicles move and behave through time. 7 | There are two important sections that affect these dynamics: the description of the roads, and the vehicle physics and behavioral models. 8 | 9 | Roads 10 | ======== 11 | 12 | 13 | A :py:class:`~highway_env.road.road.Road` is composed of a :py:class:`~highway_env.road.road.RoadNetwork` and a list 14 | of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | road/lane 20 | road/road 21 | road/regulation 22 | 23 | Vehicles 24 | ======== 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | 29 | vehicle/kinematics 30 | vehicle/controller 31 | vehicle/behavior -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/lane.rst: -------------------------------------------------------------------------------- 1 | .. 
_road_lane: 2 | 3 | Lane 4 | ######### 5 | 6 | The geometry of lanes is described by :py:class:`~highway_env.road.lane.AbstractLane` objects, as a parametrized center line curve, providing a local coordinate system. 7 | 8 | Conversions between the local (longitudinal, lateral) coordinates in the Frenet frame and the global :math:`x,y` coordinates are ensured by the :py:meth:`~highway_env.road.lane.AbstractLane.position` and :py:meth:`~highway_env.road.lane.AbstractLane.local_coordinates` methods. 9 | 10 | The main implementations are: 11 | 12 | - :py:class:`~highway_env.road.lane.StraightLane` 13 | - :py:class:`~highway_env.road.lane.SineLane` 14 | - :py:class:`~highway_env.road.lane.CircularLane` 15 | 16 | API 17 | *** 18 | 19 | .. automodule:: highway_env.road.lane 20 | :members: 21 | 22 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/regulation.rst: -------------------------------------------------------------------------------- 1 | .. _road_regulation: 2 | 3 | Road regulation 4 | ############### 5 | 6 | A :py:class:`~highway_env.road.regulation.RegulatedRoad` is a :py:class:`~highway_env.road.road.Road` in which vehicles take or give the right of way at an intersection based on the :py:attr:`~highway_env.road.lane.AbstractLane.priority` lane attribute. 7 | 8 | On such a road, some rules are enforced: 9 | 10 | - most of the time, vehicles behave as usual; 11 | - however, they try to predict collisions with other vehicles through the :py:meth:`~highway_env.road.regulation.RegulatedRoad.is_conflict_possible` method; 12 | - when a conflict is predicted, the right of way is arbitrated through the :py:meth:`~highway_env.road.regulation.RegulatedRoad.respect_priorities` method, and the yielding vehicle's target velocity is set to 0 until the conflict is resolved. 13 | 14 | API 15 | *** 16 | 17 | .. automodule:: highway_env.road.regulation 18 | :members: 19 | 20 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/road/road.rst: -------------------------------------------------------------------------------- 1 | .. _road_road: 2 | 3 | Road 4 | ######### 5 | 6 | 7 | A :py:class:`~highway_env.road.road.Road` is composed of a :py:class:`~highway_env.road.road.RoadNetwork` and a list 8 | of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 9 | 10 | The :py:class:`~highway_env.road.road.RoadNetwork` describes the topology of the road infrastructure as a graph, 11 | where edges represent lanes and nodes represent intersections. It contains a :py:attr:`~highway_env.road.road.RoadNetwork.graph` dictionary which stores the :py:class:`~highway_env.road.lane.AbstractLane` geometries by their :py:class:`~highway_env.road.road.LaneIndex`. 12 | A :py:class:`~highway_env.road.road.LaneIndex` is a tuple containing: 13 | 14 | - a string identifier of a starting position 15 | - a string identifier of an ending position 16 | - an integer giving the index of the described lane, in the (unique) road from the starting to the ending position 17 | 18 | For instance, the geometry of the second lane in the road going from the ``"lab"`` to the ``"pub"`` can be obtained by: 19 | 20 | .. code-block:: python 21 | 22 | lane = road.road_network.graph["lab"]["pub"][1] 23 | 24 | The actual positions of the lab and the pub are defined in the ``lane`` geometry object. 25 | 26 | API 27 | ******* 28 | 29 | .. 
automodule:: highway_env.road.road 30 | :members: 31 | 32 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/behavior.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_behavior: 2 | 3 | Behavior 4 | ========== 5 | 6 | .. py:module::highway_env.vehicle.behavior 7 | 8 | Other simulated vehicles follow simple and realistic behaviors that dictate how they accelerate and 9 | steer on the road. They are implemented in the :py:class:`~highway_env.vehicle.behavior.IDMVehicle` class. 10 | 11 | Longitudinal Behavior 12 | ~~~~~~~~~~~~~~~~~~~~~~ 13 | 14 | The acceleration of the vehicle is given by the *Intelligent Driver Model* (IDM) from :cite:`Treiber2000`. 15 | 16 | .. math:: 17 | \dot{v} &= a\left[1-\left(\frac{v}{v_0}\right)^\delta - \left(\frac{d^*}{d}\right)^2\right] \\ 18 | d^* &= d_0 + Tv + \frac{v\Delta v}{2\sqrt{ab}} \\ 19 | 20 | where :math:`v` is the vehicle velocity, :math:`d` is the distance to its front vehicle. 21 | The dynamics are parametrised by: 22 | 23 | - :math:`v_0` the desired velocity, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.target_velocity` 24 | - :math:`T` the desired time gap, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.TIME_WANTED` 25 | - :math:`d_0` the jam distance, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.DISTANCE_WANTED` 26 | - :math:`a,\,b` the maximum acceleration and deceleration, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.COMFORT_ACC_MAX` and :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.COMFORT_ACC_MIN` 27 | - :math:`\delta` the velocity exponent, as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.DELTA` 28 | 29 | It is implemented in :py:meth:`~highway_env.vehicle.behavior.IDMVehicle.acceleration` method. 30 | 31 | Lateral Behavior 32 | ~~~~~~~~~~~~~~~~ 33 | 34 | The discrete lane change decisions are given by the *Minimizing Overall Braking Induced by Lane change* (MOBIL) model from :cite:`Kesting2007`. 35 | According to this model, a vehicle decides to change lane when: 36 | 37 | - it is **safe** (do not cut-in): 38 | 39 | .. math:: 40 | \tilde{a}_n \geq - b_\text{safe}; 41 | 42 | - there is an **incentive** (for the ego-vehicle and possibly its followers): 43 | 44 | .. math:: 45 | \underbrace{\tilde{a}_c - a_c}_{\text{ego-vehicle}} + p\left(\underbrace{\tilde{a}_n - a_n}_{\text{new follower}} + \underbrace{\tilde{a}_o - a_o}_{\text{old follower}}\right) \geq \Delta a_\text{th}, 46 | 47 | where 48 | 49 | - :math:`c` is the center (ego-) vehicle, :math:`o` is its old follower *before* the lane change, and :math:`n` is its new follower *after* the lane change 50 | - :math:`a, \tilde{a}` are the acceleration of the vehicles *before* and *after* the lane change, respectively. 51 | - :math:`p` is a politeness coefficient, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.POLITENESS` 52 | - :math:`\Delta a_\text{th}` the acceleration gain required to trigger a lane change, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.LANE_CHANGE_MIN_ACC_GAIN` 53 | - :math:`b_\text{safe}` the maximum braking imposed to a vehicle during a cut-in, implemented as :py:attr:`~highway_env.vehicle.behavior.IDMVehicle.LANE_CHANGE_MAX_BRAKING_IMPOSED` 54 | 55 | 56 | It is implemented in the :py:meth:`~highway_env.vehicle.behavior.IDMVehicle.mobil` method. 57 | 58 | .. 
note:: 59 | In the :py:class:`~highway_env.vehicle.behavior.LinearVehicle` class, the longitudinal and lateral behaviours 60 | are approximated as linear weightings of several features, such as the distance and speed difference to the leading 61 | vehicle. 62 | 63 | 64 | 65 | API 66 | *** 67 | 68 | .. automodule:: highway_env.vehicle.behavior 69 | :members: 70 | 71 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/controller.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_controller: 2 | 3 | Control 4 | ======== 5 | 6 | The :py:class:`~highway_env.vehicle.controller.ControlledVehicle` class implements a low-level controller on top of a :py:class:`~highway_env.vehicle.kinematics.Vehicle`, allowing to track a given target speed and follow a target lane. 7 | The controls are computed when calling the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.act` method. 8 | 9 | Longitudinal controller 10 | ----------------------- 11 | 12 | The longitudinal controller is a simple proportional controller: 13 | 14 | .. math:: 15 | a = K_p(v_r - v), 16 | 17 | where 18 | 19 | - :math:`a` is the vehicle acceleration (throttle); 20 | - :math:`v` is the vehicle velocity; 21 | - :math:`v_r` is the reference velocity; 22 | - :math:`K_p` is the controller proportional gain, implemented as :py:attr:`~highway_env.vehicle.controller.ControlledVehicle.KP_A`. 23 | 24 | It is implemented in the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.speed_control` method. 25 | 26 | Lateral controller 27 | ----------------------- 28 | 29 | The lateral controller is a simple proportional-derivative controller, combined with some non-linearities that invert those of the :ref:`kinematics model `. 30 | 31 | Position control 32 | ~~~~~~~~~~~~~~~~ 33 | 34 | .. math:: 35 | v_{\text{lat},r} &= -K_{p,\text{lat}} \Delta_{\text{lat}}, \\ 36 | \Delta \psi_{r} &= \arcsin \left(\frac{v_{\text{lat},r}}{v}\right), 37 | 38 | Heading control 39 | ~~~~~~~~~~~~~~~~ 40 | 41 | .. math:: 42 | \psi_r &= \psi_L + \Delta \psi_{r}, \\ 43 | \dot{\psi}_r &= K_{p,\psi} (\psi_r - \psi), \\ 44 | \delta &= \arcsin \left(\frac{1}{2} \frac{l}{v} \dot{\psi}_r\right), \\ 45 | 46 | where 47 | 48 | - :math:`\Delta_{\text{lat}}` is the lateral position of the vehicle with respect to the lane center-line; 49 | - :math:`v_{\text{lat},r}` is the lateral velocity command; 50 | - :math:`\Delta \psi_{r}` is a heading variation to apply the lateral velocity command; 51 | - :math:`\psi_L` is the lane heading (at some lookahead position to anticipate turns); 52 | - :math:`\psi_r` is the target heading to follow the lane heading and position; 53 | - :math:`\dot{\psi}_r` is the yaw rate command; 54 | - :math:`\delta` is the front wheels angle control; 55 | - :math:`K_{p,\text{lat}}` and :math:`K_{p,\psi}` are the position and heading control gains. 56 | 57 | It is implemented in the :py:meth:`~highway_env.vehicle.controller.ControlledVehicle.steering_control` method. 58 | 59 | API 60 | ---- 61 | 62 | .. automodule:: highway_env.vehicle.controller 63 | :members: 64 | 65 | -------------------------------------------------------------------------------- /highway_modify/docs/source/dynamics/vehicle/kinematics.rst: -------------------------------------------------------------------------------- 1 | .. _vehicle_kinematics: 2 | 3 | .. 
py:module::highway_env.vehicle.kinematics 4 | 5 | Kinematics 6 | ================== 7 | 8 | The vehicles kinematics are represented in the :py:class:`~highway_env.vehicle.kinematics.Vehicle` class by the *Kinematic Bicycle Model* :cite:`Polack2017`. 9 | 10 | .. math:: 11 | \dot{x}&=v\cos(\psi+\beta) \\ 12 | \dot{y}&=v\sin(\psi+\beta) \\ 13 | \dot{v}&=a \\ 14 | \dot{\psi}&=\frac{v}{l}\sin\beta \\ 15 | \beta&=\tan^{-1}(1/2\tan\delta), \\ 16 | 17 | where 18 | 19 | - :math:`(x, y)` is the vehicle position; 20 | - :math:`v` its forward speed; 21 | - :math:`\psi` its heading; 22 | - :math:`a` is the acceleration command; 23 | - :math:`\beta` is the slip angle at the center of gravity; 24 | - :math:`\delta` is the front wheel angle used as a steering command. 25 | 26 | These calculations appear in the :py:meth:`~highway_env.vehicle.kinematics.Vehicle.step` method. 27 | 28 | API 29 | *** 30 | 31 | .. automodule:: highway_env.vehicle.kinematics 32 | :members: 33 | 34 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/highway.rst: -------------------------------------------------------------------------------- 1 | .. _environments_highway: 2 | 3 | .. currentmodule:: highway_env.envs.highway_env 4 | 5 | Highway 6 | ********** 7 | 8 | In this task, the ego-vehicle is driving on a multilane highway populated with other vehicles. The agent's objective is to reach a high speed while avoiding collisions with neighbouring vehicles. Driving on the right side of the road is also rewarded. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:highway_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("highway-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "Kinematics" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction", 34 | }, 35 | "lanes_count": 4, 36 | "vehicles_count": 50, 37 | "duration": 40, # [s] 38 | "initial_spacing": 2, 39 | "collision_reward": -1, # The reward received when colliding with a vehicle. 40 | "reward_speed_range": [20, 30], # [m/s] The reward for high speed is mapped linearly from this range to [0, HighwayEnv.HIGH_SPEED_REWARD]. 41 | "simulation_frequency": 15, # [Hz] 42 | "policy_frequency": 1, # [Hz] 43 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 44 | "screen_width": 600, # [px] 45 | "screen_height": 150, # [px] 46 | "centering_position": [0.3, 0.5], 47 | "scaling": 5.5, 48 | "show_trajectories": False, 49 | "render_agent": True, 50 | "offscreen_rendering": False 51 | } 52 | 53 | More specifically, it is defined in: 54 | 55 | .. automethod:: HighwayEnv.default_config 56 | 57 | Faster variant 58 | ===================== 59 | 60 | A faster (x15 speedup) variant is also available with: 61 | 62 | .. code-block:: python 63 | 64 | env = gym.make("highway-fast-v0") 65 | 66 | 67 | The details of this variant are described `here `_. 68 | 69 | API 70 | ===== 71 | 72 | .. autoclass:: HighwayEnv 73 | :members: 74 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/index.rst: -------------------------------------------------------------------------------- 1 | .. _environments: 2 | 3 | The Environments 4 | ############ 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | highway 10 | merge 11 | roundabout 12 | parking 13 | intersection 14 | racetrack -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/intersection.rst: -------------------------------------------------------------------------------- 1 | .. _environments_intersection: 2 | 3 | .. currentmodule:: highway_env.envs.intersection_env 4 | 5 | Intersection 6 | ************ 7 | 8 | An intersection negotiation task with dense traffic. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/intersection-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:intersection_env 14 | 15 | 16 | .. warning:: 17 | 18 | It's quite hard to come up with good decentralized behaviors for other agents to avoid each other. Of course, this 19 | could be achieved by sophisticated centralized schedulers, or traffic lights, but to keep things simple a 20 | :ref:`rudimentary collision prediction ` was added in the behaviour of other vehicles. 21 | 22 | This simple system sometime fails which results in collisions, blocking the way for the ego-vehicle. 23 | I figured it was fine for my own purpose, since it did not happen too often and it's reasonable to expect 24 | the ego-vehicle to simply wait the end of episode in these situations. But I agree that it is not ideal, 25 | and I welcome any contribution on that matter. 26 | 27 | Usage 28 | ========== 29 | 30 | .. code-block:: python 31 | 32 | env = gym.make("intersection-v0") 33 | 34 | 35 | Default configuration 36 | ===================== 37 | 38 | .. code-block:: python 39 | 40 | { 41 | "observation": { 42 | "type": "Kinematics", 43 | "vehicles_count": 15, 44 | "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"], 45 | "features_range": { 46 | "x": [-100, 100], 47 | "y": [-100, 100], 48 | "vx": [-20, 20], 49 | "vy": [-20, 20], 50 | }, 51 | "absolute": True, 52 | "flatten": False, 53 | "observe_intentions": False 54 | }, 55 | "action": { 56 | "type": "DiscreteMetaAction", 57 | "longitudinal": False, 58 | "lateral": True 59 | }, 60 | "duration": 13, # [s] 61 | "destination": "o1", 62 | "initial_vehicle_count": 10, 63 | "spawn_probability": 0.6, 64 | "screen_width": 600, 65 | "screen_height": 600, 66 | "centering_position": [0.5, 0.6], 67 | "scaling": 5.5 * 1.3, 68 | "collision_reward": IntersectionEnv.COLLISION_REWARD, 69 | "normalize_reward": False 70 | } 71 | 72 | More specifically, it is defined in: 73 | 74 | .. automethod:: IntersectionEnv.default_config 75 | 76 | API 77 | ===== 78 | 79 | .. autoclass:: IntersectionEnv 80 | :members: 81 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/merge.rst: -------------------------------------------------------------------------------- 1 | .. _environments_merge: 2 | 3 | .. currentmodule:: highway_env.envs.merge_env 4 | 5 | Merge 6 | ********** 7 | 8 | In this task, the ego-vehicle starts on a main highway but soon approaches a road junction with incoming vehicles on the access ramp. The agent's objective is now to maintain a high speed while making room for the vehicles so that they can safely merge in the traffic. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/merge-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:merge_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. 
code-block:: python 19 | 20 | env = gym.make("merge-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "TimeToCollision" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction" 34 | }, 35 | "simulation_frequency": 15, # [Hz] 36 | "policy_frequency": 1, # [Hz] 37 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 38 | "screen_width": 600, # [px] 39 | "screen_height": 150, # [px] 40 | "centering_position": [0.3, 0.5], 41 | "scaling": 5.5, 42 | "show_trajectories": False, 43 | "render_agent": True, 44 | "offscreen_rendering": False 45 | } 46 | 47 | More specifically, it is defined in: 48 | 49 | .. automethod:: MergeEnv.default_config 50 | 51 | API 52 | ===== 53 | 54 | .. autoclass:: MergeEnv 55 | :members: 56 | 57 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/parking.rst: -------------------------------------------------------------------------------- 1 | .. _environments_parking: 2 | 3 | .. currentmodule:: highway_env.envs.parking_env 4 | 5 | Parking 6 | ********** 7 | 8 | A goal-conditioned continuous control task in which the ego-vehicle must park in a given space with the appropriate heading. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/parking-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:parking_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("parking-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "KinematicsGoal", 31 | "features": ['x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 32 | "scales": [100, 100, 5, 5, 1, 1], 33 | "normalize": False 34 | }, 35 | "action": { 36 | "type": "ContinuousAction" 37 | }, 38 | "simulation_frequency": 15, 39 | "policy_frequency": 5, 40 | "screen_width": 600, 41 | "screen_height": 300, 42 | "centering_position": [0.5, 0.5], 43 | "scaling": 7, 44 | "show_trajectories": False, 45 | "render_agent": True, 46 | "offscreen_rendering": False 47 | } 48 | 49 | More specifically, it is defined in: 50 | 51 | .. automethod:: ParkingEnv.default_config 52 | 53 | API 54 | ===== 55 | 56 | .. autoclass:: ParkingEnv 57 | :members: 58 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/racetrack.rst: -------------------------------------------------------------------------------- 1 | .. _environments_racetrack: 2 | 3 | .. currentmodule:: highway_env.envs.racetrack_env 4 | 5 | Racetrack 6 | ********** 7 | 8 | A continuous control environment, where the agent has to follow the tracks while avoiding collisions with other vehicles. 9 | 10 | Credits and many thanks to `@supperted825 `_ for the `idea and initial implementation `_. 11 | 12 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/racetrack-env.gif 13 | :width: 80% 14 | :align: center 15 | :name: fig:racetrack_env 16 | 17 | Usage 18 | ========== 19 | 20 | .. code-block:: python 21 | 22 | env = gym.make("racetrack-v0") 23 | 24 | 25 | Default configuration 26 | ===================== 27 | 28 | .. 
code-block:: python 29 | 30 | { 31 | "observation": { 32 | "type": "OccupancyGrid", 33 | "features": ['presence', 'on_road'], 34 | "grid_size": [[-18, 18], [-18, 18]], 35 | "grid_step": [3, 3], 36 | "as_image": False, 37 | "align_to_vehicle_axes": True 38 | }, 39 | "action": { 40 | "type": "ContinuousAction", 41 | "longitudinal": False, 42 | "lateral": True 43 | }, 44 | "simulation_frequency": 15, 45 | "policy_frequency": 5, 46 | "duration": 300, 47 | "collision_reward": -1, 48 | "lane_centering_cost": 4, 49 | "action_reward": -0.3, 50 | "controlled_vehicles": 1, 51 | "other_vehicles": 1, 52 | "screen_width": 600, 53 | "screen_height": 600, 54 | "centering_position": [0.5, 0.5], 55 | "scaling": 7 56 | "show_trajectories": False, 57 | "render_agent": True, 58 | "offscreen_rendering": False 59 | } 60 | 61 | More specifically, it is defined in: 62 | 63 | .. automethod:: RacetrackEnv.default_config 64 | 65 | API 66 | ===== 67 | 68 | .. autoclass:: RacetrackEnv 69 | :members: 70 | -------------------------------------------------------------------------------- /highway_modify/docs/source/environments/roundabout.rst: -------------------------------------------------------------------------------- 1 | .. _environments_roundabout: 2 | 3 | .. currentmodule:: highway_env.envs.roundabout_env 4 | 5 | Roundabout 6 | ********** 7 | 8 | In this task, the ego-vehicle if approaching a roundabout with flowing traffic. It will follow its planned route automatically, but has to handle lane changes and longitudinal control to pass the roundabout as fast as possible while avoiding collisions. 9 | 10 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/roundabout-env.gif 11 | :width: 80% 12 | :align: center 13 | :name: fig:roundabout_env 14 | 15 | Usage 16 | ========== 17 | 18 | .. code-block:: python 19 | 20 | env = gym.make("roundabout-v0") 21 | 22 | 23 | Default configuration 24 | ===================== 25 | 26 | .. code-block:: python 27 | 28 | { 29 | "observation": { 30 | "type": "TimeToCollision" 31 | }, 32 | "action": { 33 | "type": "DiscreteMetaAction" 34 | }, 35 | "incoming_vehicle_destination": None, 36 | "duration": 11 37 | "simulation_frequency": 15, # [Hz] 38 | "policy_frequency": 1, # [Hz] 39 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 40 | "screen_width": 600, # [px] 41 | "screen_height": 600, # [px] 42 | "centering_position": [0.5, 0.6], 43 | "scaling": 5.5, 44 | "show_trajectories": False, 45 | "render_agent": True, 46 | "offscreen_rendering": False 47 | } 48 | 49 | More specifically, it is defined in: 50 | 51 | .. automethod:: RoundaboutEnv.default_config 52 | 53 | API 54 | ===== 55 | 56 | .. autoclass:: RoundaboutEnv 57 | :members: 58 | -------------------------------------------------------------------------------- /highway_modify/docs/source/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | ============================= 4 | Frequently Asked Questions 5 | ============================= 6 | 7 | 8 | This is a list of Frequently Asked Questions about highway-env. Feel free to 9 | suggest new entries! 10 | 11 | I try to train an agent using the Kinematics Observation and an MLP model, but the resulting policy is not optimal. Why? 12 | I also tend to get reasonable but sub-optimal policies using this observation-model pair. 13 | In :cite:`Leurent2019social`, we argued that a possible reason is that the MLP output depends on the order of 14 | vehicles in the observation. 
Indeed, if the agent revisits a given scene but observes vehicles described in a different 15 | order, it will see it as a novel state and will not be able to reuse past information. Thus, the agent struggles to 16 | make use of its observation. 17 | 18 | This can be addressed in two ways: 19 | 20 | * - Change the *model*, to use a permutation-invariant architecture which will not be sensitive to the vehicles order, such as *e.g.* :cite:`Qi2017pointnet` or :cite:`Leurent2019social`. 21 | This example is implemented `here (DQN) `_ or `here (SB3's PPO) `_. 22 | 23 | * - Change the *observation*. For example, the :ref:`Grayscale Image` does not depend on an ordering. In this case, a CNN model is more suitable than an MLP model. 24 | This example is implemented `here (SB3's DQN) `_. 25 | 26 | 27 | My videos are too fast / have a low framerate. 28 | This is because in openai/gym, a single video frame is generated at each call of ``env.step(action)``. However, in highway-env, the policy typically runs at a low-level frequency (e.g. 1 Hz) so that a long action (*e.g.* change lane) actually corresponds to several (typically, 15) simulation frames. 29 | In order to also render these intermediate simulation frames, the following should be done: 30 | 31 | .. code-block:: python 32 | 33 | import gym 34 | import highway_env 35 | 36 | # Wrap the env by a RecordVideo wrapper 37 | env = gym.make("highway-v0") 38 | env = RecordVideo(env, video_folder="run", 39 | episode_trigger=lambda e: True) # record all episodes 40 | 41 | # Provide the video recorder to the wrapped environment 42 | # so it can send it intermediate simulation frames. 43 | env.unwrapped.set_record_video_wrapper(env) 44 | 45 | # Record a video as usual 46 | obs = env.reset() 47 | done = False: 48 | while not done: 49 | action = env.action_space.sample() 50 | obs, reward, done, info = env.step(action) 51 | env.render() 52 | env.close() -------------------------------------------------------------------------------- /highway_modify/docs/source/graphics/index.rst: -------------------------------------------------------------------------------- 1 | .. _graphics: 2 | 3 | .. py:currentmodule::highway_env.envs.common.graphics 4 | 5 | Graphics 6 | ============= 7 | 8 | Environment rendering is done with `pygame `_, which must be :ref:`installed separately `. 9 | 10 | A window is created at the first call of ``env.render()``. Its dimensions can be configured: 11 | 12 | .. code-block:: python 13 | 14 | env = gym.make("roundabout-v0") 15 | env.configure({ 16 | "screen_width": 640, 17 | "screen_height": 480 18 | }) 19 | env.reset() 20 | env.render() 21 | 22 | World surface 23 | -------------- 24 | 25 | The simulation is rendered in a :py:class:`~highway_env.envs.common.graphics.RoadSurface` pygame surface, which defines the location and zoom of the rendered location. 26 | By default, the rendered area is always centered on the ego-vehicle. 27 | Its initial scale and offset can be set with the ``"scaling"`` and ``"centering_position"`` configurations, and can also be 28 | updated during simulation using the O,L keys and K,M keys, respectively. 29 | 30 | Scene graphics 31 | --------------- 32 | 33 | - Roads are rendered in the :py:class:`~highway_env.road.graphics.RoadGraphics` class. 34 | - Vehicles are rendered in the :py:class:`~highway_env.vehicle.graphics.VehicleGraphics` class. 35 | 36 | 37 | API 38 | ----------- 39 | 40 | 41 | .. automodule:: highway_env.envs.common.graphics 42 | :members: 43 | 44 | .. 
automodule:: highway_env.road.graphics 45 | :members: 46 | 47 | .. automodule:: highway_env.vehicle.graphics 48 | :members: -------------------------------------------------------------------------------- /highway_modify/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. highway-env documentation master file, created by 2 | sphinx-quickstart on Wed Feb 28 15:51:44 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | .. |Build Status| image:: https://github.com/eleurent/highway-env/workflows/build/badge.svg 8 | :target: https://github.com/eleurent/highway-env/workflows/build/ 9 | 10 | .. |Documentation Status| image:: https://readthedocs.org/projects/highway-env/badge/?version=latest 11 | :target: https://highway-env.readthedocs.io/en/latest/ 12 | 13 | .. |Downloads| image:: https://img.shields.io/pypi/dm/highway-env 14 | :target: https://pypi.org/project/highway-env/ 15 | 16 | .. |Codacy Status| image:: https://api.codacy.com/project/badge/Grade/63847d9328f64fce9c137b03fcafcc27 17 | :target: https://app.codacy.com/manual/eleurent/highway-env?utm_source=github.com&utm_medium=referral&utm_content=eleurent/highway-env&utm_campaign=Badge_Grade_Dashboard 18 | 19 | .. |Coverage Status| image:: https://codecov.io/gh/eleurent/highway-env/branch/master/graph/badge.svg 20 | :target: https://codecov.io/gh/eleurent/highway-env 21 | 22 | .. |Contributors| image:: https://img.shields.io/github/contributors/eleurent/highway-env 23 | :target: https://github.com/eleurent/highway-env/graphs/contributors 24 | 25 | .. |Environments| image:: https://img.shields.io/github/search/eleurent/highway-env/import%20filename:*_env%20path:highway_env/envs?label=environments 26 | :target: https://highway-env.readthedocs.io/en/latest/quickstart.html#all-the-environments 27 | 28 | 29 | |Build Status| |Documentation Status| |Downloads| |Codacy Status| |Coverage Status| |Contributors| |Environments| 30 | 31 | Welcome to `highway-env `_'s documentation! 32 | ==================================================================================== 33 | 34 | This project gathers a collection of environment for *decision-making* in Autonomous Driving. 35 | 36 | The purpose of this documentation is to provide: 37 | 38 | 1. a :ref:`quick start guide ` describing the environments and their customization options; 39 | 2. a :ref:`detailed description ` of the nuts and bolts of the project, and how *you* can contribute. 40 | 41 | .. _index_how_to_cite_this_work: 42 | 43 | How to cite this work? 44 | ====================== 45 | 46 | If you use this package, please consider citing it with this piece of 47 | BibTeX: 48 | 49 | .. code:: bibtex 50 | 51 | @misc{highway-env, 52 | author = {Leurent, Edouard}, 53 | title = {An Environment for Autonomous Driving Decision-Making}, 54 | year = {2018}, 55 | publisher = {GitHub}, 56 | journal = {GitHub repository}, 57 | howpublished = {\url{https://github.com/eleurent/highway-env}}, 58 | } 59 | 60 | Documentation contents 61 | ====================== 62 | 63 | .. toctree:: 64 | :maxdepth: 2 65 | 66 | installation 67 | quickstart 68 | user_guide 69 | faq 70 | bibliography/index 71 | -------------------------------------------------------------------------------- /highway_modify/docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. 
_install: 2 | 3 | Installation 4 | ============ 5 | 6 | Prerequisites 7 | ------------- 8 | 9 | This project requires python3 (>=3.5) 10 | 11 | The graphics require the installation of `pygame `_, which itself has dependencies that must be installed manually. 12 | 13 | 14 | Ubuntu 15 | ~~~~~~ 16 | 17 | .. code-block:: bash 18 | 19 | sudo apt-get update -y 20 | sudo apt-get install -y python-dev libsdl-image1.2-dev libsdl-mixer1.2-dev 21 | libsdl-ttf2.0-dev libsdl1.2-dev libsmpeg-dev python-numpy subversion libportmidi-dev 22 | ffmpeg libswscale-dev libavformat-dev libavcodec-dev libfreetype6-dev gcc 23 | 24 | Windows 10 25 | ~~~~~~~~~~ 26 | 27 | We recommend using `Anaconda `_. 28 | 29 | 30 | Stable release 31 | --------------------- 32 | To install the latest stable version: 33 | 34 | .. code-block:: bash 35 | 36 | pip install highway-env 37 | 38 | Development version 39 | --------------------- 40 | 41 | To install the current development version: 42 | 43 | .. code-block:: bash 44 | 45 | pip install --user git+https://github.com/eleurent/highway-env 46 | -------------------------------------------------------------------------------- /highway_modify/docs/source/make_your_own.rst: -------------------------------------------------------------------------------- 1 | .. _make_your_own: 2 | 3 | Make your own environment 4 | ========================== 5 | 6 | Here are the steps required to create a new environment. 7 | 8 | .. note:: 9 | Pull requests are welcome! 10 | 11 | Set up files 12 | ------------ 13 | 14 | 1. Create a new ``your_env.py`` file in ``highway_env/envs/`` 15 | 2. Define a class YourEnv, that must inherit from :py:class:`~highway_env.envs.common.abstract.AbstractEnv` 16 | 17 | This class provides several useful functions: 18 | 19 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.default_config` method, that provides a default configuration dictionary that can be overloaded. 20 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.define_spaces` method, that gives access to a choice of observation and action types, set from the environment configuration 21 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.step` method, which executes the desired actions (at policy frequency) and simulate the environment (at simulation frequency) 22 | * A :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.render` method, which renders the environment. 23 | 24 | Create the scene 25 | ------------------ 26 | 27 | The first step is to create a :py:class:`~highway_env.road.road.RoadNetwork` that describes the geometry and topology of 28 | roads and lanes in the scene. 29 | This should be achieved in a ``YourEnv._make_road()`` method, called from ``YourEnv.reset()`` to set the ``self.road`` field. 30 | 31 | See :ref:`Roads ` for reference, and existing environments as examples. 32 | 33 | Create the vehicles 34 | ------------------ 35 | 36 | The second step is to populate your road network with vehicles. This should be achieved in a ``YourEnv._make_road()`` 37 | method, called from ``YourEnv.reset()`` to set the ``self.road.vehicles`` list of :py:class:`~highway_env.vehicle.kinematics.Vehicle`. 38 | 39 | First, define the controlled ego-vehicle by setting ``self.vehicle``. The class of controlled vehicle depends on the 40 | choice of action type, and can be accessed as ``self.action_type.vehicle_class``. 41 | Other vehicles can be created more freely, and added to the ``self.road.vehicles`` list. 
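For concreteness, here is a minimal sketch of what such a vehicle-creation step could look like. It is named ``_make_vehicles()`` here to distinguish it from the road-creation step; the helper name, spawn positions and speeds are illustrative assumptions, and the exact constructor arguments should be checked against the :py:class:`~highway_env.vehicle.kinematics.Vehicle` and :py:class:`~highway_env.vehicle.behavior.IDMVehicle` APIs.

.. code-block:: python

    from highway_env.vehicle.behavior import IDMVehicle

    def _make_vehicles(self) -> None:
        # The ego-vehicle class is determined by the chosen action type.
        ego_class = self.action_type.vehicle_class
        ego = ego_class(self.road, position=[0, 0], heading=0, speed=20)
        self.vehicle = ego
        self.road.vehicles.append(ego)

        # Other (uncontrolled) vehicles can use any behavior model, e.g. the IDM model,
        # and are simply appended to the road's vehicle list.
        for i in range(3):
            npc = IDMVehicle(self.road, position=[30 * (i + 1), 0], heading=0, speed=15)
            self.road.vehicles.append(npc)
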
42 | 43 | See :ref:`vehicle behaviors ` for reference, and existing environments as examples. 44 | 45 | Make the environment configurable 46 | ------------------------------------ 47 | 48 | To make a part of your environment configurable, overload the :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.default_config` 49 | method to define new ``{"config_key": value}`` pairs with default values. These configurations can then be accessed in your 50 | environment implementation with ``self.config["config_key"]``, and once the environment is created, it can be configured with 51 | ``env.configure({"config_key": other_value})`` followed by ``env.reset()``. 52 | 53 | Register the environment 54 | --------------------------- 55 | 56 | In ``highway_env/envs/your_env.py``, add the following lines: 57 | 58 | .. code-block:: python 59 | 60 | register( 61 | id='your-env-v0', 62 | entry_point='highway_env.envs:YourEnv', 63 | ) 64 | 65 | and import it from ``highway_env/envs/__init__.py``: 66 | 67 | .. code-block:: python 68 | 69 | from highway_env.envs.your_env import * 70 | 71 | 72 | Profit 73 | -------- 74 | That's it! 75 | You should now be able to run the environment: 76 | 77 | .. code-block:: python 78 | 79 | import gym 80 | import highway_env 81 | 82 | env = gym.make('your-env-v0') 83 | obs = env.reset() 84 | obs, reward, done, info = env.step(env.action_space.sample()) 85 | env.render() 86 | 87 | API 88 | ------- 89 | 90 | 91 | .. automodule:: highway_env.envs.common.abstract 92 | :members: 93 | :private-members: 94 | 95 | -------------------------------------------------------------------------------- /highway_modify/docs/source/multi_agent.rst: -------------------------------------------------------------------------------- 1 | .. _multi_agent: 2 | 3 | The Multi-Agent setting 4 | ========================== 5 | 6 | Most environments can be configured into a multi-agent version. Here is how: 7 | 8 | Increase the number of controlled vehicles 9 | ------------------------------------------ 10 | 11 | To that end, update the :ref:`environment configuration ` to increase ``controlled_vehicles``. 12 | 13 | .. jupyter-execute:: 14 | 15 | import gym 16 | import highway_env 17 | 18 | env = gym.make('highway-v0') 19 | env.seed(0) 20 | 21 | env.configure({"controlled_vehicles": 2}) # Two controlled vehicles 22 | env.configure({"vehicles_count": 1}) # A single other vehicle, for the sake of visualisation 23 | env.reset() 24 | 25 | from matplotlib import pyplot as plt 26 | %matplotlib inline 27 | plt.imshow(env.render(mode="rgb_array")) 28 | plt.title("Controlled vehicles are in green") 29 | plt.show() 30 | 31 | Change the action space 32 | ----------------------- 33 | 34 | Right now, since the action space has not been changed, only the first vehicle is controlled by ``env.step(action)``. 35 | In order for the environment to accept a tuple of actions, its action type must be set to :py:class:`~highway_env.envs.common.action.MultiAgentAction`. 36 | The type of actions contained in the tuple must be described by a standard :ref:`action configuration ` in the ``action_config`` field. 37 | 38 | .. 
jupyter-execute:: 39 | 40 | env.configure({ 41 | "action": { 42 | "type": "MultiAgentAction", 43 | "action_config": { 44 | "type": "DiscreteMetaAction", 45 | } 46 | } 47 | }) 48 | env.reset() 49 | 50 | _, (ax1, ax2) = plt.subplots(nrows=2) 51 | ax1.imshow(env.render(mode="rgb_array")) 52 | ax1.set_title("Initial state") 53 | 54 | # Make the first vehicle change to the left lane, and the second one to the right 55 | action_1, action_2 = 0, 2 # See highway_env.envs.common.action.DiscreteMetaAction.ACTIONS_ALL 56 | env.step((action_1, action_2)) 57 | 58 | ax2.imshow(env.render(mode="rgb_array")) 59 | ax2.set_title("After sending actions to each vehicle") 60 | plt.show() 61 | 62 | 63 | Change the observation space 64 | ----------------------------- 65 | 66 | In order to actually decide what ``action_1`` and ``action_2`` should be, both vehicles must generate their own observations. 67 | As before, since the observation space has not been changed no far, the observation only includes that of the first vehicle. 68 | 69 | In order for the environment to return a tuple of observations -- one for each agent --, its observation type must be set to :py:class:`~highway_env.envs.common.observation.MultiAgentObservation` 70 | The type of observations contained in the tuple must be described by a standard :ref:`observation configuration ` in the ``observation_config`` field. 71 | 72 | .. jupyter-execute:: 73 | 74 | env.configure({ 75 | "observation": { 76 | "type": "MultiAgentObservation", 77 | "observation_config": { 78 | "type": "Kinematics", 79 | } 80 | } 81 | }) 82 | obs = env.reset() 83 | 84 | import pprint 85 | pprint.pprint(obs) 86 | 87 | Wrapping it up 88 | -------------- 89 | 90 | Here is a pseudo-code example of how a centralized multi-agent policy could be trained: 91 | 92 | .. jupyter-execute:: 93 | 94 | # Multi-agent environment configuration 95 | env.configure({ 96 | "controlled_vehicles": 2, 97 | "observation": { 98 | "type": "MultiAgentObservation", 99 | "observation_config": { 100 | "type": "Kinematics", 101 | } 102 | }, 103 | "action": { 104 | "type": "MultiAgentAction", 105 | "action_config": { 106 | "type": "DiscreteMetaAction", 107 | } 108 | } 109 | }) 110 | 111 | # Dummy RL algorithm 112 | class Model: 113 | """ Dummy code for an RL algorithm, which predicts an action from an observation, 114 | and update its model from observed transitions.""" 115 | 116 | def predict(self, obs): 117 | return 0 118 | 119 | def update(self, obs, action, next_obs, reward, info, done): 120 | pass 121 | model = Model() 122 | 123 | # A training episode 124 | obs = env.reset() 125 | done = False 126 | while not done: 127 | # Dispatch the observations to the model to get the tuple of actions 128 | action = tuple(model.predict(obs_i) for obs_i in obs) 129 | # Execute the actions 130 | next_obs, reward, info, done = env.step(action) 131 | # Update the model with the transitions observed by each agent 132 | for obs_i, action_i, next_obs_i in zip(obs, action, next_obs): 133 | model.update(obs_i, action_i, next_obs_i, reward, info, done) 134 | obs = next_obs 135 | 136 | 137 | For example, this is supported by `eleurent/rl-agents `_'s DQN implementation, and can be run with 138 | 139 | 140 | .. code-block:: bash 141 | 142 | cd 143 | python experiments.py evaluate configs/IntersectionEnv/env_multi_agent.json \ 144 | configs/IntersectionEnv/agents/DQNAgent/ego_attention_2h.json \ 145 | --train --episodes=3000 146 | 147 | .. 
figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/intersection_multi_agent.gif 148 | 149 | Video of a multi-agent episode with the trained policy. 150 | -------------------------------------------------------------------------------- /highway_modify/docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | =============== 4 | Getting Started 5 | =============== 6 | 7 | Making an environment 8 | ------------------------ 9 | 10 | Here is a quick example of how to create an environment: 11 | 12 | .. jupyter-execute:: 13 | 14 | import gym 15 | import highway_env 16 | from matplotlib import pyplot as plt 17 | %matplotlib inline 18 | 19 | env = gym.make('highway-v0') 20 | env.reset() 21 | for _ in range(3): 22 | action = env.action_type.actions_indexes["IDLE"] 23 | obs, reward, done, info = env.step(action) 24 | env.render() 25 | 26 | plt.imshow(env.render(mode="rgb_array")) 27 | plt.show() 28 | 29 | All the environments 30 | ~~~~~~~~~~~~~~~~~~~~ 31 | Here is the list of all the environments available and their descriptions: 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | 36 | environments/highway 37 | environments/merge 38 | environments/roundabout 39 | environments/parking 40 | environments/intersection 41 | environments/racetrack 42 | 43 | .. _configuration: 44 | 45 | Configuring an environment 46 | --------------------------- 47 | 48 | The :ref:`observations `, :ref:`actions `, :ref:`dynamics ` and :ref:`rewards ` 49 | of an environment are parametrized by a configuration, defined as a 50 | :py:attr:`~highway_env.envs.common.abstract.AbstractEnv.config` dictionary. 51 | After environment creation, the configuration can be accessed using the 52 | :py:attr:`~highway_env.envs.common.abstract.AbstractEnv.config` attribute. 53 | 54 | .. jupyter-execute:: 55 | 56 | import pprint 57 | 58 | env = gym.make("highway-v0") 59 | pprint.pprint(env.config) 60 | 61 | For example, the number of lanes can be changed with: 62 | 63 | .. jupyter-execute:: 64 | 65 | env.config["lanes_count"] = 2 66 | env.reset() 67 | plt.imshow(env.render(mode="rgb_array")) 68 | plt.show() 69 | 70 | .. note:: 71 | 72 | The environment must be :py:meth:`~highway_env.envs.common.abstract.AbstractEnv.reset` for the change of configuration 73 | to be effective. 74 | 75 | 76 | Training an agent 77 | ------------------- 78 | 79 | Reinforcement Learning agents can be trained using libraries such as `eleurent/rl-agents `_, 80 | `openai/baselines `_ or `Stable Baselines3 `_. 81 | 82 | Here is an example of SB3's DQN implementation trained on ``highway-fast-v0`` with its default kinematics observation and an MLP model. 83 | 84 | .. |highway_dqn| image:: https://colab.research.google.com/assets/colab-badge.svg 85 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/sb3_highway_dqn.ipynb 86 | 87 | |highway_dqn| 88 | 89 | .. 
code-block:: python 90 | 91 | import gym 92 | import highway_env 93 | from stable_baselines3 import DQN 94 | 95 | env = gym.make("highway-fast-v0") 96 | model = DQN('MlpPolicy', env, 97 | policy_kwargs=dict(net_arch=[256, 256]), 98 | learning_rate=5e-4, 99 | buffer_size=15000, 100 | learning_starts=200, 101 | batch_size=32, 102 | gamma=0.8, 103 | train_freq=1, 104 | gradient_steps=1, 105 | target_update_interval=50, 106 | verbose=1, 107 | tensorboard_log="highway_dqn/") 108 | model.learn(int(2e4)) 109 | model.save("highway_dqn/model") 110 | 111 | # Load and test saved model 112 | model = DQN.load("highway_dqn/model") 113 | while True: 114 | done = False 115 | obs = env.reset() 116 | while not done: 117 | action, _states = model.predict(obs, deterministic=True) 118 | obs, reward, done, info = env.step(action) 119 | env.render() 120 | 121 | A full run takes about 25mn on my laptop (fps=14). The following results are obtained: 122 | 123 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway_fast_dqn.png 124 | 125 | Training curves, for 5 random seeds. 126 | 127 | .. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/highway_fast_dqn.gif 128 | 129 | Video of an episode run with the trained policy. 130 | 131 | .. note:: 132 | 133 | There are several ways to get better performances. For instance, `SB3 provides only vanilla Deep Q-Learning and has no extensions such as Double-DQN, Dueling-DQN and Prioritized Experience Replay `_. 134 | However, `eleurent/rl-agents `_'s implementation of DQN does provide those extensions, which yields better results. Improvements can also be obtained by changing the observation type or the model, see the :ref:`FAQ `. 135 | 136 | 137 | Examples on Google Colab 138 | ------------------------- 139 | 140 | Several scripts and notebooks to train driving policies on `highway-env` are available `on this page `_. 141 | Here are a few of them: 142 | 143 | .. |highway_dqn_cnn| image:: https://colab.research.google.com/assets/colab-badge.svg 144 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/sb3_highway_dqn_cnn.ipynb 145 | .. |planning_hw| image:: https://colab.research.google.com/assets/colab-badge.svg 146 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/highway_planning.ipynb 147 | .. |parking_mb| image:: https://colab.research.google.com/assets/colab-badge.svg 148 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/parking_model_based.ipynb 149 | .. |parking_her| image:: https://colab.research.google.com/assets/colab-badge.svg 150 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/parking_her.ipynb 151 | .. |dqn_social| image:: https://colab.research.google.com/assets/colab-badge.svg 152 | :target: https://colab.research.google.com/github/eleurent/highway-env/blob/master/scripts/intersection_social_dqn.ipynb 153 | 154 | - | Highway with image observations and a CNN model |highway_dqn_cnn| 155 | | Train SB3's DQN on `highway-fast-v0` , but using :ref:`image observations ` and a CNN model for the value function. 156 | - | Trajectory Planning on Highway |planning_hw| 157 | | Plan a trajectory on `highway-v0` using the `OPD` :cite:`Hren2008` implementation from `eleurent/rl-agents `_. 
158 | - | A Model-based Reinforcement Learning tutorial on Parking |parking_mb| 159 | | A tutorial written for `RLSS 2019 `_ and demonstrating the principle of model-based reinforcement learning on the `parking-v0` task. 160 | - | Parking with Hindsight Experience Replay |parking_her| 161 | | Train a goal-conditioned `parking-v0` policy using the `HER` :cite:`Andrychowicz2017` implementation from `stable-baselines `_. 162 | - | Intersection with DQN and social attention |dqn_social| 163 | | Train an `intersection-v0` crossing policy using the social attention architecture :cite:`Leurent2019social` and the DQN implementation from `eleurent/rl-agents `_. -------------------------------------------------------------------------------- /highway_modify/docs/source/rewards/index.rst: -------------------------------------------------------------------------------- 1 | .. _rewards: 2 | 3 | Rewards 4 | ############ 5 | 6 | The reward function is defined in the :py:meth:`~highway_env.envs.common.abstract.AbstractEnv._reward` method, overloaded in every environment. 7 | 8 | .. note:: 9 | The choice of an appropriate reward function that yields realistic optimal driving behaviour is a challenging problem, that we do not address in this project. 10 | In particular, we do not wish to specify every single aspect of the expected driving behaviour inside the reward function, such as keeping a safe distance to the front vehicle. 11 | Instead, we would rather only specify a reward function as simple and straightforward as possible in order to see adequate behaviour emerge from learning. 12 | In this perspective, keeping a safe distance is optimal not for being directly rewarded but for robustness against the uncertain behaviour of the leading vehicle, which could brake at any time. 13 | 14 | Most environments 15 | ----------------- 16 | 17 | We generally focus on two features: a vehicle should 18 | 19 | - progress quickly on the road; 20 | - avoid collisions. 21 | 22 | Thus, the reward function is often composed of a velocity term and a collision term: 23 | 24 | .. math:: 25 | R(s,a) = a\frac{v - v_\min}{v_\max - v_\min} - b\,\text{collision} 26 | 27 | where :math:`v,\,v_\min,\,v_\max` are the current, minimum and maximum speed of the ego-vehicle respectively, and :math:`a,\,b` are two coefficients. 28 | 29 | 30 | .. note:: 31 | Since the rewards must be bounded, and the optimal policy is invariant by scaling and shifting rewards, we choose to normalize them in the :math:`[0, 1]` range, by convention. 32 | Normalizing rewards has also been observed to be practically beneficial in deep reinforcement learning :cite:`Mnih2015`. 33 | Note that we forbid negative rewards, since they may encourage the agent to prefer terminating an episode early (by causing a collision) rather than risking suffering a negative return if no satisfying trajectory can be found. 34 | 35 | In some environments, the weight of the collision penalty can be configured through the `collision_penalty` parameter. 36 | 37 | Goal environments 38 | ----------------- 39 | 40 | In the :ref:`Parking ` environment, however, the reward function must also specify the desired goal destination. 41 | Thus, the velocity term is replaced by a weighted p-norm between the agent state and the goal state. 42 | 43 | 44 | .. 
math:: 45 | R(s,a) = -\| s - s_g \|_{W,p}^p - b\,\text{collision} 46 | 47 | where :math:`s = [x, y, v_x, v_y, \cos\psi, \sin\psi]`, :math:`s_g = [x_g, y_g, 0, 0, \cos\psi_g, \sin\psi_g]`, and 48 | :math:`\|x\|_{W,p} = (\sum_i |W_i x_i|^p)^{1/p}`. We use a p-norm rather than an Euclidean norm in order to have a narrower spike of rewards at the goal. 49 | -------------------------------------------------------------------------------- /highway_modify/docs/source/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. _user_guide: 2 | 3 | User Guide 4 | ============ 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | observations/index 10 | actions/index 11 | dynamics/index 12 | rewards/index 13 | graphics/index 14 | multi_agent 15 | make_your_own -------------------------------------------------------------------------------- /highway_modify/highway_env/__init__.py: -------------------------------------------------------------------------------- 1 | # Hide pygame support prompt 2 | import os 3 | os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1' 4 | # Import the envs module so that envs register themselves 5 | import highway_env.envs 6 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from highway_env.envs.highway_env import * 2 | from highway_env.envs.merge_env import * 3 | from highway_env.envs.parking_env import * 4 | from highway_env.envs.summon_env import * 5 | from highway_env.envs.roundabout_line4_env import * 6 | from highway_env.envs.roundabout_line2_env import * 7 | from highway_env.envs.two_way_env import * 8 | from highway_env.envs.intersection_env import * 9 | from highway_env.envs.lane_keeping_env import * 10 | from highway_env.envs.u_turn_env import * 11 | from highway_env.envs.exit_env import * 12 | from highway_env.envs.racetrack_env import * 13 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/envs/common/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/exit_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Tuple 3 | from gym.envs.registration import register 4 | 5 | from highway_env import utils 6 | from highway_env.envs import HighwayEnv, CircularLane, Vehicle 7 | from highway_env.envs.common.action import Action 8 | from highway_env.road.road import Road, RoadNetwork 9 | from highway_env.vehicle.controller import ControlledVehicle 10 | 11 | 12 | class ExitEnv(HighwayEnv): 13 | """ 14 | """ 15 | @classmethod 16 | def default_config(cls) -> dict: 17 | config = super().default_config() 18 | config.update({ 19 | "observation": { 20 | "type": "ExitObservation", 21 | "vehicles_count": 15, 22 | "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"], 23 | "clip": False 24 | }, 25 | "action": { 26 | "type": "DiscreteMetaAction", 27 | "target_speeds": [18, 24, 30] 28 | }, 29 | "lanes_count": 6, 30 | "collision_reward": 0, 31 | "high_speed_reward": 0.1, 32 | "right_lane_reward": 0, 33 | "goal_reward": 1, 34 | "vehicles_count": 20, 35 | 
"vehicles_density": 1.5, 36 | "controlled_vehicles": 1, 37 | "duration": 18, # [s], 38 | "simulation_frequency": 5, 39 | "scaling": 5 40 | }) 41 | return config 42 | 43 | def _reset(self) -> None: 44 | self._create_road() 45 | self._create_vehicles() 46 | 47 | def step(self, action) -> Tuple[np.ndarray, float, bool, dict]: 48 | obs, reward, terminal, info = super().step(action) 49 | info.update({"is_success": self._is_success()}) 50 | return obs, reward, terminal, info 51 | 52 | def _create_road(self, road_length=1000, exit_position=400, exit_length=100) -> None: 53 | net = RoadNetwork.straight_road_network(self.config["lanes_count"], start=0, 54 | length=exit_position, nodes_str=("0", "1")) 55 | net = RoadNetwork.straight_road_network(self.config["lanes_count"] + 1, start=exit_position, 56 | length=exit_length, nodes_str=("1", "2"), net=net) 57 | net = RoadNetwork.straight_road_network(self.config["lanes_count"], start=exit_position+exit_length, 58 | length=road_length-exit_position-exit_length, 59 | nodes_str=("2", "3"), net=net) 60 | for _from in net.graph: 61 | for _to in net.graph[_from]: 62 | for _id in range(len(net.graph[_from][_to])): 63 | net.get_lane((_from, _to, _id)).speed_limit = 26 - 3.4 * _id 64 | exit_position = np.array([exit_position + exit_length, self.config["lanes_count"] * CircularLane.DEFAULT_WIDTH]) 65 | radius = 150 66 | exit_center = exit_position + np.array([0, radius]) 67 | lane = CircularLane(center=exit_center, 68 | radius=radius, 69 | start_phase=3*np.pi/2, 70 | end_phase=2*np.pi, 71 | forbidden=True) 72 | net.add_lane("2", "exit", lane) 73 | 74 | self.road = Road(network=net, 75 | np_random=self.np_random, 76 | record_history=self.config["show_trajectories"]) 77 | 78 | def _create_vehicles(self) -> None: 79 | """Create some new random vehicles of a given type, and add them on the road.""" 80 | self.controlled_vehicles = [] 81 | for _ in range(self.config["controlled_vehicles"]): 82 | vehicle = Vehicle.create_random(self.road, 83 | speed=25, 84 | lane_from="0", 85 | lane_to="1", 86 | lane_id=0, 87 | spacing=self.config["ego_spacing"]) 88 | vehicle = self.action_type.vehicle_class(self.road, vehicle.position, vehicle.heading, vehicle.speed) 89 | self.controlled_vehicles.append(vehicle) 90 | self.road.vehicles.append(vehicle) 91 | 92 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 93 | for _ in range(self.config["vehicles_count"]): 94 | lanes = np.arange(self.config["lanes_count"]) 95 | lane_id = self.road.np_random.choice(lanes, size=1, 96 | p=lanes / lanes.sum()).astype(int)[0] 97 | lane = self.road.network.get_lane(("0", "1", lane_id)) 98 | vehicle = vehicles_type.create_random(self.road, 99 | lane_from="0", 100 | lane_to="1", 101 | lane_id=lane_id, 102 | speed=lane.speed_limit, 103 | spacing=1 / self.config["vehicles_density"], 104 | ).plan_route_to("3") 105 | vehicle.enable_lane_change = False 106 | self.road.vehicles.append(vehicle) 107 | 108 | def _reward(self, action: Action) -> float: 109 | """ 110 | The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. 
111 | :param action: the last action performed 112 | :return: the corresponding reward 113 | """ 114 | lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \ 115 | else self.vehicle.lane_index 116 | scaled_speed = utils.lmap(self.vehicle.speed, self.config["reward_speed_range"], [0, 1]) 117 | reward = self.config["collision_reward"] * self.vehicle.crashed \ 118 | + self.config["goal_reward"] * self._is_success() \ 119 | + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) \ 120 | + self.config["right_lane_reward"] * lane_index[-1] 121 | 122 | reward = utils.lmap(reward, 123 | [self.config["collision_reward"], self.config["goal_reward"]], 124 | [0, 1]) 125 | reward = np.clip(reward, 0, 1) 126 | return reward 127 | 128 | def _is_success(self): 129 | lane_index = self.vehicle.target_lane_index if isinstance(self.vehicle, ControlledVehicle) \ 130 | else self.vehicle.lane_index 131 | goal_reached = lane_index == ("1", "2", self.config["lanes_count"]) or lane_index == ("2", "exit", 0) 132 | return goal_reached 133 | 134 | def _is_terminal(self) -> bool: 135 | """The episode is over if the ego vehicle crashed or the time is out.""" 136 | return self.vehicle.crashed or self.steps >= self.config["duration"] 137 | 138 | 139 | # class DenseLidarExitEnv(DenseExitEnv): 140 | # @classmethod 141 | # def default_config(cls) -> dict: 142 | # return dict(super().default_config(), 143 | # observation=dict(type="LidarObservation")) 144 | 145 | 146 | 147 | 148 | register( 149 | id='exit-v0', 150 | entry_point='highway_env.envs:ExitEnv', 151 | ) 152 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/highway_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.envs.common.action import Action 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.utils import near_split 9 | from highway_env.vehicle.controller import ControlledVehicle 10 | from highway_env.vehicle.kinematics import Vehicle 11 | 12 | 13 | class HighwayEnv(AbstractEnv): 14 | """ 15 | A highway driving environment. 16 | 17 | The vehicle is driving on a straight highway with several lanes, and is rewarded for reaching a high speed, 18 | staying on the rightmost lanes and avoiding collisions. 19 | """ 20 | 21 | @classmethod 22 | def default_config(cls) -> dict: 23 | config = super().default_config() 24 | config.update({ 25 | "observation": { 26 | "type": "Kinematics", 27 | "absolute": True, 28 | "features_range": {"x": [150, 700.], "y":[-12.,12.], "vx":[-80.,80.], "vy":[-80.,80.]}, 29 | }, 30 | "finish_position": [650.0, 8.0], 31 | "action": { 32 | "type": "DiscreteMetaAction", 33 | "absolute": True, 34 | }, 35 | "lanes_count": 4, 36 | "vehicles_speed": 35, 37 | "vehicles_count": 50, 38 | "controlled_vehicles": 1, 39 | "initial_lane_id": None, 40 | "duration": 40, # [s] 41 | "ego_spacing": 2, 42 | "vehicles_density": 1, 43 | "collision_reward": -1, # The reward received when colliding with a vehicle. 44 | "right_lane_reward": 0.1, # The reward received when driving on the right-most lanes, linearly mapped to 45 | # zero for other lanes. 
46 | "high_speed_reward": 0.4, # The reward received when driving at full speed, linearly mapped to zero for 47 | # lower speeds according to config["reward_speed_range"]. 48 | "lane_change_reward": 0, # The reward received at each lane change action. 49 | "reward_speed_range": [20, 30], 50 | "offroad_terminal": True 51 | }) 52 | return config 53 | 54 | def _reset(self) -> None: 55 | self._create_road() 56 | self._create_vehicles() 57 | 58 | def _create_road(self) -> None: 59 | """Create a road composed of straight adjacent lanes.""" 60 | self.road = Road(network=RoadNetwork.straight_road_network(self.config["lanes_count"], speed_limit=25), 61 | np_random=self.np_random, record_history=self.config["show_trajectories"]) 62 | 63 | def _create_vehicles(self) -> None: 64 | """Create some new random vehicles of a given type, and add them on the road.""" 65 | other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 66 | other_per_controlled = near_split(self.config["vehicles_count"], num_bins=self.config["controlled_vehicles"]) 67 | 68 | self.controlled_vehicles = [] 69 | for others in other_per_controlled: 70 | vehicle = Vehicle.create_random( 71 | self.road, 72 | speed=self.config['vehicles_speed'], 73 | lane_id=self.config["initial_lane_id"], 74 | spacing=self.config["ego_spacing"] 75 | ) 76 | vehicle = self.action_type.vehicle_class(self.road, vehicle.position, vehicle.heading, vehicle.speed) 77 | self.controlled_vehicles.append(vehicle) 78 | self.road.vehicles.append(vehicle) 79 | 80 | for _ in range(others): 81 | vehicle = other_vehicles_type.create_random(self.road, spacing=1 / self.config["vehicles_density"]) 82 | vehicle.randomize_behavior() 83 | self.road.vehicles.append(vehicle) 84 | 85 | def _reward(self, action: Action) -> float: 86 | """ 87 | The reward is defined to foster driving at high speed, on the rightmost lanes, and to avoid collisions. 
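Concretely, the reward computed below is a weighted sum of the collision indicator, the normalized lane index and the clipped scaled forward speed, rescaled from [collision_reward, high_speed_reward + right_lane_reward] to [0, 1] and set to 0 whenever the vehicle is off-road.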
88 | :param action: the last action performed 89 | :return: the corresponding reward 90 | """ 91 | neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index) 92 | lane = self.vehicle.target_lane_index[2] if isinstance(self.vehicle, ControlledVehicle) \ 93 | else self.vehicle.lane_index[2] 94 | # Use forward speed rather than speed, see https://github.com/eleurent/highway-env/issues/268 95 | forward_speed = self.vehicle.speed * np.cos(self.vehicle.heading) 96 | scaled_speed = utils.lmap(forward_speed, self.config["reward_speed_range"], [0, 1]) 97 | reward = \ 98 | + self.config["collision_reward"] * self.vehicle.crashed \ 99 | + self.config["right_lane_reward"] * lane / max(len(neighbours) - 1, 1) \ 100 | + self.config["high_speed_reward"] * np.clip(scaled_speed, 0, 1) 101 | reward = utils.lmap(reward, 102 | [self.config["collision_reward"], 103 | self.config["high_speed_reward"] + self.config["right_lane_reward"]], 104 | [0, 1]) 105 | reward = 0 if not self.vehicle.on_road else reward 106 | return reward 107 | 108 | def _is_terminal(self) -> bool: 109 | """The episode is over if the ego vehicle crashed or the time is out.""" 110 | return self.vehicle.crashed or \ 111 | self.steps >= self.config["duration"] or \ 112 | (self.config["offroad_terminal"] and not self.vehicle.on_road) 113 | 114 | def _cost(self, action: int) -> float: 115 | """The cost signal is the occurrence of collision.""" 116 | return float(self.vehicle.crashed) 117 | 118 | 119 | class HighwayEnvFast(HighwayEnv): 120 | """ 121 | A variant of highway-v0 with faster execution: 122 | - lower simulation frequency 123 | - fewer vehicles in the scene (and fewer lanes, shorter episode duration) 124 | - only check collision of controlled vehicles with others 125 | """ 126 | @classmethod 127 | def default_config(cls) -> dict: 128 | cfg = super().default_config() 129 | cfg.update({ 130 | "simulation_frequency": 5, 131 | "lanes_count": 3, 132 | "vehicles_count": 20, 133 | "duration": 15, # [s] 134 | "ego_spacing": 1.5, 135 | }) 136 | return cfg 137 | 138 | def _create_vehicles(self) -> None: 139 | super()._create_vehicles() 140 | # Disable collision check for uncontrolled vehicles 141 | for vehicle in self.road.vehicles: 142 | if vehicle not in self.controlled_vehicles: 143 | vehicle.check_collisions = False 144 | def _legal_terminal(self) -> bool: 145 | if self.steps >= self.config["duration"] * self.config["policy_frequency"]: 146 | return True 147 | else : 148 | return False 149 | 150 | register( 151 | id='highway-v0', 152 | entry_point='highway_env.envs:HighwayEnv', 153 | ) 154 | 155 | register( 156 | id='highway-fast-v0', 157 | entry_point='highway_env.envs:HighwayEnvFast', 158 | ) 159 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/lane_keeping_env.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import copy 4 | from typing import Tuple 5 | 6 | import numpy as np 7 | from gym.envs.registration import register 8 | 9 | from highway_env.envs.common.abstract import AbstractEnv 10 | from highway_env.road.lane import LineType, SineLane, StraightLane 11 | from highway_env.road.road import Road, RoadNetwork 12 | from highway_env.vehicle.dynamics import BicycleVehicle 13 | 14 | 15 | class LaneKeepingEnv(AbstractEnv): 16 | 17 | """A lane keeping control task.""" 18 | 19 | def __init__(self, config: dict = None) -> None: 20 | super().__init__(config) 
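# State kept by the lane-keeping task: the lane currently tracked, the queue of upcoming lanes, the logged (interval) trajectories, and an optional LPV model that step() advances alongside the vehicle (see store_data below).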
21 | self.lane = None 22 | self.lanes = [] 23 | self.trajectory = [] 24 | self.interval_trajectory = [] 25 | self.lpv = None 26 | 27 | @classmethod 28 | def default_config(cls) -> dict: 29 | config = super().default_config() 30 | config.update({ 31 | "observation": { 32 | "type": "AttributesObservation", 33 | "attributes": ["state", "derivative", "reference_state"] 34 | }, 35 | "action": { 36 | "type": "ContinuousAction", 37 | "steering_range": [-np.pi / 3, np.pi / 3], 38 | "longitudinal": False, 39 | "lateral": True, 40 | "dynamical": True 41 | }, 42 | "simulation_frequency": 10, 43 | "policy_frequency": 10, 44 | "state_noise": 0.05, 45 | "derivative_noise": 0.05, 46 | "screen_width": 600, 47 | "screen_height": 250, 48 | "scaling": 7, 49 | "centering_position": [0.4, 0.5] 50 | }) 51 | return config 52 | 53 | def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: 54 | if self.lanes and not self.lane.on_lane(self.vehicle.position): 55 | self.lane = self.lanes.pop(0) 56 | self.store_data() 57 | if self.lpv: 58 | self.lpv.set_control(control=action.squeeze(-1), 59 | state=self.vehicle.state[[1, 2, 4, 5]]) 60 | self.lpv.step(1 / self.config["simulation_frequency"]) 61 | 62 | self.action_type.act(action) 63 | obs = self.observation_type.observe() 64 | self._simulate() 65 | 66 | info = {} 67 | reward = self._reward(action) 68 | terminal = self._is_terminal() 69 | return obs, reward, terminal, info 70 | 71 | def _reward(self, action: np.ndarray) -> float: 72 | _, lat = self.lane.local_coordinates(self.vehicle.position) 73 | return 1 - (lat/self.lane.width)**2 74 | 75 | def _is_terminal(self) -> bool: 76 | return False # not self.lane.on_lane(self.vehicle.position) 77 | 78 | def _reset(self) -> None: 79 | self._make_road() 80 | self._make_vehicles() 81 | 82 | def _make_road(self) -> None: 83 | net = RoadNetwork() 84 | lane = SineLane([0, 0], [500, 0], amplitude=5, pulsation=2*np.pi / 100, phase=0, 85 | width=10, line_types=[LineType.STRIPED, LineType.STRIPED]) 86 | net.add_lane("a", "b", lane) 87 | other_lane = StraightLane([50, 50], [115, 15], 88 | line_types=(LineType.STRIPED, LineType.STRIPED), width=10) 89 | net.add_lane("c", "d", other_lane) 90 | self.lanes = [other_lane, lane] 91 | self.lane = self.lanes.pop(0) 92 | net.add_lane("d", "a", StraightLane([115, 15], [115+20, 15+20*(15-50)/(115-50)], 93 | line_types=(LineType.NONE, LineType.STRIPED), width=10)) 94 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 95 | self.road = road 96 | 97 | def _make_vehicles(self) -> None: 98 | road = self.road 99 | ego_vehicle = self.action_type.vehicle_class( 100 | road, road.network.get_lane(("c", "d", 0)).position(50, -4), 101 | heading=road.network.get_lane(("c", "d", 0)).heading_at(0), 102 | speed=8.3) 103 | road.vehicles.append(ego_vehicle) 104 | self.vehicle = ego_vehicle 105 | 106 | @property 107 | def dynamics(self) -> BicycleVehicle: 108 | return self.vehicle 109 | 110 | @property 111 | def state(self) -> np.ndarray: 112 | if not self.vehicle: 113 | return np.zeros((4, 1)) 114 | return self.vehicle.state[[1, 2, 4, 5]] + \ 115 | self.np_random.uniform(low=-self.config["state_noise"], 116 | high=self.config["state_noise"], 117 | size=self.vehicle.state[[0, 2, 4, 5]].shape) 118 | 119 | @property 120 | def derivative(self) -> np.ndarray: 121 | if not self.vehicle: 122 | return np.zeros((4, 1)) 123 | return self.vehicle.derivative[[1, 2, 4, 5]] + \ 124 | self.np_random.uniform(low=-self.config["derivative_noise"], 125 | 
high=self.config["derivative_noise"], 126 | size=self.vehicle.derivative[[0, 2, 4, 5]].shape) 127 | 128 | @property 129 | def reference_state(self) -> np.ndarray: 130 | if not self.vehicle or not self.lane: 131 | return np.zeros((4, 1)) 132 | longi, lat = self.lane.local_coordinates(self.vehicle.position) 133 | psi_l = self.lane.heading_at(longi) 134 | state = self.vehicle.state[[1, 2, 4, 5]] 135 | return np.array([[state[0, 0] - lat], [psi_l], [0], [0]]) 136 | 137 | def store_data(self) -> None: 138 | if self.lpv: 139 | state = self.vehicle.state.copy() 140 | interval = [] 141 | for x_t in self.lpv.change_coordinates(self.lpv.x_i_t, back=True, interval=True): 142 | # lateral state to full state 143 | np.put(state, [1, 2, 4, 5], x_t) 144 | # full state to absolute coordinates 145 | interval.append(state.squeeze(-1).copy()) 146 | self.interval_trajectory.append(interval) 147 | self.trajectory.append(copy.deepcopy(self.vehicle.state)) 148 | 149 | 150 | register( 151 | id='lane-keeping-v0', 152 | entry_point='highway_env.envs:LaneKeepingEnv', 153 | max_episode_steps=200 154 | ) 155 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/merge_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.road.lane import LineType, StraightLane, SineLane 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.controller import ControlledVehicle 9 | from highway_env.vehicle.objects import Obstacle 10 | 11 | 12 | class MergeEnv(AbstractEnv): 13 | 14 | """ 15 | A highway merge negotiation environment. 16 | 17 | The ego-vehicle is driving on a highway and approached a merge, with some vehicles incoming on the access ramp. 18 | It is rewarded for maintaining a high speed and avoiding collisions, but also making room for merging 19 | vehicles. 20 | """ 21 | 22 | @classmethod 23 | def default_config(cls) -> dict: 24 | cfg = super().default_config() 25 | cfg.update({ 26 | "observation": { 27 | "type": "Kinematics", 28 | "absolute": True, 29 | "features_range": {"x": [-500., 500.], "y":[-8.,8.], "vx":[-80.,80.], "vy":[-80.,80.]}, 30 | }, 31 | "finish_position": [400.0, 4.0], 32 | "collision_reward": -1, 33 | "right_lane_reward": 0.1, 34 | "high_speed_reward": 0.2, 35 | "merging_speed_reward": -0.5, 36 | "lane_change_reward": -0.05, 37 | "simulation_frequency": 5, 38 | "vehicles_count": 20, 39 | "duration": 12, # [s] 40 | "ego_spacing": 1.5, 41 | }) 42 | return cfg 43 | 44 | def _reward(self, action: int) -> float: 45 | """ 46 | The vehicle is rewarded for driving with high speed on lanes to the right and avoiding collisions 47 | 48 | But an additional altruistic penalty is also suffered if any vehicle on the merging lane has a low speed. 
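The penalty is proportional to how far each merging vehicle's speed falls below its target speed (see the loop over road.vehicles below), and the total is then rescaled to [0, 1].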
49 | 50 | :param action: the action performed 51 | :return: the reward of the state-action transition 52 | """ 53 | action_reward = {0: self.config["lane_change_reward"], 54 | 1: 0, 55 | 2: self.config["lane_change_reward"], 56 | 3: 0, 57 | 4: 0 } 58 | 59 | forward_speed = self.vehicle.speed * np.cos(self.vehicle.heading) 60 | scaled_speed = utils.lmap(forward_speed, [20,30], [0, 1]) 61 | reward = self.config["collision_reward"] * self.vehicle.crashed \ 62 | + self.config["right_lane_reward"] * self.vehicle.lane_index[2] / 1 \ 63 | + self.config["high_speed_reward"] * scaled_speed 64 | 65 | # Altruistic penalty 66 | for vehicle in self.road.vehicles: 67 | if vehicle.lane_index == ("b", "c", 2) and isinstance(vehicle, ControlledVehicle): 68 | reward += self.config["merging_speed_reward"] * \ 69 | (vehicle.target_speed - vehicle.speed) / vehicle.target_speed 70 | 71 | return utils.lmap(reward, 72 | [self.config["collision_reward"] + self.config["merging_speed_reward"], 73 | self.config["high_speed_reward"] + self.config["right_lane_reward"]], 74 | [0, 1]) 75 | 76 | def _is_terminal(self) -> bool: 77 | """The episode is over when a collision occurs or when the access ramp has been passed.""" 78 | return self.vehicle.crashed or self.vehicle.position[0] > 370 or not self.vehicle.on_road or self.steps >= self.config["duration"] 79 | 80 | def _reset(self) -> None: 81 | self._make_road() 82 | self._make_vehicles() 83 | 84 | def _make_road(self) -> None: 85 | """ 86 | Make a road composed of a straight highway and a merging lane. 87 | 88 | :return: the road 89 | """ 90 | net = RoadNetwork() 91 | 92 | # Highway lanes 93 | ends = [150, 80, 80, 150] # Before, converging, merge, after 94 | c, s, n = LineType.CONTINUOUS_LINE, LineType.STRIPED, LineType.NONE 95 | y = [0, StraightLane.DEFAULT_WIDTH] 96 | line_type = [[c, s], [n, c]] 97 | line_type_merge = [[c, s], [n, s]] 98 | for i in range(2): 99 | net.add_lane("a", "b", StraightLane([0, y[i]], [sum(ends[:2]), y[i]], line_types=line_type[i])) 100 | net.add_lane("b", "c", StraightLane([sum(ends[:2]), y[i]], [sum(ends[:3]), y[i]], line_types=line_type_merge[i])) 101 | net.add_lane("c", "d", StraightLane([sum(ends[:3]), y[i]], [sum(ends), y[i]], line_types=line_type[i])) 102 | 103 | # Merging lane 104 | amplitude = 3.25 105 | ljk = StraightLane([0, 6.5 + 4 + 4], [ends[0], 6.5 + 4 + 4], line_types=[c, c], forbidden=True) 106 | lkb = SineLane(ljk.position(ends[0], -amplitude), ljk.position(sum(ends[:2]), -amplitude), 107 | amplitude, 2 * np.pi / (2*ends[1]), np.pi / 2, line_types=[c, c], forbidden=True) 108 | lbc = StraightLane(lkb.position(ends[1], 0), lkb.position(ends[1], 0) + [ends[2], 0], 109 | line_types=[n, c], forbidden=True) 110 | net.add_lane("j", "k", ljk) 111 | net.add_lane("k", "b", lkb) 112 | net.add_lane("b", "c", lbc) 113 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 114 | road.objects.append(Obstacle(road, lbc.position(ends[2], 0))) 115 | self.road = road 116 | 117 | def _make_vehicles(self) -> None: 118 | """ 119 | Populate a road with several vehicles on the highway and on the merging lane, as well as an ego-vehicle. 
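The ego-vehicle starts on lane ("a", "b", 1) at 30 m/s, three vehicles of the configured other_vehicles_type are spread over the highway lanes, and one merging vehicle enters the ramp at 20 m/s with a target speed of 30 m/s, which is the situation the altruistic penalty in _reward is meant to penalise.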
120 | 121 | :return: the ego-vehicle 122 | """ 123 | road = self.road 124 | ego_vehicle = self.action_type.vehicle_class(road, 125 | road.network.get_lane(("a", "b", 1)).position(30, 0), 126 | speed=30) 127 | road.vehicles.append(ego_vehicle) 128 | 129 | other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 130 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 0)).position(90, 0), speed=29)) 131 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 1)).position(70, 0), speed=31)) 132 | road.vehicles.append(other_vehicles_type(road, road.network.get_lane(("a", "b", 0)).position(5, 0), speed=31.5)) 133 | 134 | merging_v = other_vehicles_type(road, road.network.get_lane(("j", "k", 0)).position(110, 0), speed=20) 135 | merging_v.target_speed = 30 136 | road.vehicles.append(merging_v) 137 | self.vehicle = ego_vehicle 138 | 139 | def _legal_terminal(self) -> bool: 140 | if self.steps >= self.config["duration"] * self.config["policy_frequency"]: 141 | return True 142 | else : 143 | return False 144 | 145 | register( 146 | id='merge-v0', 147 | entry_point='highway_env.envs:MergeEnv', 148 | ) 149 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/summon_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | import numpy as np 3 | 4 | from highway_env import utils 5 | from highway_env.envs import ParkingEnv 6 | from highway_env.road.lane import StraightLane, LineType 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.kinematics import Vehicle 9 | from highway_env.vehicle.objects import Landmark 10 | 11 | 12 | class SummonEnv(ParkingEnv): 13 | 14 | """ 15 | A continuous control environment. 16 | 17 | It implements a reach-type task, where the agent observes their position and speed and must 18 | control their acceleration and steering so as to reach a given goal. 19 | 20 | Credits to Vinny Ruia for the idea and initial implementation. 21 | """ 22 | 23 | @classmethod 24 | def default_config(cls) -> dict: 25 | config = super().default_config() 26 | config.update({ 27 | "vehicles_count": 10, 28 | "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle", 29 | }) 30 | return config 31 | 32 | def _create_road(self, spots: int = 15) -> None: 33 | """ 34 | Create a road composed of straight adjacent lanes. 
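Two rows of parking spots (edges "a"->"b" and "b"->"c") are laid out on either side of a set of striped middle lanes (edge "d"->"e") used by the moving vehicles; self.vehicle_starting records the spot where the ego-vehicle is later placed.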
35 | 36 | :param spots: number of parking spots 37 | """ 38 | net = RoadNetwork() 39 | 40 | width = 4.0 41 | lt = (LineType.CONTINUOUS, LineType.CONTINUOUS) 42 | x_offset = 0 43 | y_offset = 12 44 | length = 8 45 | # Parking spots 46 | for k in range(spots): 47 | x = (k - spots // 2) * (width + x_offset) - width / 2 48 | net.add_lane("a", "b", StraightLane([x, y_offset], [x, y_offset + length], 49 | width=width, line_types=lt, speed_limit=5)) 50 | net.add_lane("b", "c", StraightLane([x, -y_offset], [x, -y_offset - length], 51 | width=width, line_types=lt, speed_limit=5)) 52 | 53 | self.spots = spots 54 | self.vehicle_starting = [x, y_offset + (length / 2)] 55 | self.num_middle_lanes = 0 56 | self.x_range = (int(spots / 2) + 1) * width 57 | 58 | # Generate the middle lane for the busy parking lot 59 | for y in np.arange(-y_offset + width, y_offset, width): 60 | net.add_lane("d", "e", StraightLane([-self.x_range, y], [self.x_range, y], 61 | width=width, 62 | line_types=(LineType.STRIPED, LineType.STRIPED), 63 | speed_limit=5)) 64 | self.num_middle_lanes += 1 65 | 66 | self.road = Road(network=net, 67 | np_random=self.np_random, 68 | record_history=self.config["show_trajectories"]) 69 | 70 | def _create_vehicles(self, parked_probability: float = 0.75) -> None: 71 | """ 72 | Create some new random vehicles of a given type, and add them on the road. 73 | 74 | :param parked_probability: probability that a spot is occupied 75 | """ 76 | 77 | self.vehicle = self.action_type.vehicle_class(self.road, 78 | self.vehicle_starting, 79 | 2 * np.pi * self.np_random.rand(), 0) 80 | self.road.vehicles.append(self.vehicle) 81 | 82 | goal_position = [self.np_random.choice([-2 * self.spots - 10, 2 * self.spots + 10]), 0] 83 | self.goal = Landmark(self.road, goal_position, heading=0) 84 | self.road.objects.append(self.goal) 85 | 86 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 87 | for i in range(self.config["vehicles_count"]): 88 | is_parked = self.np_random.rand() <= parked_probability 89 | if not is_parked: 90 | # Just an effort to spread the vehicles out 91 | idx = self.np_random.randint(0, self.num_middle_lanes) 92 | longitudinal = (i * 5) - (self.x_range / 8) * self.np_random.randint(-1, 1) 93 | self.road.vehicles.append( 94 | vehicles_type.make_on_lane(self.road, ("d", "e", idx), longitudinal, speed=2)) 95 | else: 96 | lane = ("a", "b", i) if self.np_random.rand() >= 0.5 else ("b", "c", i) 97 | self.road.vehicles.append(Vehicle.make_on_lane(self.road, lane, 4, speed=0)) 98 | 99 | for v in self.road.vehicles: # Prevent early collisions 100 | if v is not self.vehicle and np.linalg.norm(v.position - self.vehicle.position) < 20: 101 | self.road.vehicles.remove(v) 102 | 103 | def compute_reward(self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info: dict, p: float = 0.5) -> float: 104 | """ 105 | Proximity to the goal is rewarded 106 | 107 | We use a weighted p-norm 108 | :param achieved_goal: the goal that was achieved 109 | :param desired_goal: the goal that was desired 110 | :param info: any supplementary information 111 | :param p: the Lp^p norm used in the reward. 
Use p<1 to have high kurtosis for rewards in [0, 1] 112 | :return: the corresponding reward 113 | """ 114 | return super().compute_reward(achieved_goal, desired_goal, info, p) + \ 115 | self.config["collision_reward"] * self.vehicle.crashed 116 | 117 | 118 | class SummonEnvActionRepeat(SummonEnv): 119 | def __init__(self): 120 | super().__init__() 121 | self.configure({"policy_frequency": 1}) 122 | 123 | 124 | register( 125 | id='summon-v0', 126 | entry_point='highway_env.envs:SummonEnv', 127 | max_episode_steps=100 128 | ) 129 | 130 | register( 131 | id='summon-ActionRepeat-v0', 132 | entry_point='highway_env.envs:SummonEnvActionRepeat', 133 | max_episode_steps=20 134 | ) 135 | -------------------------------------------------------------------------------- /highway_modify/highway_env/envs/two_way_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.registration import register 3 | 4 | from highway_env import utils 5 | from highway_env.envs.common.abstract import AbstractEnv 6 | from highway_env.road.lane import LineType, StraightLane 7 | from highway_env.road.road import Road, RoadNetwork 8 | from highway_env.vehicle.controller import MDPVehicle 9 | 10 | 11 | class TwoWayEnv(AbstractEnv): 12 | 13 | """ 14 | A risk management task: the agent is driving on a two-way lane with icoming traffic. 15 | 16 | It must balance making progress by overtaking and ensuring safety. 17 | 18 | These conflicting objectives are implemented by a reward signal and a constraint signal, 19 | in the CMDP/BMDP framework. 20 | """ 21 | 22 | @classmethod 23 | def default_config(cls) -> dict: 24 | config = super().default_config() 25 | config.update({ 26 | "observation": { 27 | "type": "TimeToCollision", 28 | "horizon": 5 29 | }, 30 | "action": { 31 | "type": "DiscreteMetaAction", 32 | }, 33 | "collision_reward": 0, 34 | "left_lane_constraint": 1, 35 | "left_lane_reward": 0.2, 36 | "high_speed_reward": 0.8, 37 | }) 38 | return config 39 | 40 | def _reward(self, action: int) -> float: 41 | """ 42 | The vehicle is rewarded for driving with high speed 43 | :param action: the action performed 44 | :return: the reward of the state-action transition 45 | """ 46 | neighbours = self.road.network.all_side_lanes(self.vehicle.lane_index) 47 | 48 | reward = self.config["high_speed_reward"] * self.vehicle.speed_index / (self.vehicle.target_speeds.size - 1) \ 49 | + self.config["left_lane_reward"] \ 50 | * (len(neighbours) - 1 - self.vehicle.target_lane_index[2]) / (len(neighbours) - 1) 51 | return reward 52 | 53 | def _is_terminal(self) -> bool: 54 | """The episode is over if the ego vehicle crashed or the time is out.""" 55 | return self.vehicle.crashed 56 | 57 | def _cost(self, action: int) -> float: 58 | """The constraint signal is the time spent driving on the opposite lane, and occurrence of collisions.""" 59 | return float(self.vehicle.crashed) + float(self.vehicle.lane_index[2] == 0)/15 60 | 61 | def _reset(self) -> np.ndarray: 62 | self._make_road() 63 | self._make_vehicles() 64 | 65 | def _make_road(self, length=800): 66 | """ 67 | Make a road composed of a two-way road. 
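The network has two lanes in the ego direction (edge "a"->"b") and a single opposite lane (edge "b"->"a") carrying the oncoming traffic that the agent must overtake into.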
68 | 69 | :return: the road 70 | """ 71 | net = RoadNetwork() 72 | 73 | # Lanes 74 | net.add_lane("a", "b", StraightLane([0, 0], [length, 0], 75 | line_types=(LineType.CONTINUOUS_LINE, LineType.STRIPED))) 76 | net.add_lane("a", "b", StraightLane([0, StraightLane.DEFAULT_WIDTH], [length, StraightLane.DEFAULT_WIDTH], 77 | line_types=(LineType.NONE, LineType.CONTINUOUS_LINE))) 78 | net.add_lane("b", "a", StraightLane([length, 0], [0, 0], 79 | line_types=(LineType.NONE, LineType.NONE))) 80 | 81 | road = Road(network=net, np_random=self.np_random, record_history=self.config["show_trajectories"]) 82 | self.road = road 83 | 84 | def _make_vehicles(self) -> None: 85 | """ 86 | Populate a road with several vehicles on the road 87 | 88 | :return: the ego-vehicle 89 | """ 90 | road = self.road 91 | ego_vehicle = self.action_type.vehicle_class(road, 92 | road.network.get_lane(("a", "b", 1)).position(30, 0), 93 | speed=30) 94 | road.vehicles.append(ego_vehicle) 95 | self.vehicle = ego_vehicle 96 | 97 | vehicles_type = utils.class_from_path(self.config["other_vehicles_type"]) 98 | for i in range(3): 99 | self.road.vehicles.append( 100 | vehicles_type(road, 101 | position=road.network.get_lane(("a", "b", 1)) 102 | .position(70+40*i + 10*self.np_random.randn(), 0), 103 | heading=road.network.get_lane(("a", "b", 1)).heading_at(70+40*i), 104 | speed=24 + 2*self.np_random.randn(), 105 | enable_lane_change=False) 106 | ) 107 | for i in range(2): 108 | v = vehicles_type(road, 109 | position=road.network.get_lane(("b", "a", 0)) 110 | .position(200+100*i + 10*self.np_random.randn(), 0), 111 | heading=road.network.get_lane(("b", "a", 0)).heading_at(200+100*i), 112 | speed=20 + 5*self.np_random.randn(), 113 | enable_lane_change=False) 114 | v.target_lane_index = ("b", "a", 0) 115 | self.road.vehicles.append(v) 116 | 117 | 118 | register( 119 | id='two-way-v0', 120 | entry_point='highway_env.envs:TwoWayEnv', 121 | max_episode_steps=15 122 | ) 123 | -------------------------------------------------------------------------------- /highway_modify/highway_env/road/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/road/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/road/regulation.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | 5 | from highway_env import utils 6 | from highway_env.road.road import Road, RoadNetwork 7 | from highway_env.vehicle.controller import ControlledVehicle, MDPVehicle 8 | from highway_env.vehicle.kinematics import Vehicle, Obstacle 9 | 10 | 11 | class RegulatedRoad(Road): 12 | YIELDING_COLOR: Tuple[float, float, float] = None 13 | REGULATION_FREQUENCY: int = 2 14 | YIELD_DURATION: float = 0. 
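# Every 1 / REGULATION_FREQUENCY seconds of simulated time, enforce_road_rules() predicts constant-speed trajectories for each pair of vehicles and, when a conflict is found, makes the lower-priority (or trailing) vehicle yield by setting its target speed to 0 for YIELD_DURATION seconds.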
15 | 16 | def __init__(self, network: RoadNetwork = None, vehicles: List[Vehicle] = None, obstacles: List[Obstacle] = None, 17 | np_random: np.random.RandomState = None, record_history: bool = False) -> None: 18 | super().__init__(network, vehicles, obstacles, np_random, record_history) 19 | self.steps = 0 20 | 21 | def step(self, dt: float) -> None: 22 | self.steps += 1 23 | if self.steps % int(1 / dt / self.REGULATION_FREQUENCY) == 0: 24 | self.enforce_road_rules() 25 | return super().step(dt) 26 | 27 | def enforce_road_rules(self) -> None: 28 | """Find conflicts and resolve them by assigning yielding vehicles and stopping them.""" 29 | 30 | # Unfreeze previous yielding vehicles 31 | for v in self.vehicles: 32 | if getattr(v, "is_yielding", False): 33 | if v.yield_timer >= self.YIELD_DURATION * self.REGULATION_FREQUENCY: 34 | v.target_speed = v.lane.speed_limit 35 | delattr(v, "color") 36 | v.is_yielding = False 37 | else: 38 | v.yield_timer += 1 39 | 40 | # Find new conflicts and resolve them 41 | for i in range(len(self.vehicles) - 1): 42 | for j in range(i+1, len(self.vehicles)): 43 | if self.is_conflict_possible(self.vehicles[i], self.vehicles[j]): 44 | yielding_vehicle = self.respect_priorities(self.vehicles[i], self.vehicles[j]) 45 | if yielding_vehicle is not None and \ 46 | isinstance(yielding_vehicle, ControlledVehicle) and \ 47 | not isinstance(yielding_vehicle, MDPVehicle): 48 | yielding_vehicle.color = self.YIELDING_COLOR 49 | yielding_vehicle.target_speed = 0 50 | yielding_vehicle.is_yielding = True 51 | yielding_vehicle.yield_timer = 0 52 | 53 | @staticmethod 54 | def respect_priorities(v1: Vehicle, v2: Vehicle) -> Vehicle: 55 | """ 56 | Resolve a conflict between two vehicles by determining who should yield 57 | 58 | :param v1: first vehicle 59 | :param v2: second vehicle 60 | :return: the yielding vehicle 61 | """ 62 | if v1.lane.priority > v2.lane.priority: 63 | return v2 64 | elif v1.lane.priority < v2.lane.priority: 65 | return v1 66 | else: # The vehicle behind should yield 67 | return v1 if v1.front_distance_to(v2) > v2.front_distance_to(v1) else v2 68 | 69 | @staticmethod 70 | def is_conflict_possible(v1: ControlledVehicle, v2: ControlledVehicle, horizon: int = 3, step: float = 0.25) -> bool: 71 | times = np.arange(step, horizon, step) 72 | positions_1, headings_1 = v1.predict_trajectory_constant_speed(times) 73 | positions_2, headings_2 = v2.predict_trajectory_constant_speed(times) 74 | 75 | for position_1, heading_1, position_2, heading_2 in zip(positions_1, headings_1, positions_2, headings_2): 76 | # Fast spherical pre-check 77 | if np.linalg.norm(position_2 - position_1) > v1.LENGTH: 78 | continue 79 | 80 | # Accurate rectangular check 81 | if utils.rotated_rectangles_intersect((position_1, 1.5*v1.LENGTH, 0.9*v1.WIDTH, heading_1), 82 | (position_2, 1.5*v2.LENGTH, 0.9*v2.WIDTH, heading_2)): 83 | return True 84 | -------------------------------------------------------------------------------- /highway_modify/highway_env/road/spline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import interpolate 3 | from typing import List, Tuple 4 | 5 | 6 | class LinearSpline2D: 7 | """ 8 | Piece-wise linear curve fitted to a list of points. 
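The curve is parametrised by cumulated arc length: interp1d lookups return positions and finite-difference tangents, and poses sampled every PARAM_CURVE_SAMPLE_DISTANCE metres support the Cartesian/Frenet conversions below.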
9 | """ 10 | 11 | PARAM_CURVE_SAMPLE_DISTANCE: int = 1 # curve samples are placed 1m apart 12 | 13 | def __init__(self, points: List[Tuple[float, float]]): 14 | x_values = np.array([pt[0] for pt in points]) 15 | y_values = np.array([pt[1] for pt in points]) 16 | x_values_diff = np.diff(x_values) 17 | x_values_diff = np.hstack((x_values_diff, x_values_diff[-1])) 18 | y_values_diff = np.diff(y_values) 19 | y_values_diff = np.hstack((y_values_diff, y_values_diff[-1])) 20 | arc_length_cumulated = np.hstack( 21 | (0, np.cumsum(np.sqrt(x_values_diff[:-1] ** 2 + y_values_diff[:-1] ** 2))) 22 | ) 23 | self.length = arc_length_cumulated[-1] 24 | self.x_curve = interpolate.interp1d( 25 | arc_length_cumulated, x_values, fill_value="extrapolate" 26 | ) 27 | self.y_curve = interpolate.interp1d( 28 | arc_length_cumulated, y_values, fill_value="extrapolate" 29 | ) 30 | self.dx_curve = interpolate.interp1d( 31 | arc_length_cumulated, x_values_diff, fill_value="extrapolate" 32 | ) 33 | self.dy_curve = interpolate.interp1d( 34 | arc_length_cumulated, y_values_diff, fill_value="extrapolate" 35 | ) 36 | 37 | (self.s_samples, self.poses) = self.sample_curve( 38 | self.x_curve, self.y_curve, self.length, self.PARAM_CURVE_SAMPLE_DISTANCE 39 | ) 40 | 41 | def __call__(self, lon: float) -> Tuple[float, float]: 42 | return self.x_curve(lon), self.y_curve(lon) 43 | 44 | def get_dx_dy(self, lon: float) -> Tuple[float, float]: 45 | idx_pose = self._get_idx_segment_for_lon(lon) 46 | pose = self.poses[idx_pose] 47 | return pose.normal 48 | 49 | def cartesian_to_frenet(self, position: Tuple[float, float]) -> Tuple[float, float]: 50 | """ 51 | Transform the point in Cartesian coordinates into Frenet coordinates of the curve 52 | """ 53 | 54 | pose = self.poses[-1] 55 | projection = pose.project_onto_normal(position) 56 | if projection >= 0: 57 | lon = self.s_samples[-1] + projection 58 | lat = pose.project_onto_orthonormal(position) 59 | return lon, lat 60 | 61 | for idx in list(range(len(self.s_samples) - 1))[::-1]: 62 | pose = self.poses[idx] 63 | projection = pose.project_onto_normal(position) 64 | if projection >= 0: 65 | if projection < pose.distance_to_origin(position): 66 | lon = self.s_samples[idx] + projection 67 | lat = pose.project_onto_orthonormal(position) 68 | return lon, lat 69 | else: 70 | ValueError("No valid projection could be found") 71 | pose = self.poses[0] 72 | lon = pose.project_onto_normal(position) 73 | lat = pose.project_onto_orthonormal(position) 74 | return lon, lat 75 | 76 | def frenet_to_cartesian(self, lon: float, lat: float) -> Tuple[float, float]: 77 | """ 78 | Convert the point from Frenet coordinates of the curve into Cartesian coordinates 79 | """ 80 | idx_segment = self._get_idx_segment_for_lon(lon) 81 | s = lon - self.s_samples[idx_segment] 82 | pose = self.poses[idx_segment] 83 | point = pose.position + s * pose.normal 84 | point += lat * pose.orthonormal 85 | return point 86 | 87 | def _get_idx_segment_for_lon(self, lon: float) -> int: 88 | """ 89 | Returns the index of the curve pose that corresponds to the longitudinal coordinate 90 | """ 91 | idx_smaller = np.argwhere(lon < self.s_samples) 92 | if len(idx_smaller) == 0: 93 | return len(self.s_samples) - 1 94 | if idx_smaller[0] == 0: 95 | return 0 96 | return int(idx_smaller[0]) - 1 97 | 98 | @staticmethod 99 | def sample_curve(x_curve, y_curve, length: float, CURVE_SAMPLE_DISTANCE=1): 100 | """ 101 | Create samples of the curve that are CURVE_SAMPLE_DISTANCE apart. 
These samples are used for Frenet to Cartesian 102 | conversion and vice versa 103 | """ 104 | num_samples = np.floor(length / CURVE_SAMPLE_DISTANCE) 105 | s_values = np.hstack( 106 | (CURVE_SAMPLE_DISTANCE * np.arange(0, int(num_samples) + 1), length) 107 | ) 108 | x_values = x_curve(s_values) 109 | y_values = y_curve(s_values) 110 | dx_values = np.diff(x_values) 111 | dx_values = np.hstack((dx_values, dx_values[-1])) 112 | dy_values = np.diff(y_values) 113 | dy_values = np.hstack((dy_values, dy_values[-1])) 114 | 115 | poses = [ 116 | CurvePose(x, y, dx, dy) 117 | for x, y, dx, dy in zip(x_values, y_values, dx_values, dy_values) 118 | ] 119 | 120 | return s_values, poses 121 | 122 | 123 | class CurvePose: 124 | """ 125 | Sample pose on a curve that is used for Frenet to Cartesian conversion 126 | """ 127 | 128 | def __init__(self, x: float, y: float, dx: float, dy: float): 129 | self.length = np.sqrt(dx**2 + dy**2) 130 | self.position = np.array([x, y]).flatten() 131 | self.normal = np.array([dx, dy]).flatten() / self.length 132 | self.orthonormal = np.array([-self.normal[1], self.normal[0]]).flatten() 133 | 134 | def distance_to_origin(self, point: Tuple[float, float]) -> float: 135 | """ 136 | Compute the distance between the point [x, y] and the pose origin 137 | """ 138 | return np.sqrt(np.sum((self.position - point) ** 2)) 139 | 140 | def project_onto_normal(self, point: Tuple[float, float]) -> float: 141 | """ 142 | Compute the longitudinal distance from pose origin to point by projecting the point onto the normal vector of the pose 143 | """ 144 | return self.normal.dot(point - self.position) 145 | 146 | def project_onto_orthonormal(self, point: Tuple[float, float]) -> float: 147 | """ 148 | Compute the lateral distance from pose origin to point by projecting the point onto the orthonormal vector of the pose 149 | """ 150 | return self.orthonormal.dot(point - self.position) 151 | -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/vehicle/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/uncertainty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/highway_env/vehicle/uncertainty/__init__.py -------------------------------------------------------------------------------- /highway_modify/highway_env/vehicle/uncertainty/estimation.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, Union 2 | 3 | import numpy as np 4 | 5 | from highway_env.road.road import Road, LaneIndex, Route 6 | from highway_env.utils import confidence_polytope, is_consistent_dataset, Vector 7 | from highway_env.vehicle.behavior import LinearVehicle 8 | from highway_env.vehicle.uncertainty.prediction import IntervalVehicle, Polytope 9 | 10 | 11 | class RegressionVehicle(IntervalVehicle): 12 | 13 | """Estimator for the parameter of a LinearVehicle.""" 14 | 15 | def longitudinal_matrix_polytope(self) -> Polytope: 16 | return self.polytope_from_estimation(self.data["longitudinal"], self.theta_a_i, self.longitudinal_structure) 17 | 18 | def 
lateral_matrix_polytope(self) -> Polytope: 19 | return self.polytope_from_estimation(self.data["lateral"], self.theta_b_i, self.lateral_structure) 20 | 21 | def polytope_from_estimation(self, data: dict, parameter_box: np.ndarray, structure: Callable[[], Polytope])\ 22 | -> Polytope: 23 | if not data: 24 | return self.parameter_box_to_polytope(parameter_box, structure) 25 | theta_n_lambda, d_theta, _, _ = confidence_polytope(data, parameter_box=parameter_box) 26 | a, phi = structure() 27 | a0 = a + np.tensordot(theta_n_lambda, phi, axes=[0, 0]) 28 | da = [np.tensordot(d_theta_k, phi, axes=[0, 0]) for d_theta_k in d_theta] 29 | return a0, da 30 | 31 | 32 | class MultipleModelVehicle(LinearVehicle): 33 | def __init__(self, road: Road, 34 | position: Vector, 35 | heading: float = 0, 36 | speed: float = 0, 37 | target_lane_index: LaneIndex = None, 38 | target_speed: float = None, 39 | route: Route = None, 40 | enable_lane_change: bool = True, 41 | timer: bool = None, 42 | data: dict = None) -> None: 43 | super().__init__(road, position, heading, speed, target_lane_index, target_speed, route, 44 | enable_lane_change, timer, data) 45 | if not self.data: 46 | self.data = [] 47 | 48 | def act(self, action: Union[dict, str] = None) -> None: 49 | if self.collecting_data: 50 | self.update_possible_routes() 51 | super().act(action) 52 | 53 | def collect_data(self) -> None: 54 | """Collect the features for each possible route, and true observed outputs.""" 55 | for route, data in self.data: 56 | self.add_features(data, route[0], output_lane=self.target_lane_index) 57 | 58 | def update_possible_routes(self) -> None: 59 | """ 60 | Update a list of possible routes that this vehicle could be following. 61 | 62 | - Add routes at the next intersection 63 | - Step the current lane in each route 64 | - Reject inconsistent routes 65 | """ 66 | 67 | for route in self.get_routes_at_intersection(): # Candidates 68 | # Unknown lane -> first lane 69 | for i, lane_index in enumerate(route): 70 | route[i] = lane_index if lane_index[2] is not None else (lane_index[0], lane_index[1], 0) 71 | # Is this route already considered, or a suffix of a route already considered ? 72 | for known_route, _ in self.data: 73 | if known_route == route: 74 | break 75 | elif len(known_route) < len(route) and route[:len(known_route)] == known_route: 76 | self.data = [(r, d) if r != known_route else (route, d) for r, d in self.data] 77 | break 78 | else: 79 | self.data.append((route.copy(), {})) # Add it 80 | 81 | # Step the lane being followed in each possible route 82 | for route, _ in self.data: 83 | if self.road.network.get_lane(route[0]).after_end(self.position): 84 | route.pop(0) 85 | 86 | # Reject inconsistent hypotheses 87 | for route, data in self.data.copy(): 88 | if data: 89 | if not is_consistent_dataset(data["lateral"], parameter_box=LinearVehicle.STEERING_RANGE): 90 | self.data.remove((route, data)) 91 | 92 | def assume_model_is_valid(self, index: int) -> "LinearVehicle": 93 | """ 94 | Get a copy of this vehicle behaving according to one of its possible routes. 
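If no route data has been collected yet, a plain copy of this vehicle is returned; otherwise the index is clamped to the number of stored hypotheses and the corresponding (route, data) pair is used to build a RegressionVehicle that follows that route.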
95 | 96 | :param index: index of the route to consider 97 | :return: a copy of the vehicle 98 | """ 99 | if not self.data: 100 | return self.create_from(self) 101 | index = min(index, len(self.data)-1) 102 | route, data = self.data[index] 103 | vehicle = RegressionVehicle.create_from(self) 104 | vehicle.target_lane_index = route[0] 105 | vehicle.route = route 106 | vehicle.data = data 107 | return vehicle 108 | -------------------------------------------------------------------------------- /highway_modify/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /highway_modify/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name=highway-env 3 | version=1.5 4 | author=Edouard Leurent 5 | author_email=eleurent@gmail.com 6 | description=An environment for simulated highway driving tasks. 7 | long_description=file:README.md 8 | long_description_content_type=text/markdown 9 | url=https://github.com/eleurent/highway-env 10 | license=MIT 11 | classifiers= 12 | Development Status :: 5 - Production/Stable 13 | Programming Language :: Python 14 | Programming Language :: Python :: 3 :: Only 15 | Programming Language :: Python :: 3.8 16 | License :: OSI Approved :: MIT License 17 | 18 | 19 | [options] 20 | setup_requires= 21 | pytest-runner 22 | install_requires= 23 | gym 24 | numpy 25 | pygame>=2.0.2 26 | matplotlib 27 | pandas 28 | scipy 29 | packages=find: 30 | tests_require= 31 | pytest 32 | 33 | [options.extras_require] 34 | deploy = pytest-runner; sphinx<1.7.3; sphinx_rtd_theme 35 | 36 | [options.packages.find] 37 | exclude = 38 | tests 39 | docs 40 | scripts 41 | 42 | 43 | [aliases] 44 | test=pytest 45 | -------------------------------------------------------------------------------- /highway_modify/setup.py: -------------------------------------------------------------------------------- 1 | # Following PEP 517/518, this file should not not needed and replaced instead by the setup.cfg file and pyproject.toml. 
2 | # Unfortunately it is still required py the pip editable mode `pip install -e` 3 | # See https://stackoverflow.com/a/60885212 4 | 5 | from setuptools import setup 6 | 7 | if __name__ == "__main__": 8 | setup() -------------------------------------------------------------------------------- /highway_modify/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/tests/__init__.py -------------------------------------------------------------------------------- /highway_modify/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/highway_modify/tests/envs/__init__.py -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_actions.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import pytest 3 | 4 | import highway_env 5 | 6 | action_configs = [ 7 | {"type": "ContinuousAction"}, 8 | {"type": "DiscreteAction"}, 9 | {"type": "DiscreteMetaAction"}, 10 | ] 11 | 12 | 13 | @pytest.mark.parametrize("action_config", action_configs) 14 | def test_action_type(action_config): 15 | env = gym.make("highway-v0") 16 | env.configure({"action": action_config}) 17 | env.reset() 18 | for _ in range(3): 19 | action = env.action_space.sample() 20 | obs, _, _, _ = env.step(action) 21 | assert env.action_space.contains(action) 22 | assert env.observation_space.contains(obs) 23 | env.close() -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_env_preprocessors.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import highway_env 4 | 5 | 6 | def test_preprocessors(): 7 | env = gym.make('highway-v0') 8 | env = env.simplify() 9 | env = env.change_vehicles("highway_env.vehicle.behavior.IDMVehicle") 10 | env = env.set_preferred_lane(0) 11 | env = env.set_route_at_intersection("random") 12 | env = env.set_vehicle_field(("crashed", False)) 13 | env = env.call_vehicle_method(("plan_route_to", "1")) 14 | env = env.randomize_behavior() 15 | 16 | env.reset() 17 | for _ in range(3): 18 | action = env.action_space.sample() 19 | obs, reward, _, _ = env.step(action) 20 | env.close() 21 | 22 | assert env.observation_space.contains(obs) 23 | assert 0 <= reward <= 1 24 | 25 | -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_gym.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import pytest 3 | 4 | import highway_env 5 | 6 | envs = [ 7 | "highway-v0", 8 | "merge-v0", 9 | "roundabout-v0", 10 | "intersection-v0", 11 | "intersection-v1", 12 | "parking-v0", 13 | "summon-v0", 14 | "two-way-v0", 15 | "lane-keeping-v0", 16 | "racetrack-v0", 17 | ] 18 | 19 | 20 | @pytest.mark.parametrize("env_spec", envs) 21 | def test_env_step(env_spec): 22 | env = gym.make(env_spec) 23 | 24 | env.reset() 25 | for _ in range(3): 26 | action = env.action_space.sample() 27 | obs, _, _, _ = env.step(action) 28 | env.close() 29 | 30 | assert env.observation_space.contains(obs) 31 | 32 | -------------------------------------------------------------------------------- /highway_modify/tests/envs/test_time.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | import timeit 3 | import gym 4 | 5 | import highway_env 6 | 7 | 8 | def wrapper(func, *args, **kwargs): 9 | def wrapped(): 10 | return func(*args, **kwargs) 11 | return wrapped 12 | 13 | 14 | def time_env(env_name, steps=20): 15 | env = gym.make(env_name) 16 | env.reset() 17 | for _ in range(steps): 18 | _, _, done, _ = env.step(env.action_space.sample()) 19 | env.reset() if done else _ 20 | env.close() 21 | 22 | 23 | def test_running_time(repeat=1): 24 | for env_name, steps in [ 25 | ("highway-v0", 10), 26 | ("highway-fast-v0", 10), 27 | ("parking-v0", 20) 28 | ]: 29 | env_time = wrapper(time_env, env_name, steps) 30 | time_spent = timeit.timeit(env_time, number=repeat) / repeat 31 | env = gym.make(env_name) 32 | time_simulated = steps / env.unwrapped.config["policy_frequency"] 33 | real_time_ratio = time_simulated / time_spent 34 | print("Real time ratio for {}: {}".format(env_name, real_time_ratio)) 35 | assert real_time_ratio > 0.5 # let's not be too ambitious for now 36 | 37 | 38 | if __name__ == "__main__": 39 | test_running_time() 40 | -------------------------------------------------------------------------------- /highway_modify/tests/graphics/test_render.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import pytest 4 | 5 | import highway_env 6 | 7 | envs = ["highway-v0", "merge-v0"] 8 | 9 | 10 | @pytest.mark.parametrize("env_spec", envs) 11 | def test_render(env_spec): 12 | env = gym.make(env_spec) 13 | env.configure({"offscreen_rendering": True}) 14 | img = env.render(mode="rgb_array") 15 | env.close() 16 | assert isinstance(img, np.ndarray) 17 | assert img.shape == (env.config["screen_height"], env.config["screen_width"], 3) # (H,W,C) 18 | 19 | 20 | @pytest.mark.parametrize("env_spec", envs) 21 | def test_obs_grayscale(env_spec, stack_size=4): 22 | env = gym.make(env_spec) 23 | env.configure({ 24 | "offscreen_rendering": True, 25 | "observation": { 26 | "type": "GrayscaleObservation", 27 | "observation_shape": (env.config["screen_width"], env.config["screen_height"]), 28 | "stack_size": stack_size, 29 | "weights": [0.2989, 0.5870, 0.1140], 30 | } 31 | }) 32 | obs = env.reset() 33 | env.close() 34 | assert isinstance(obs, np.ndarray) 35 | assert obs.shape == (stack_size, env.config["screen_width"], env.config["screen_height"]) 36 | -------------------------------------------------------------------------------- /highway_modify/tests/road/test_road.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from highway_env.road.lane import StraightLane, CircularLane, PolyLane 5 | from highway_env.road.road import Road, RoadNetwork 6 | from highway_env.vehicle.controller import ControlledVehicle 7 | 8 | 9 | @pytest.fixture 10 | def net() -> RoadNetwork: 11 | # Diamond 12 | net = RoadNetwork() 13 | net.add_lane(0, 1, StraightLane([0, 0], [10, 0])) 14 | net.add_lane(1, 2, StraightLane([10, 0], [5, 5])) 15 | net.add_lane(2, 0, StraightLane([5, 5], [0, 0])) 16 | net.add_lane(1, 3, StraightLane([10, 0], [5, -5])) 17 | net.add_lane(3, 0, StraightLane([5, -5], [0, 0])) 18 | print(net.graph) 19 | 20 | return net 21 | 22 | 23 | def test_network(net): 24 | # Road 25 | road = Road(network=net) 26 | v = ControlledVehicle(road, [5, 0], heading=0, target_speed=2) 27 | road.vehicles.append(v) 28 | assert v.lane_index == (0, 1, 0) 29 | 30 | # 
Lane changes 31 | dt = 1/15 32 | lane_index = v.target_lane_index 33 | lane_changes = 0 34 | for _ in range(int(20/dt)): 35 | road.act() 36 | road.step(dt) 37 | if lane_index != v.target_lane_index: 38 | lane_index = v.target_lane_index 39 | lane_changes += 1 40 | assert lane_changes >= 3 41 | 42 | 43 | def test_network_to_from_config(net): 44 | config_dict = net.to_config() 45 | net_2 = RoadNetwork.from_config(config_dict) 46 | assert len(net.graph) == len(net_2.graph) 47 | 48 | 49 | def test_polylane(): 50 | lane = CircularLane( 51 | center=[0, 0], 52 | radius=10, 53 | start_phase=0, 54 | end_phase=3.14, 55 | ) 56 | 57 | num_samples = int(lane.length / 5) 58 | sampled_centreline = [ 59 | lane.position(longitudinal=lon, lateral=0) 60 | for lon in np.linspace(0, lane.length, num_samples) 61 | ] 62 | sampled_left_boundary = [ 63 | lane.position(longitudinal=lon, lateral=0.5 * lane.width_at(longitudinal=lon)) 64 | for lon in np.linspace(0, lane.length, num_samples) 65 | ] 66 | sampled_right_boundary = [ 67 | lane.position(longitudinal=lon, lateral=-0.5 * lane.width_at(longitudinal=lon)) 68 | for lon in np.linspace(0, lane.length, num_samples) 69 | ] 70 | polylane = PolyLane( 71 | lane_points=sampled_centreline, 72 | left_boundary_points=sampled_left_boundary, 73 | right_boundary_points=sampled_right_boundary, 74 | ) 75 | 76 | # sample boundaries from both lanes and assert equal 77 | 78 | num_samples = int(lane.length / 3) 79 | # original lane 80 | sampled_centreline = [ 81 | lane.position(longitudinal=lon, lateral=0) 82 | for lon in np.linspace(0, lane.length, num_samples) 83 | ] 84 | sampled_left_boundary = [ 85 | lane.position(longitudinal=lon, lateral=0.5 * lane.width_at(longitudinal=lon)) 86 | for lon in np.linspace(0, lane.length, num_samples) 87 | ] 88 | sampled_right_boundary = [ 89 | lane.position(longitudinal=lon, lateral=-0.5 * lane.width_at(longitudinal=lon)) 90 | for lon in np.linspace(0, lane.length, num_samples) 91 | ] 92 | 93 | # polylane 94 | polylane_sampled_centreline = [ 95 | polylane.position(longitudinal=lon, lateral=0) 96 | for lon in np.linspace(0, polylane.length, num_samples) 97 | ] 98 | polylane_sampled_left_boundary = [ 99 | polylane.position( 100 | longitudinal=lon, lateral=0.5 * polylane.width_at(longitudinal=lon) 101 | ) 102 | for lon in np.linspace(0, polylane.length, num_samples) 103 | ] 104 | polylane_sampled_right_boundary = [ 105 | polylane.position( 106 | longitudinal=lon, lateral=-0.5 * polylane.width_at(longitudinal=lon) 107 | ) 108 | for lon in np.linspace(0, polylane.length, num_samples) 109 | ] 110 | 111 | # assert equal (very coarse because of coarse sampling) 112 | assert all( 113 | np.linalg.norm( 114 | np.array(sampled_centreline) - np.array(polylane_sampled_centreline), axis=1 115 | ) 116 | < 0.7 117 | ) 118 | assert all( 119 | np.linalg.norm( 120 | np.array(sampled_left_boundary) - np.array(polylane_sampled_left_boundary), 121 | axis=1, 122 | ) 123 | < 0.7 124 | ) 125 | assert all( 126 | np.linalg.norm( 127 | np.array(sampled_right_boundary) 128 | - np.array(polylane_sampled_right_boundary), 129 | axis=1, 130 | ) 131 | < 0.7 132 | ) 133 | -------------------------------------------------------------------------------- /highway_modify/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from highway_env.utils import rotated_rectangles_intersect 4 | 5 | 6 | def test_rotated_rectangles_intersect(): 7 | assert rotated_rectangles_intersect(([12.86076812, 28.60182391], 
5.0, 2.0, -0.4675779906495494), 8 | ([9.67753944, 28.90585412], 5.0, 2.0, -0.3417019364473201)) 9 | assert rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([0, 1], 2, 1, 0)) 10 | assert not rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([0, 2.1], 2, 1, 0)) 11 | assert not rotated_rectangles_intersect(([0, 0], 2, 1, 0), ([1, 1.1], 2, 1, 0)) 12 | assert rotated_rectangles_intersect(([0, 0], 2, 1, np.pi/4), ([1, 1.1], 2, 1, 0)) 13 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_behavior.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.vehicle.objects import Obstacle 4 | from highway_env.road.road import Road, RoadNetwork 5 | from highway_env.vehicle.behavior import IDMVehicle, LinearVehicle 6 | 7 | FPS = 15 8 | vehicle_types = [IDMVehicle, LinearVehicle] 9 | 10 | 11 | @pytest.mark.parametrize("vehicle_type", vehicle_types) 12 | def test_stop_before_obstacle(vehicle_type): 13 | road = Road(RoadNetwork.straight_road_network(lanes=1)) 14 | vehicle = vehicle_type(road=road, position=[0, 0], speed=20, heading=0) 15 | obstacle = Obstacle(road=road, position=[80, 0]) 16 | road.vehicles.append(vehicle) 17 | road.objects.append(obstacle) 18 | for _ in range(10 * FPS): 19 | road.act() 20 | road.step(dt=1/FPS) 21 | assert not vehicle.crashed 22 | assert vehicle.position[0] == pytest.approx(obstacle.position[0] - vehicle_type.DISTANCE_WANTED, abs=1) 23 | assert vehicle.position[1] == pytest.approx(0) 24 | assert vehicle.speed == pytest.approx(0, abs=1) 25 | assert vehicle.heading == pytest.approx(0) 26 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_control.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.road.lane import StraightLane 4 | from highway_env.road.road import Road, RoadNetwork 5 | from highway_env.vehicle.controller import ControlledVehicle 6 | 7 | FPS = 15 8 | 9 | 10 | def test_step(): 11 | v = ControlledVehicle(road=None, position=[0, 0], speed=20, heading=0) 12 | for _ in range(2 * FPS): 13 | v.step(dt=1/FPS) 14 | assert v.position[0] == pytest.approx(40) 15 | assert v.position[1] == pytest.approx(0) 16 | assert v.speed == pytest.approx(20) 17 | assert v.heading == pytest.approx(0) 18 | 19 | 20 | def test_lane_change(): 21 | road = Road(RoadNetwork.straight_road_network(2)) 22 | v = ControlledVehicle(road=road, position=road.network.get_lane(("0", "1", 0)).position(0, 0), speed=20, heading=0) 23 | v.act('LANE_RIGHT') 24 | for _ in range(3 * FPS): 25 | v.act() 26 | v.step(dt=1/FPS) 27 | assert v.speed == pytest.approx(20) 28 | assert v.position[1] == pytest.approx(StraightLane.DEFAULT_WIDTH, abs=StraightLane.DEFAULT_WIDTH/4) 29 | assert v.lane_index[2] == 1 30 | 31 | 32 | def test_speed_control(): 33 | road = Road(RoadNetwork.straight_road_network(1)) 34 | v = ControlledVehicle(road=road, position=road.network.get_lane(("0", "1", 0)).position(0, 0), speed=20, heading=0) 35 | v.act('FASTER') 36 | for _ in range(int(3 * v.TAU_ACC * FPS)): 37 | v.act() 38 | v.step(dt=1/FPS) 39 | assert v.speed == pytest.approx(20 + v.DELTA_SPEED, abs=0.5) 40 | assert v.position[1] == pytest.approx(0) 41 | assert v.lane_index[2] == 0 42 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_dynamics.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from highway_env.road.road import Road, RoadNetwork 4 | from highway_env.vehicle.kinematics import Vehicle 5 | from highway_env.vehicle.objects import Obstacle, Landmark 6 | 7 | FPS = 15 8 | 9 | 10 | def test_step(): 11 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 12 | for _ in range(2*FPS): 13 | v.step(dt=1/FPS) 14 | assert v.position[0] == pytest.approx(40) 15 | assert v.position[1] == pytest.approx(0) 16 | assert v.speed == pytest.approx(20) 17 | assert v.heading == pytest.approx(0) 18 | 19 | 20 | def test_act(): 21 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 22 | v.act({'acceleration': 1, 'steering': 0}) 23 | for _ in range(1 * FPS): 24 | v.step(dt=1/FPS) 25 | assert v.speed == pytest.approx(21) 26 | 27 | v.act({'acceleration': 0, 'steering': 0.5}) 28 | for _ in range(1 * FPS): 29 | v.step(dt=1/FPS) 30 | assert v.speed == pytest.approx(21) 31 | assert v.position[1] > 0 32 | 33 | 34 | def test_brake(): 35 | v = Vehicle(road=None, position=[0, 0], speed=20, heading=0) 36 | for _ in range(10 * FPS): 37 | v.act({'acceleration': min(max(-1 * v.speed, -6), 6), 'steering': 0}) 38 | v.step(dt=1/FPS) 39 | assert v.speed == pytest.approx(0, abs=0.01) 40 | 41 | 42 | def test_front(): 43 | r = Road(RoadNetwork.straight_road_network(1)) 44 | v1 = Vehicle(road=r, position=[0, 0], speed=20) 45 | v2 = Vehicle(road=r, position=[10, 0], speed=10) 46 | r.vehicles.extend([v1, v2]) 47 | 48 | assert v1.lane_distance_to(v2) == pytest.approx(10) 49 | assert v2.lane_distance_to(v1) == pytest.approx(-10) 50 | 51 | 52 | def test_collision(): 53 | # Collision between two vehicles 54 | r = Road(RoadNetwork.straight_road_network(1)) 55 | v1 = Vehicle(road=r, position=[0, 0], speed=10) 56 | v2 = Vehicle(road=r, position=[4, 0], speed=20) 57 | v1.handle_collisions(v2) 58 | 59 | assert v1.crashed and v2.crashed 60 | # Collision between a vehicle and an obstacle 61 | v3 = Vehicle(road=r, position=[20, 0], speed=10) 62 | o = Obstacle(road=r, position=[23, 0]) 63 | v3.handle_collisions(o) 64 | 65 | assert v3.crashed and o.crashed 66 | # Collision between a vehicle and a landmark 67 | v4 = Vehicle(road=r, position=[40, 0], speed=10) 68 | l = Landmark(road=r, position=[43, 0]) 69 | v4.handle_collisions(l) 70 | 71 | assert v4.crashed is False 72 | assert l.hit 73 | -------------------------------------------------------------------------------- /highway_modify/tests/vehicle/test_uncertainty.py: -------------------------------------------------------------------------------- 1 | from highway_env.road.road import Road, RoadNetwork 2 | from highway_env.vehicle.uncertainty.prediction import IntervalVehicle 3 | 4 | FPS = 15 5 | 6 | 7 | def test_partial(): 8 | road = Road(RoadNetwork.straight_road_network()) 9 | v = IntervalVehicle(road, position=[0, 0], speed=20, heading=0) 10 | for _ in range(2 * FPS): 11 | v.step(dt=1/FPS, mode="partial") 12 | assert v.interval.position[0, 0] <= v.position[0] <= v.interval.position[1, 0] 13 | assert v.interval.position[0, 1] <= v.position[1] <= v.interval.position[1, 1] 14 | assert v.interval.heading[0] <= v.heading <= v.interval.heading[1] 15 | 16 | 17 | def test_predictor(): 18 | road = Road(RoadNetwork.straight_road_network()) 19 | v = IntervalVehicle(road, position=[0, 0], speed=20, heading=0) 20 | for _ in range(2 * FPS): 21 | v.step(dt=1/FPS, mode="predictor") 22 | assert v.interval.position[0, 0] <= v.position[0] <= v.interval.position[1, 0] 23 
| assert v.interval.position[0, 1] <= v.position[1] <= v.interval.position[1, 1] 24 | assert v.interval.heading[0] <= v.heading <= v.interval.heading[1] 25 | -------------------------------------------------------------------------------- /introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/introduction.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import hydra 4 | import types 5 | import torch 6 | import os 7 | import random 8 | import gym 9 | import torch.nn.functional as F 10 | import utils.util as util 11 | import itertools 12 | import numpy as np 13 | from tensorboardX import SummaryWriter 14 | from itertools import count 15 | from make_envs import make_env 16 | from omegaconf import DictConfig, OmegaConf 17 | from dataset.rs_memory import Memory 18 | from dataset.load_data import Dataset 19 | from torch.autograd import Variable 20 | from model.sac_rs import SAC_RS 21 | torch.set_num_threads(2) 22 | cur_pth = os.getcwd() 23 | def get_args(cfg: DictConfig): 24 | # cfg.device = "cpu" 25 | cfg.device = "cuda:0" if torch.cuda.is_available() else "cpu" 26 | cfg.hydra_base_dir = os.getcwd() 27 | print(OmegaConf.to_yaml(cfg)) 28 | return cfg 29 | 30 | 31 | def make_agent(env, args): 32 | obs_dim = env.observation_space.shape[0] 33 | action_dim = env.action_space.shape[0] 34 | action_range = [ 35 | float(env.action_space.low.min()), 36 | float(env.action_space.high.max()) 37 | ] 38 | args.agent.obs_dim = obs_dim 39 | args.agent.action_dim = action_dim 40 | agent = SAC_RS(obs_dim, action_dim, action_range, args.train.batch, args) 41 | return agent 42 | 43 | 44 | 45 | def get_re_obs(obs): 46 | re_obs = np.array(obs) 47 | sz = re_obs.shape 48 | for i in range(1,sz[0]): 49 | re_obs[i]=re_obs[i]-re_obs[0] 50 | return re_obs 51 | 52 | def save(agent,args,cnt): 53 | output_dir=f'{args.env.name}' 54 | if not os.path.exists(output_dir): 55 | os.mkdir(output_dir) 56 | agent.save(f'{output_dir}/{args.agent.name}_{cnt}') 57 | print("saved successfully!") 58 | 59 | @hydra.main(config_path="config", config_name="config") 60 | def main(cfg: DictConfig): 61 | args = get_args(cfg) 62 | random.seed(args.seed) 63 | np.random.seed(args.seed) 64 | torch.manual_seed(args.seed) 65 | env_args=args.env 66 | env = make_env(args) 67 | eval_env = make_env(args) 68 | env.seed(args.seed) 69 | eval_env.seed(args.seed + 10) 70 | print(cur_pth) 71 | dataset_0=Dataset(cur_pth, args) 72 | g1 = int(env_args.g1) 73 | REPLAY_MEMORY = int(env_args.replay_mem) # total buffer size 74 | INITIAL_MEMORY = int(env_args.initial_mem) # buffer size that can start learning 75 | EPISODE_STEPS = int(env_args.eps_steps) # maximum epoch_step number 76 | ROUND_LEARN_STEPS = int(env_args.round_steps) 77 | LEARN_STEPS = ROUND_LEARN_STEPS*dataset_0.expert_data["lengths"][0] # maximum learning_step number 78 | agent = make_agent(env, args) 79 | online_memory_replay = Memory(REPLAY_MEMORY//2, args.seed+1) 80 | learn_step = 0 81 | all_step = 0 82 | sg_count = 0 83 | writer = SummaryWriter(log_dir="./logs") 84 | output_dir=f'./data/{args.env.name}/CSIRL/{dataset_0.get_tra_num()}' 85 | if not os.path.exists(output_dir): 86 | os.makedirs(output_dir) 87 | output_dir = output_dir + f'/{args.seed}.pkl' 88 | test_reward = [] 89 | test_step = [] 90 | for _1 in count(): 91 
| sg_count += 1 92 | save(agent, args, sg_count) 93 | print("| subgoal count %d |" %(sg_count)) 94 | online_memory_replay.clear() 95 | begin_learn = False 96 | goal_learn_step = 0 97 | for __ in count(): 98 | if goal_learn_step > ROUND_LEARN_STEPS: 99 | break 100 | state = env.reset() 101 | episode_reward = 0 102 | done = False 103 | #print(_) 104 | train_reward = -999.9 105 | for episode_step in range(EPISODE_STEPS): 106 | # env.render() 107 | if learn_step % args.env.eval_interval == 1 and begin_learn == True: 108 | eval_returns, eval_timesteps = util.evaluate(agent, eval_env, num_episodes=args.eval.eps) 109 | returns = np.mean(eval_returns) 110 | writer.add_scalar('eval/episode_reward', returns, learn_step) 111 | test_step.append(learn_step) 112 | test_reward.append(returns) 113 | print("| test | steps: %2d | episode_reward: %.3f |" %(learn_step,returns)) 114 | record_data = {"steps": test_step, "rewards": test_reward} 115 | torch.save(record_data, output_dir) 116 | if all_step < args.num_seed_steps: 117 | # Seed replay buffer with random actions 118 | action = env.action_space.sample() 119 | else: 120 | with util.eval_mode(agent): 121 | action = agent.choose_action(state, sample=True) 122 | next_state, reward, done, _ = env.step(action) 123 | train_reward = max(train_reward, -_["dis"]) 124 | re_obs = get_re_obs(state) 125 | reward1= util.get_matching_reward(state, next_state, dataset_0, agent.get_reward(torch.tensor(re_obs)), g1, args) 126 | done_no_lim = done 127 | if str(env.__class__.__name__).find('TimeLimit') >= 0 and episode_step + 1 == env._max_episode_steps: 128 | done_no_lim = 0 129 | online_memory_replay.add((state,next_state, action, re_obs, reward1, done_no_lim)) 130 | if online_memory_replay.size() > INITIAL_MEMORY: 131 | if begin_learn is False: 132 | print('Learn begins!') 133 | begin_learn = True 134 | 135 | goal_learn_step += 1 136 | learn_step += 1 137 | agent.update(online_memory_replay, dataset_0, writer, learn_step) 138 | if learn_step == LEARN_STEPS: 139 | print('Finished!') 140 | writer.close() 141 | record_data = {"steps":test_step, "rewards": test_reward} 142 | print(output_dir) 143 | torch.save(record_data,output_dir) 144 | return 145 | if done: 146 | break 147 | state = next_state 148 | if begin_learn: 149 | writer.add_scalar('train/reward',train_reward,learn_step) 150 | print("\n| train | steps: %2d | episode_reward: %.3f |" %(learn_step,train_reward)) 151 | eval_returns, eval_timesteps = util.evaluate(agent, eval_env, num_episodes=args.eval.eps) 152 | returns = np.mean(eval_returns) 153 | writer.add_scalar('eval/episode_reward', returns, learn_step) 154 | test_step.append(learn_step) 155 | test_reward.append(returns) 156 | print("| test | steps: %2d | episode_reward: %.3f |" %(learn_step,returns)) 157 | dataset_0.select_subgoal(agent, args) 158 | 159 | writer.close() 160 | if __name__ == "__main__": 161 | main() -------------------------------------------------------------------------------- /make_envs.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import highway_env 3 | from stable_baselines3.common.atari_wrappers import AtariWrapper 4 | from stable_baselines3.common.monitor import Monitor 5 | 6 | from wrappers.atari_wrapper import ScaledFloatFrame, FrameStack, PyTorchFrame 7 | from wrappers.normalize_action_wrapper import check_and_normalize_box_actions 8 | import envs 9 | import numpy as np 10 | 11 | # Register all custom envs 12 | envs.register_custom_envs() 13 | 14 | 15 | def make_atari(env): 16 
| env = AtariWrapper(env) 17 | env = PyTorchFrame(env) 18 | env = FrameStack(env, 4) 19 | return env 20 | 21 | 22 | def is_atari(env_name): 23 | return env_name in ['PongNoFrameskip-v4', 'BreakoutNoFrameskip-v4', 'SpaceInvadersNoFrameskip-v4'] 24 | 25 | 26 | def is_highway(env_name): 27 | return env_name in ['highway-fast-v0'] 28 | 29 | def is_merge(env_name): 30 | return env_name in ['merge-v0'] 31 | 32 | def is_roundabout(env_name): 33 | return env_name in ['roundabout-v0','roundabout-v1'] 34 | 35 | def is_intersection(env_name): 36 | return env_name in ['intersection-v0'] 37 | 38 | def is_mujoco(env_name): 39 | return env_name in ['antmaze-umaze-v0'] 40 | 41 | class HighwayObs(gym.ObservationWrapper): 42 | def __init__(self, env): 43 | super(HighwayObs, self).__init__(env) 44 | shape = self.observation_space.shape 45 | self.observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(shape[0] * shape[1],), dtype=np.float32) 46 | 47 | def observation(self, observation): 48 | return observation.flatten() 49 | 50 | def make_env(args, monitor=True): 51 | print(args.env.name) 52 | env = gym.make(args.env.name) 53 | 54 | if monitor: 55 | env = Monitor(env, "gym") 56 | 57 | if is_atari(args.env.name): 58 | env = make_atari(env) 59 | 60 | if is_highway(args.env.name): 61 | env = HighwayObs(env) 62 | if args.env.action_type == 'continues': 63 | env_config = { 64 | "action": { 65 | "type": "ContinuousAction" 66 | }, 67 | "is_record": False, 68 | "total_time": 0.2, 69 | "simulation_frequency": 1, 70 | "duration": 150, 71 | "vehicles_speed": args.env.speed, 72 | "vehicles_density": args.env.density 73 | } 74 | env.configure(env_config) 75 | env.reset() 76 | if is_merge(args.env.name): 77 | env = HighwayObs(env) 78 | if args.env.action_type == 'continues': 79 | env_config = { 80 | "action": { 81 | "type": "ContinuousAction" 82 | }, 83 | "is_record": False, 84 | "total_time": 0.2, 85 | "simulation_frequency": 1, 86 | "duration": 60 87 | } 88 | env.configure(env_config) 89 | env.reset() 90 | if is_roundabout(args.env.name): 91 | env = HighwayObs(env) 92 | if args.env.action_type == 'continues': 93 | env_config = { 94 | "action": { 95 | "type": "ContinuousAction" 96 | }, 97 | "is_record": False, 98 | "total_time": 0.2, 99 | "simulation_frequency": 1, 100 | "duration": 55 101 | } 102 | env.configure(env_config) 103 | env.reset() 104 | if is_intersection(args.env.name): 105 | env = HighwayObs(env) 106 | if args.env.action_type == 'continues': 107 | env_config = { 108 | "action": { 109 | "type": "ContinuousAction" 110 | }, 111 | "is_record": False, 112 | "total_time": 0.2, 113 | "destination": args.env.destination, 114 | "finish_position":None, 115 | "simulation_frequency": 1, 116 | "duration": 65 117 | } 118 | if args.env.destination=="o11": 119 | env_config["finish_position"]=[-45.0, -2.0] 120 | elif args.env.destination=="o21": 121 | env_config["finish_position"]=[2.0, -45.0] 122 | else: 123 | env_config["finish_position"]=[45.0, 6.0] 124 | env.configure(env_config) 125 | env.reset() 126 | # Normalize box actions to [-1, 1] 127 | env = check_and_normalize_box_actions(env) 128 | return env 129 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/model/__init__.py -------------------------------------------------------------------------------- /model/agent.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/model/agent.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.5.0 2 | alabaster==0.7.8 3 | antlr4-python3-runtime==4.8 4 | asn1crypto==0.24.0 5 | atari-py==0.2.6 6 | atomicwrites==1.2.1 7 | attrs==18.2.0 8 | autobahn==17.10.1 9 | Automat==0.6.0 10 | Babel==2.4.0 11 | bleach==2.1.2 12 | blinker==1.4 13 | box2d-py==2.3.8 14 | cachetools==4.2.4 15 | carla==0.9.13 16 | cbor==1.0.0 17 | certifi==2018.8.24 18 | cffi==1.15.1 19 | chardet==3.0.4 20 | charset-normalizer==2.0.12 21 | click==8.0.4 22 | cloud-init==22.2 23 | cloudpickle==1.3.0 24 | colorama==0.3.7 25 | command-not-found==0.3 26 | configobj==5.0.6 27 | constantly==15.1.0 28 | cryptography==2.1.4 29 | cycler==0.10.0 30 | Cython==0.29.32 31 | dataclasses==0.8 32 | decorator==4.1.2 33 | distro-info===0.18ubuntu0.18.04.1 34 | docker-pycreds==0.4.0 35 | docopt==0.6.2 36 | docutils==0.14 37 | entrypoints==0.2.3.post1 38 | enum34==1.1.6 39 | future==0.16.0 40 | gitdb==4.0.9 41 | GitPython==3.1.18 42 | glfw==2.5.5 43 | google-auth==1.35.0 44 | google-auth-oauthlib==0.4.6 45 | grpcio==1.48.2 46 | gym==0.17.1 47 | gym-minigrid==1.0.2 48 | html5lib==0.999999999 49 | httplib2==0.9.2 50 | hydra-core==1.0.6 51 | hyperlink==17.3.1 52 | idna==2.7 53 | imageio==2.4.1 54 | imagesize==0.7.1 55 | importlib-metadata==4.8.3 56 | importlib-resources==5.4.0 57 | incremental==16.10.1 58 | ipykernel==4.8.2 59 | ipython==5.5.0 60 | ipython_genutils==0.2.0 61 | ipywidgets==6.0.0 62 | Jinja2==2.10 63 | jsonpatch==1.16 64 | jsonpickle==0.9.6 65 | jsonpointer==1.10 66 | jsonschema==2.6.0 67 | jupyter-client==5.2.2 68 | jupyter-core==4.4.0 69 | keras==2.10.0 70 | keyring==10.6.0 71 | keyrings.alt==3.0 72 | kiwisolver==1.0.1 73 | language-selector==0.1 74 | lockfile==0.12.2 75 | lz4==0.10.1 76 | Markdown==3.3.7 77 | MarkupSafe==1.0 78 | matplotlib==3.0.0 79 | mistune==0.8.3 80 | mock==2.0.0 81 | more-itertools==4.3.0 82 | mpi4py==2.0.0 83 | mpyq==0.2.5 84 | mujoco-maze==0.2.0 85 | mujoco-py==2.0.2.0 86 | munch==2.3.2 87 | nbconvert==5.3.1 88 | nbformat==4.4.0 89 | netifaces==0.10.4 90 | notebook==5.2.2 91 | numpy==1.19.5 92 | numpydoc==0.7.0 93 | oauthlib==3.2.2 94 | olefile==0.45.1 95 | omegaconf==2.0.6 96 | opencv-python==4.5.1.48 97 | PAM==0.4.2 98 | pandas==1.1.5 99 | pandocfilters==1.4.2 100 | pathlib2==2.3.2 101 | pathtools==0.1.2 102 | pbr==4.3.0 103 | pexpect==4.2.1 104 | pickleshare==0.7.4 105 | Pillow==5.3.0 106 | pluggy==0.7.1 107 | portpicker==1.2.0 108 | probscale==0.2.3 109 | promise==2.3 110 | prompt-toolkit==1.0.15 111 | protobuf==3.19.6 112 | psutil==5.9.4 113 | py==1.6.0 114 | py-ubjson==0.8.5 115 | pyasn1==0.4.2 116 | pyasn1-modules==0.2.1 117 | pycparser==2.21 118 | pycrypto==2.6.1 119 | pygame==1.9.4 120 | pyglet==1.5.0 121 | Pygments==2.2.0 122 | PyGObject==3.26.1 123 | PyJWT==1.5.3 124 | PyNaCl==1.1.2 125 | pyOpenSSL==17.5.0 126 | pyparsing==2.2.2 127 | PySC2==2.0.2 128 | pyserial==3.4 129 | pytest==3.8.2 130 | python-apt==1.6.5+ubuntu0.6 131 | python-dateutil==2.7.3 132 | python-debian==0.1.32 133 | python-snappy==0.5 134 | PyTrie==0.2 135 | pytz==2018.3 136 | pyxdg==0.25 137 | PyYAML==6.0 138 | pyzmq==16.0.2 139 | qrcode==5.3 140 | requests==2.27.1 141 | requests-oauthlib==1.3.1 142 | requests-unixsocket==0.1.5 143 | roman==2.0.0 
144 | rsa==4.9 145 | s2clientprotocol==4.6.1.68195.0 146 | sacred==0.7.2 147 | scipy==1.1.0 148 | SecretStorage==2.3.1 149 | sentry-sdk==1.11.1 150 | service-identity==16.0.0 151 | setproctitle==1.2.3 152 | shortuuid==1.0.11 153 | simplegeneric==0.8.1 154 | six==1.11.0 155 | sk-video==1.1.10 156 | smmap==5.0.0 157 | snakeviz==1.0.0 158 | sos==4.3 159 | Sphinx==1.6.7 160 | ssh-import-id==5.7 161 | stable-baselines3==1.0 162 | systemd-python==234 163 | tensorboard==2.4.0 164 | tensorboard-logger==0.1.0 165 | tensorboard-plugin-wit==1.8.1 166 | tensorboardX==2.1 167 | terminado==0.7 168 | testpath==0.3.1 169 | torch==1.7.1 170 | torchvision==0.8.2 171 | tornado==5.1.1 172 | tqdm==4.42.1 173 | traitlets==4.3.2 174 | Twisted==17.9.0 175 | txaio==2.8.1 176 | typing_extensions==4.1.1 177 | u-msgpack-python==2.1 178 | ubuntu-advantage-tools==27.0 179 | ufw==0.36 180 | unattended-upgrades==0.1 181 | urllib3==1.26.12 182 | wandb==0.13.6 183 | wcwidth==0.1.7 184 | webencodings==0.5 185 | websocket-client==0.53.0 186 | Werkzeug==2.0.3 187 | whichcraft==0.5.2 188 | wrapt==1.10.11 189 | wsaccel==0.6.2 190 | zipp==3.6.0 191 | zope.interface==4.3.2 192 | -------------------------------------------------------------------------------- /scripts/highway-fast-continues-v0-s35-d1.sh: -------------------------------------------------------------------------------- 1 | 2 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=0 3 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=1 4 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=2 5 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=3 6 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=4 7 | python main.py env=highway-fast-continues-v0_s35_d1 expert.tra="expert_data/highway-fast-continues-v0-s35-d1/25.npy" seed=5 8 | -------------------------------------------------------------------------------- /scripts/intersection-continues-o1.sh: -------------------------------------------------------------------------------- 1 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=0 2 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=1 3 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=2 4 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=3 5 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=4 6 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/25.npy" seed=5 7 | 8 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=0 9 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=1 10 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=2 11 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=3 12 | 
python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=4 13 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/1.npy" seed=5 14 | 15 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=0 16 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=1 17 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=2 18 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=3 19 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=4 20 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/5.npy" seed=5 21 | 22 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=0 23 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=1 24 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=2 25 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=3 26 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=4 27 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/10.npy" seed=5 28 | 29 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=0 30 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=1 31 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=2 32 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=3 33 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=4 34 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/15.npy" seed=5 35 | 36 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=0 37 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=1 38 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=2 39 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=3 40 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=4 41 | python main.py env=intersection_continues_v0_o1 expert.tra="expert_data/intersection-continues-v0-o1/20.npy" seed=5 42 | -------------------------------------------------------------------------------- /scripts/merge-v0.sh: -------------------------------------------------------------------------------- 1 | 2 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=0 3 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=1 4 | 
python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=2 5 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=3 6 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=4 7 | python main.py env=merge-continues-v0 expert.tra="expert_data/merge-continues-v0/25.npy" seed=5 8 | -------------------------------------------------------------------------------- /scripts/roundabout-v1.sh: -------------------------------------------------------------------------------- 1 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=0 2 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=1 3 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=2 4 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=3 5 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=4 6 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/25.npy" seed=5 7 | 8 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=0 9 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=1 10 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=2 11 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=3 12 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=4 13 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/1.npy" seed=5 14 | 15 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=0 16 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=1 17 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=2 18 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=3 19 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=4 20 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/5.npy" seed=5 21 | 22 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=0 23 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=1 24 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=2 25 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=3 26 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=4 27 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/10.npy" seed=5 28 | 29 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=0 30 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=1 31 | python main.py env=roundabout_continues_v1 
expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=2 32 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=3 33 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=4 34 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/15.npy" seed=5 35 | 36 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=0 37 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=1 38 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=2 39 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=3 40 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=4 41 | python main.py env=roundabout_continues_v1 expert.tra="expert_data/roundabout-continues-v1/20.npy" seed=5 42 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/utils/__init__.py -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import glob 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from torchvision.utils import make_grid, save_image 8 | 9 | def write_to_file(dname, dmap, cmap, itr): 10 | fid = open(dname + '-results.log', 'a+') 11 | string_to_write = str(itr) 12 | for item in dmap: 13 | string_to_write += ' ' + '%.2f' % item 14 | string_to_write += ' ' + '%.2f' % cmap 15 | fid.write(string_to_write + '\n') 16 | fid.close() 17 | 18 | 19 | def get_labels(seq_len, n_subgoals): 20 | # Equi-partition labels 21 | stops = np.array(range(1, n_subgoals + 1)).astype('float32') / n_subgoals 22 | labels = np.zeros((seq_len, len(stops)), dtype=float) 23 | prev_idx = 0 24 | for i, stop in enumerate(stops): 25 | idx = int(seq_len * stop) 26 | labels[prev_idx:idx, i] = 1. 
27 | prev_idx = idx 28 | return labels 29 | 30 | 31 | def dist(a, b): 32 | return np.sum(np.abs(a - b)) 33 | class eval_mode(object): 34 | def __init__(self, *models): 35 | self.models = models 36 | 37 | def __enter__(self): 38 | self.prev_states = [] 39 | for model in self.models: 40 | self.prev_states.append(model.training) 41 | model.train(False) 42 | 43 | def __exit__(self, *args): 44 | for model, state in zip(self.models, self.prev_states): 45 | model.train(state) 46 | return False 47 | 48 | def get_matching_reward(s, next_s, tra_dataset, reward_w, g1, args): 49 | _s = np.expand_dims(s,axis=0) 50 | id, sg = tra_dataset.find_subgoal(_s) 51 | sg = np.squeeze(sg, axis=0) 52 | h1 = np.linalg.norm(s[args.env.l_pos:args.env.r_pos+1]-sg[args.env.l_pos:args.env.r_pos+1]) 53 | h2 = np.linalg.norm(next_s[args.env.l_pos:args.env.r_pos+1]-sg[args.env.l_pos:args.env.r_pos+1]) 54 | reward_m = ( h1 - h2 ) * g1 55 | return reward_m 56 | 57 | 58 | def evaluate(actor, env, num_episodes=10, vis=True): 59 | total_timesteps = [] 60 | total_returns = [] 61 | 62 | while len(total_returns) < num_episodes: 63 | state = env.reset() 64 | done = False 65 | info={} 66 | ret = -999.9 67 | with eval_mode(actor): 68 | while not done: 69 | action = actor.choose_action(state, sample=False) 70 | next_state, reward, done, info = env.step(action) 71 | state = next_state 72 | ret = max(ret, -info['dis']) 73 | total_returns.append(ret) 74 | return total_returns, total_timesteps 75 | -------------------------------------------------------------------------------- /wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Plankson/CSIRL/e0cd5e18184c616671571ecb6a334a813a850adf/wrappers/__init__.py -------------------------------------------------------------------------------- /wrappers/atari_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | from collections import deque 5 | from gym import spaces 6 | 7 | 8 | class FrameStack(gym.Wrapper): 9 | def __init__(self, env, k): 10 | """Stack k last frames. 11 | Returns lazy array, which is much more memory efficient. 12 | Expects inputs to be of shape num_channels x height x width. 13 | """ 14 | gym.Wrapper.__init__(self, env) 15 | self.k = k 16 | self.frames = deque([], maxlen=k) 17 | shp = env.observation_space.shape 18 | self.observation_space = spaces.Box(low=0, high=255, shape=( 19 | shp[0] * k, shp[1], shp[2]), dtype=np.uint8) 20 | 21 | def reset(self): 22 | ob = self.env.reset() 23 | for _ in range(self.k): 24 | self.frames.append(ob) 25 | return self._get_ob() 26 | 27 | def step(self, action): 28 | ob, reward, done, info = self.env.step(action) 29 | self.frames.append(ob) 30 | return self._get_ob(), reward, done, info 31 | 32 | def _get_ob(self): 33 | assert len(self.frames) == self.k 34 | return LazyFrames(list(self.frames)) 35 | 36 | 37 | class ScaledFloatFrame(gym.ObservationWrapper): 38 | def __init__(self, env): 39 | gym.ObservationWrapper.__init__(self, env) 40 | self.observation_space = gym.spaces.Box( 41 | low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) 42 | 43 | def observation(self, observation): 44 | # careful! This undoes the memory optimization, use 45 | # with smaller replay buffers only. 
46 | return np.array(observation).astype(np.float32) / 255.0 47 | 48 | 49 | class LazyFrames(object): 50 | def __init__(self, frames): 51 | """This object ensures that common frames between the observations are only stored once. 52 | It exists purely to optimize memory usage which can be huge for DQN's 1M frames replay 53 | buffers. 54 | This object should only be converted to numpy array before being passed to the model.""" 55 | self._frames = frames 56 | self._out = None 57 | 58 | def _force(self): 59 | if self._out is None: 60 | self._out = np.concatenate(self._frames, axis=0) 61 | self._frames = None 62 | return self._out 63 | 64 | def __array__(self, dtype=None): 65 | out = self._force() 66 | if dtype is not None: 67 | out = out.astype(dtype) 68 | return out 69 | 70 | def __len__(self): 71 | return len(self._force()) 72 | 73 | def __getitem__(self, i): 74 | return self._force()[i] 75 | 76 | def count(self): 77 | frames = self._force() 78 | return frames.shape[frames.ndim - 1] 79 | 80 | def frame(self, i): 81 | return self._force()[..., i] 82 | 83 | 84 | class PyTorchFrame(gym.ObservationWrapper): 85 | """Image shape to num_channels x height x width""" 86 | 87 | def __init__(self, env): 88 | super(PyTorchFrame, self).__init__(env) 89 | shape = self.observation_space.shape 90 | self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=( 91 | shape[-1], shape[0], shape[1]), dtype=np.uint8) 92 | 93 | def observation(self, observation): 94 | return np.rollaxis(observation, 2) 95 | -------------------------------------------------------------------------------- /wrappers/normalize_action_wrapper.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Wrapper to normalize gym.spaces.Box actions in [-1, 1].""" 17 | 18 | import gym 19 | from gym import spaces 20 | import numpy as np 21 | 22 | 23 | class NormalizeBoxActionWrapper(gym.ActionWrapper): 24 | """Rescale the action space of the environment.""" 25 | 26 | def __init__(self, env): 27 | if not isinstance(env.action_space, spaces.Box): 28 | raise ValueError('env %s does not use spaces.Box.' 
% str(env)) 29 | super(NormalizeBoxActionWrapper, self).__init__(env) 30 | # self._max_episode_steps = env.max_episode_steps 31 | 32 | def action(self, action): 33 | # rescale the action 34 | low, high = self.env.action_space.low, self.env.action_space.high 35 | scaled_action = low + (action + 1.0) * (high - low) / 2.0 36 | scaled_action = np.clip(scaled_action, low, high) 37 | 38 | return scaled_action 39 | 40 | def reverse_action(self, scaled_action): 41 | low, high = self.env.action_space.low, self.env.action_space.high 42 | action = (scaled_action - low) * 2.0 / (high - low) - 1.0 43 | return action 44 | 45 | 46 | def check_and_normalize_box_actions(env): 47 | """Wrap env to normalize actions if [low, high] != [-1, 1].""" 48 | if isinstance(env.action_space, spaces.Box): 49 | low, high = env.action_space.low, env.action_space.high 50 | if (np.abs(low + np.ones_like(low)).max() > 1e-6 or 51 | np.abs(high - np.ones_like(high)).max() > 1e-6): 52 | print('--> Normalizing environment actions.') 53 | return NormalizeBoxActionWrapper(env) 54 | 55 | # Environment does not need to be normalized. 56 | return env 57 | --------------------------------------------------------------------------------
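
Note on the action normalization in wrappers/normalize_action_wrapper.py above: the snippet below is a minimal, self-contained sketch (not part of the repository) of the rescaling that NormalizeBoxActionWrapper.action() and reverse_action() perform, written with plain numpy on a hypothetical two-dimensional action range; the `low`/`high` values are illustrative assumptions, not taken from any of the environments configured here.

import numpy as np

low = np.array([-5.0, -0.3])   # assumed action_space.low (illustrative only)
high = np.array([5.0, 0.3])    # assumed action_space.high (illustrative only)

def to_env_action(normalized_action):
    # Affine map from the agent's [-1, 1] range to the env's [low, high] box,
    # followed by a clip, mirroring NormalizeBoxActionWrapper.action().
    scaled = low + (normalized_action + 1.0) * (high - low) / 2.0
    return np.clip(scaled, low, high)

def to_normalized_action(env_action):
    # Inverse affine map, mirroring reverse_action().
    return (env_action - low) * 2.0 / (high - low) - 1.0

a = np.array([0.5, -1.0])
print(to_env_action(a))                        # -> [ 2.5 -0.3]
print(to_normalized_action(to_env_action(a)))  # -> [ 0.5 -1. ]

The clip after the affine map mirrors the wrapper and guards against agent outputs that stray slightly outside [-1, 1], which is why check_and_normalize_box_actions only installs the wrapper when the environment's box is not already [-1, 1].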