├── custom_envs
│   ├── demo.py
│   ├── gymsumo.py
│   ├── bsmMerge.py
│   ├── sumo_ConfigTaper
│   │   ├── __init__.py
│   │   ├── Ramp_2.net.xml
│   │   ├── ramp_2.rou.xml
│   │   ├── ramp_2.sumocfg
│   │   ├── ramp_2_1.sumocfg
│   │   ├── ramp_2.sumocfg_copy
│   │   ├── test_traci.py
│   │   ├── ramp_2_3.rou.xml
│   │   ├── ramp_2_1.rou.xml
│   │   ├── ramp_2.rou.xml_copy
│   │   ├── ramp_2_2.rou.xml
│   │   ├── Ramp_2_1.net.xml
│   │   └── Ramp_2.net.xml_copy
│   ├── MultiMerge.py
│   ├── MultiMergeNoise.py
│   ├── MergeSingleBSMNoise.py
│   ├── MultiMergeBSMNoise.py
│   ├── MultiMergeParallel.py
│   ├── ramp_env.py
│   ├── __pycache__
│   │   ├── bsmMerge.cpython-38.pyc
│   │   ├── gymsumo.cpython-38.pyc
│   │   └── MultiMerge.cpython-38.pyc
│   ├── .ipynb_checkpoints
│   │   └── Untitled-checkpoint.ipynb
│   ├── sample_cartpole.py
│   ├── readme.md
│   ├── test_sumo_env.py
│   ├── sumo_Config
│   │   ├── ramp_11.sumocfg
│   │   ├── ramp_1.sumocfg
│   │   ├── ramp_1.rou.xml
│   │   └── Ramp_1.net.xml
│   ├── sumo_ConfigParallelRamp
│   │   ├── ramp_parallel.sumocfg
│   │   ├── ramp_parallel.rou.xml
│   │   └── ramp_parallel.rou.xml_old
│   ├── checkEnv.py
│   ├── sample_env.py
│   ├── trainRamp.py
│   ├── Untitled.ipynb
│   ├── MultiMergeBSMNoise2.py
│   └── MultiMerge_1.py
├── customFeatureExtractor.py
├── config_file
│   ├── customFeatureExtractor.py
│   ├── __pycache__
│   │   ├── ppo_18_bsm_noImage.cpython-38.pyc
│   │   ├── customFeatureExtractor.cpython-38.pyc
│   │   ├── ppo_final_Image_nobsm_6.cpython-38.pyc
│   │   ├── ppo_final_bsm_noImage_6.cpython-38.pyc
│   │   ├── ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 00_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 00_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 10_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 10_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 25_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 25_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 50_ppo_final_bsm_noImage.cpython-38.pyc
│   │   └── 50_ppo_final_multimodal_6.cpython-38.pyc
│   ├── ppo_16_OnlyImagenoise.py
│   ├── ppo_18_Image_nobsm.py
│   ├── ppo_final_Image_nobsm.py
│   ├── ppo_final_Image_nobsm_5.py
│   ├── ppo_final_Image_nobsm_1.py
│   ├── ppo_16_OnlyBSMnoise.py
│   ├── ppo_4.py
│   ├── ppo_1.py
│   ├── ppo_final_Image_nobsm_6.py
│   ├── 00_ppo_final_Image_nobsm_6.py
│   ├── 10_ppo_final_Image_nobsm_6.py
│   ├── 25_ppo_final_Image_nobsm_6.py
│   ├── 50_ppo_final_Image_nobsm_6.py
│   ├── ppo_2.py
│   ├── sac_multi_config.py
│   ├── ppo_3.py
│   ├── ppo_5.py
│   ├── ppo_18_bsm_noImage.py
│   ├── ppo_final_bsm_noImage.py
│   ├── ppo_final_bsm_noImage_1.py
│   ├── ppo_6.py
│   ├── ppo_final_bsm_noImage_5.py
│   ├── ppo_15.py
│   ├── ppo_final_bsm_noImage_2.py
│   ├── ppo_final_bsm_noImage_3.py
│   ├── ppo_final_bsm_noImage_4.py
│   ├── ppo_11.py
│   ├── ppo_13.py
│   ├── ppo_14.py
│   ├── ppo_7.py
│   ├── ppo_8.py
│   ├── ppo_9.py
│   ├── ppo_10.py
│   ├── ppo_12.py
│   ├── ppo_16.py
│   ├── ppo_17.py
│   ├── ppo_17_1.py
│   ├── ppo_17_2.py
│   ├── ppo_17_3.py
│   ├── ppo_18.py
│   ├── ppo_19.py
│   ├── ppo_20.py
│   ├── ppo_final.py
│   ├── ppo_final_bsm_noImage_6.py
│   ├── 00_ppo_final_bsm_noImage.py
│   ├── 10_ppo_final_bsm_noImage.py
│   ├── 25_ppo_final_bsm_noImage.py
│   ├── 50_ppo_final_bsm_noImage.py
│   ├── ppo_final_1.py
│   ├── ppo_final_2.py
│   ├── ppo_final_multimodal_5.py
│   ├── ppo_final_multimodal_6.py
│   ├── 00_ppo_final_multimodal_6.py
│   ├── 10_ppo_final_multimodal_6.py
│   ├── 25_ppo_final_multimodal_6.py
│   └── 50_ppo_final_multimodal_6.py
├── README.md
├── environment.yml
├── train_Image_noBSM_final.py
├── train_multiModelNoise_final.py
├── train_multiModal_final.py
├── train_multiModalNoise_parallel.py
├── train_bsm_noImage_final.py
├── test_multi_model.py
├── test_parallel_model.py
├── test_baseline_paralllel.py
├── test_bsmNoise_PerfectImage.py
├── train_ImageNoise_noBSM_final.py
├── train_ImageNoise_PerfectBSM.py
└── train_multiModelNoise_final_2.py
/custom_envs/demo.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/custom_envs/gymsumo.py:
--------------------------------------------------------------------------------
1 | gymsumo4.py
--------------------------------------------------------------------------------
/custom_envs/bsmMerge.py:
--------------------------------------------------------------------------------
1 | bsmMerge6.py
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/custom_envs/MultiMerge.py:
--------------------------------------------------------------------------------
1 | MultiMerge_final.py
--------------------------------------------------------------------------------
/customFeatureExtractor.py:
--------------------------------------------------------------------------------
1 | customFeatureExtractor3.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeNoise.py:
--------------------------------------------------------------------------------
1 | MultiMergeNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MergeSingleBSMNoise.py:
--------------------------------------------------------------------------------
1 | MergeSingleBSMNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeBSMNoise.py:
--------------------------------------------------------------------------------
1 | MultiMergeBSMNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeParallel.py:
--------------------------------------------------------------------------------
1 | MultiMergeParallel_2.py
--------------------------------------------------------------------------------
/config_file/customFeatureExtractor.py:
--------------------------------------------------------------------------------
1 | customFeatureExtractor3.py
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2.net.xml:
--------------------------------------------------------------------------------
1 | Ramp_2_1.net.xml
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.rou.xml:
--------------------------------------------------------------------------------
1 | ramp_2_2.rou.xml
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.sumocfg:
--------------------------------------------------------------------------------
1 | ramp_2_1.sumocfg
--------------------------------------------------------------------------------
/custom_envs/ramp_env.py:
--------------------------------------------------------------------------------
1 | from rampTaperEnv import SumoRampEnv
2 |
3 | from gym.wrappers.rescale_action import RescaleAction
4 |
--------------------------------------------------------------------------------
/custom_envs/__pycache__/bsmMerge.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/bsmMerge.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/__pycache__/gymsumo.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/gymsumo.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/__pycache__/MultiMerge.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/MultiMerge.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 5
6 | }
7 |
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_18_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_18_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/customFeatureExtractor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/customFeatureExtractor.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_Image_nobsm_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_Image_nobsm_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_bsm_noImage_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_bsm_noImage_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/00_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/00_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/00_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/00_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/10_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/10_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/10_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/10_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/25_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/25_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/25_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/25_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/50_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/50_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/50_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/50_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/sample_cartpole.py:
--------------------------------------------------------------------------------
1 | import gym
2 | env = gym.make('Hopper-v2')
3 |
4 | env.reset()
5 |
6 | for _ in range(100):
 7 |     env.render()
 8 |     env.step(env.action_space.sample())
9 | env.close()
10 |
--------------------------------------------------------------------------------
/custom_envs/readme.md:
--------------------------------------------------------------------------------
1 | # Custom Env
 2 | Gym ships with a set of predefined envs. To define our own custom env, we implement the Gym env interface.
 3 | The custom env class therefore inherits from the `gym.Env` base class.
4 |
5 |
 6 | For reference, TraCI's `gui.screenshot` (used to capture GUI camera frames) sends: self._setCmd(tc.VAR_SCREENSHOT, viewID, "tsii", 3, filename, width, height)
7 |
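 8 | A minimal sketch of such a custom env (illustrative names only; the real envs in this folder add SUMO/TraCI state, BSM/image observations, and the merging reward):
 9 |
10 | ```python
11 | import gym
12 | import numpy as np
13 | from gym.spaces import Box
14 |
15 | class MySumoRampEnv(gym.Env):
16 |     """Skeleton only: inherits the gym.Env interface."""
17 |
18 |     def __init__(self):
19 |         super().__init__()
20 |         # acceleration command; bounds as in the config files
21 |         self.action_space = Box(low=-4.5, high=3.0, shape=(1,), dtype=np.float32)
22 |         # e.g. speeds of the ego vehicle and six neighbours
23 |         self.observation_space = Box(low=0, high=70, shape=(7,), dtype=np.float32)
24 |
25 |     def reset(self):
26 |         return np.zeros(7, dtype=np.float32)
27 |
28 |     def step(self, action):
29 |         obs = np.zeros(7, dtype=np.float32)  # would come from SUMO via traci
30 |         reward, done, info = 0.0, False, {}
31 |         return obs, reward, done, info
32 | ```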
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAMRL-On-Ramp-Merging
2 | PyTorch implementation of: Gaurav Bagwe, Xiaoyong Yuan, Xianhao Chen, Lan Zhang, "RAMRL: Towards Robust On-Ramp Merging via Augmented Multimodal Reinforcement Learning", 2023 IEEE International Conference on Mobility, Operations, Services and Technologies (MOST)
3 |
4 |
--------------------------------------------------------------------------------
/custom_envs/test_sumo_env.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | if 'SUMO_HOME' in os.environ:
 3 |     tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
 4 |     sys.path.append(tools)
5 | else:
 6 |     sys.exit("please declare environment variable 'SUMO_HOME'")
7 |
8 |
9 | sumoBinary = "/home/gauravb/sumo/bin/sumo-gui"
10 | sumoCmd = [sumoBinary, "-c", "./sumo_ConfigParallelRamp/ramp_parallel.sumocfg"]
11 |
12 | import traci
13 | traci.start(sumoCmd)
14 | step = 0
15 | while step < 1000:
16 |     traci.simulationStep()
17 |     step += 1
18 |
19 | traci.close()
20 |
21 |
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_11.sumocfg:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_1.sumocfg:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_1.sumocfg:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.sumocfg_copy:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigParallelRamp/ramp_parallel.sumocfg:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/test_traci.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 |
 3 |
 4 | if 'SUMO_HOME' in os.environ:
 5 |     SUMO_HOME = os.environ['SUMO_HOME']
 6 |     tools = os.path.join(SUMO_HOME, 'tools')
 7 |     sys.path.append(tools)
 8 |     print('sumo_loaded')
 9 |     print(tools)
10 | else:
11 |     sys.exit("please declare environment variable 'SUMO_HOME'")
12 |
13 | import traci
14 | import sumolib
15 | LIBSUMO = 'LIBSUMO_AS_TRACI' in os.environ
16 |
17 | sumoBinary = os.path.join(SUMO_HOME, "bin/sumo-gui")
18 |
19 |
20 | sumoConfigFile = "./custom_envs/sumo_ConfigParallelRamp/ramp_parallel.sumocfg"
21 | sumo_cmd = [sumoBinary,
22 |             '-n', './custom_envs/sumo_ConfigParallelRamp/ramp_parallel.net.xml',
23 |             '-r', './custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml',
24 |             '--waiting-time-memory', '10000',
25 |             '--time-to-teleport', '-1', '--random']
26 | traci.start(sumo_cmd)
26 |
27 |
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_1.rou.xml:
--------------------------------------------------------------------------------
[XML content not preserved in this dump]
--------------------------------------------------------------------------------
/config_file/ppo_16_OnlyImagenoise.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 500,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.001,
24 | 'alphaD': 0.25,
25 | 'rC': -400,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.008
29 | }
30 | sumoParameters = {'maxSpeed':30 ,
31 | 'episodeLength': 600
32 | }
33 |
34 |
35 |
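36 | # Usage sketch (assumption: the train_* scripts at the repo root consume these
37 | # config modules with stable-baselines3; the wiring below is illustrative):
38 | #
39 | #   from stable_baselines3 import PPO
40 | #   import config_file.ppo_16_OnlyImagenoise as cfg
41 | #   model = PPO("MultiInputPolicy", env, policy_kwargs=cfg.policy_kwargs)
42 | #   model.learn(total_timesteps=1_000_000)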
--------------------------------------------------------------------------------
/config_file/ppo_18_Image_nobsm.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 |
18 | }
19 |
20 | weights = {'alphasl0': 0.5,
21 | 'alphasl1': 0.5,
22 | 'rSuccess': 150,
23 | 'alphaO': 0.1,
24 | 'rTimeAlpha': 0.001,
25 | 'alphaD': 0.25,
26 | 'rC': -150,
27 | 'alphaDistance': 0.5,
28 | 'alphaP': 0.5,
29 | 'alphaJ': 0.08
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 600
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 |
18 | }
19 |
20 | weights = {'alphasl0': 0.5,
21 | 'alphasl1': 0.5,
22 | 'rSuccess': 150,
23 | 'alphaO': 0.1,
24 | 'rTimeAlpha': 0.01,
25 | 'alphaD': 0.25,
26 | 'rC': -250,
27 | 'alphaDistance': 0.5,
28 | 'alphaP': 0.5,
29 | 'alphaJ': 0.08
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 600
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_5.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 |
18 | }
19 |
20 | weights = {'alphasl0': 0.5,
21 | 'alphasl1': 0.5,
22 | 'rSuccess': 150,
23 | 'alphaO': 0.1,
24 | 'rTimeAlpha': 0.01,
25 | 'alphaD': 0.25,
26 | 'rC': -150,
27 | 'alphaDistance': 0.5,
28 | 'alphaP': 0.5,
29 | 'alphaJ': 0.08
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 600
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_1.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 |
18 | }
19 |
20 | weights = {'alphasl0': 0.5,
21 | 'alphasl1': 0.5,
22 | 'rSuccess': 150,
23 | 'alphaO': 0.1,
24 | 'rTimeAlpha': 0.01,
25 | 'alphaD': 0.25,
26 | 'rC': -150,
27 | 'alphaDistance': 0.5,
28 | 'alphaP': 0.5,
29 | 'alphaJ': 0.08
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 600
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/ppo_16_OnlyBSMnoise.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 |
7 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
8 | )
9 |
10 | action_space = {'high': 2.6,
11 | 'low': -4.5}
12 | image_shape = (200, 768,3)
13 | obsspaces = {
14 | 'velocity': Box(low=0, high=70, shape=(7,)),
15 | 'xPos': Box(low=-100, high=400, shape=(7,)),
16 | 'yPos': Box(low=-100, high=400, shape=(7,)),
17 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
18 |
19 | }
20 |
21 | weights = {'alphasl0': 0.5,
22 | 'alphasl1': 0.5,
23 | 'rSuccess': 500,
24 | 'alphaO': 0.1,
25 | 'rTimeAlpha': 0.001,
26 | 'alphaD': 0.25,
27 | 'rC': -400,
28 | 'alphaDistance': 0.5,
29 | 'alphaP': 0.5,
30 | 'alphaJ': 0.008
31 | }
32 | sumoParameters = {'maxSpeed':30 ,
33 | 'episodeLength': 600
34 | }
35 |
36 |
37 |
--------------------------------------------------------------------------------
/config_file/ppo_4.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 |
4 |
5 |
6 |
7 |
8 |
9 | policy_kwargs = dict(
10 | #features_extractor_class=CustomCombinedExtractor,
11 | features_extractor_kwargs=dict(cnn_output_dim=2046),
12 |
13 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
14 | )
15 |
16 | action_space = {'high': 3,
17 | 'low': -4.5}
18 | image_shape = (200, 768,3)
19 | obsspaces = {
20 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
21 | 'velocity': Box(low=0, high=70, shape=(7,)),
22 | 'xPos': Box(low=-100, high=400, shape=(7,)),
23 | 'yPos': Box(low=-100, high=400, shape=(7,)),
24 | }
25 |
26 | weights = {'alphasl0': 0.5,
27 | 'alphasl1': 0.5,
28 | 'rSuccess': 250,
29 | 'alphaO': 0.1,
30 | 'rTimeAlpha': 1,
31 | 'alphaD': 0.25,
32 | 'rC': -250,
33 | 'alphaDistance': 0.5,
34 | 'alphaP': 0.5,
35 | 'alphaJ': 0.8
36 | }
37 | sumoParameters = {'maxSpeed':30 ,
38 | 'episodeLength': 600
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/config_file/ppo_1.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 |
4 |
5 |
6 |
7 |
8 |
9 | policy_kwargs = dict(
10 | #features_extractor_class=CustomCombinedExtractor,
11 | features_extractor_kwargs=dict(cnn_output_dim=2046),
12 |
13 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
14 | )
15 |
16 | action_space = {'high': 3,
17 | 'low': -4.5}
18 | image_shape = (200, 768,3)
19 | obsspaces = {
20 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
21 | 'velocity': Box(low=0, high=70, shape=(7,)),
22 | 'xPos': Box(low=-100, high=400, shape=(7,)),
23 | 'yPos': Box(low=-100, high=400, shape=(7,)),
24 | }
25 |
26 | weights = {'alphasl0': 0.5,
27 | 'alphasl1': 0.5,
28 | 'rSuccess': 250,
29 | 'alphaO': 0.1,
30 | 'rTimeAlpha': 0.05,
31 | 'alphaD': 0.5,
32 | 'rC': -250,
33 | 'alphaDistance': 0.5,
34 | 'alphaP': 0.5,
35 | 'alphaJ': 0.8
36 | }
37 | sumoParameters = {'maxSpeed':30 ,
38 | 'episodeLength': 600
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 150,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.01,
24 | 'alphaD': 0.25,
25 | 'rC': -150,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.8,
29 | 'noise_level': 1
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 180
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/00_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 150,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.01,
24 | 'alphaD': 0.25,
25 | 'rC': -150,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.8,
29 | 'noise_level': 0.00,
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 180
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/10_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 150,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.01,
24 | 'alphaD': 0.25,
25 | 'rC': -150,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.8,
29 | 'noise_level': 0.10
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 180
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/25_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 150,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.01,
24 | 'alphaD': 0.25,
25 | 'rC': -150,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.8,
29 | 'noise_level': 0.25,
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 180
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/50_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | }
18 |
19 | weights = {'alphasl0': 0.5,
20 | 'alphasl1': 0.5,
21 | 'rSuccess': 150,
22 | 'alphaO': 0.1,
23 | 'rTimeAlpha': 0.01,
24 | 'alphaD': 0.25,
25 | 'rC': -150,
26 | 'alphaDistance': 0.5,
27 | 'alphaP': 0.5,
28 | 'alphaJ': 0.8,
29 | 'noise_level': 0.50,
30 | }
31 | sumoParameters = {'maxSpeed':30 ,
32 | 'episodeLength': 180
33 | }
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config_file/ppo_2.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 |
4 |
5 |
6 |
7 |
8 |
9 | policy_kwargs = dict(
10 | #features_extractor_class=CustomCombinedExtractor,
11 | features_extractor_kwargs=dict(cnn_output_dim=2046),
12 |
13 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
14 | )
15 |
16 | action_space = {'high': 3,
17 | 'low': -4.5}
18 | image_shape = (200, 768,3)
19 | obsspaces = {
20 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
21 | 'velocity': Box(low=0, high=70, shape=(7,)),
22 | 'xPos': Box(low=-100, high=400, shape=(7,)),
23 | 'yPos': Box(low=-100, high=400, shape=(7,)),
24 | }
25 |
26 | weights = {'alphasl0': 0.05,
27 | 'alphasl1': 0.05,
28 | 'rSuccess': 250,
29 | 'alphaO': 0.1,
30 | 'rTimeAlpha': 0.05,
31 | 'alphaD': 0.05,
32 | 'rC': -250,
33 | 'alphaDistance': 0.3,
34 | 'alphaP': 0.25,
35 | 'alphaJ': 0.3
36 | }
37 | sumoParameters = {'maxSpeed':30 ,
38 | 'episodeLength': 600
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/config_file/sac_multi_config.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 |
4 |
5 |
6 |
7 |
8 |
9 | policy_kwargs = dict(
10 | #features_extractor_class=CustomCombinedExtractor,
11 | features_extractor_kwargs=dict(cnn_output_dim=2046),
12 |
13 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
14 | )
15 |
16 | action_space = {'high': 3,
17 | 'low': -4.5}
18 | image_shape = (200, 768,3)
19 | obsspaces = {
20 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
21 | 'velocity': Box(low=0, high=70, shape=(7,)),
22 | 'xPos': Box(low=-100, high=400, shape=(7,)),
23 | 'yPos': Box(low=-100, high=400, shape=(7,)),
24 | }
25 |
26 | weights = {'alphasl0': 0.5,
27 | 'alphasl1': 0.5,
28 | 'rSuccess': 250,
29 | 'alphaO': 0.1,
30 | 'rTimeAlpha': 0.05,
31 | 'alphaD': 0.5,
32 | 'rC': -250,
33 | 'alphaDistance': 0.5,
34 | 'alphaP': 0.5,
35 | 'alphaJ': 0.8
36 | }
37 | sumoParameters = {'maxSpeed':30 ,
38 | 'episodeLength': 600
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/config_file/ppo_3.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | import sys
4 |
5 |
6 |
7 |
8 |
9 | policy_kwargs = dict(
10 | #features_extractor_class=CustomCombinedExtractor,
11 | features_extractor_kwargs=dict(cnn_output_dim=2046),
12 |
13 | net_arch=[1024,512, dict(vf=[512, 128, 64,8], pi=[512, 128,64, 8])],
14 | )
15 |
16 | action_space = {'high': 3,
17 | 'low': -4.5}
18 | image_shape = (200, 768,3)
19 | obsspaces = {
20 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
21 | 'velocity': Box(low=0, high=70, shape=(7,)),
22 | 'xPos': Box(low=-100, high=400, shape=(7,)),
23 | 'yPos': Box(low=-100, high=400, shape=(7,)),
24 | }
25 |
26 | weights = {'alphasl0': 0.05,
27 | 'alphasl1': 0.05,
28 | 'rSuccess': 250,
29 | 'alphaO': 0.1,
30 | 'rTimeAlpha': 0.05,
31 | 'alphaD': 0.05,
32 | 'rC': -250,
33 | 'alphaDistance': 0.3,
34 | 'alphaP': 0.25,
35 | 'alphaJ': 0.3
36 | }
37 | sumoParameters = {'maxSpeed':30 ,
38 | 'episodeLength': 600
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/custom_envs/checkEnv.py:
--------------------------------------------------------------------------------
1 | from ramp_env3 import SumoRampEnv
2 | import os
3 | from gym.wrappers.rescale_action import RescaleAction
 4 | from gym.wrappers.resize_observation import ResizeObservation
5 | # simpath = "/home/gauravb/Documents/MichiganTech/Programming/CustomRampTraining/custom_envs/sumo_Config/ramp_1.sumocfg"
6 | # simpath = os.getcwd()+"/custom_envs/sumo_Config/ramp_1.sumocfg"
7 | from gym.spaces import Box
8 | env = SumoRampEnv()
9 | min_action = -1
10 | max_action = +1
11 | print('before \n ', env.action_space.high,'high', env.action_space.low ,'low')
12 | env = RescaleAction(env, min_action, max_action)
13 | for i in range(0,100):
14 |     print(env.action_space.sample())
15 |
16 | print('after \n ', env.action_space.high,'high', env.action_space.low ,'low')
17 |
18 | from gym.utils.env_checker import check_env
19 |
20 | check_env(env)
21 | #
22 | # for episode in range(1,10):
23 | # env.reset()
24 | # done= False
25 | # score = 0
26 | # while not done:
27 | # env.render()
28 | # action = +30 #env.action_space.sample()
29 | # state_, reward, done, info = env.step(action)
30 | #
31 | # score += reward
32 | # print(f'Episode {episode} score {score} ')
33 |
--------------------------------------------------------------------------------
/config_file/ppo_5.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | }
21 |
22 | weights = {'alphasl0': 0.5,
23 | 'alphasl1': 0.5,
24 | 'rSuccess': 100,
25 | 'alphaO': 0.1,
26 | 'rTimeAlpha': 5,
27 | 'alphaD': 0.25,
28 | 'rC': -250,
29 | 'alphaDistance': 0.5,
30 | 'alphaP': 0.5,
31 | 'alphaJ': 0.8
32 | }
33 | sumoParameters = {'maxSpeed':30 ,
34 | 'episodeLength': 600
35 | }
36 |
37 |
38 |
--------------------------------------------------------------------------------
/config_file/ppo_18_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.001,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.01,
28 | 'alphaD': 0.25,
29 | 'rC': -250,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_1.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.01,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 100,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 10,
28 | 'alphaD': 0.25,
29 | 'rC': -250,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.8
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_5.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.01,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_15.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 500,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 1,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 1
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_2.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.01,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_3.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.00001,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_4.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.001,
28 | 'alphaD': 0.25,
29 | 'rC': -300,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.08
33 | }
34 | sumoParameters = {'maxSpeed':30 ,
35 | 'episodeLength': 600
36 | }
37 |
38 |
39 |
--------------------------------------------------------------------------------
/config_file/ppo_11.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 300,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_13.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 500,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_14.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 500,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 2.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 1
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_7.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.1,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_8.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768,3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed':30 ,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_9.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_10.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.0001,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_12.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | #features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 500,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_16.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 500,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -400,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.008
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_17.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -100,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_17_1.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 1,
29 | 'alphaD': 0.25,
30 | 'rC': -100,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_17_2.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.1,
29 | 'alphaD': 0.25,
30 | 'rC': -100,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_17_3.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -100,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_18.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_19.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 200,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -200,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_20.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 100,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.001,
29 | 'alphaD': 0.25,
30 | 'rC': -100,
31 | 'alphaDistance': 2,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.008
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_final.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 600
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
21 | }
22 |
23 | weights = {'alphasl0': 0.5,
24 | 'alphasl1': 0.5,
25 | 'rSuccess': 150,
26 | 'alphaO': 0.1,
27 | 'rTimeAlpha': 0.01,
28 | 'alphaD': 0.25,
29 | 'rC': -150,
30 | 'alphaDistance': 0.5,
31 | 'alphaP': 0.5,
32 | 'alphaJ': 0.8,
33 | 'noise_level': 1
34 | }
35 | sumoParameters = {'maxSpeed': 30,
36 | 'episodeLength': 180
37 | }
38 |
39 |
40 |
--------------------------------------------------------------------------------
/config_file/00_ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08,
34 | 'noise_level': 0.0,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/10_ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08,
34 | 'noise_level': 0.10,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/25_ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08,
34 | 'noise_level': 0.25,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/50_ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 3,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'velocity': Box(low=0, high=70, shape=(7,)),
17 | 'xPos': Box(low=-100, high=400, shape=(7,)),
18 | 'yPos': Box(low=-100, high=400, shape=(7,)),
19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
20 |
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -250,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08,
34 | 'noise_level': 0.50,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/ppo_final_1.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 1
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/ppo_final_2.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 |
22 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
23 | }
24 |
25 | weights = {'alphasl0': 0.5,
26 | 'alphasl1': 0.5,
27 | 'rSuccess': 150,
28 | 'alphaO': 0.1,
29 | 'rTimeAlpha': 0.01,
30 | 'alphaD': 0.25,
31 | 'rC': -150,
32 | 'alphaDistance': 0.5,
33 | 'alphaP': 0.5,
34 | 'alphaJ': 0.08
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/ppo_final_multimodal_5.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.08,
34 | 'noise_level': 0
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 600
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/ppo_final_multimodal_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 1
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/00_ppo_final_multimodal_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 0.00,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/10_ppo_final_multimodal_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 0.10
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/25_ppo_final_multimodal_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 0.25,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/config_file/50_ppo_final_multimodal_6.py:
--------------------------------------------------------------------------------
1 | from gym.spaces import Box
2 | import numpy as np
3 | from config_file.customFeatureExtractor import CustomCombinedExtractor
4 | #from customFeatureExtractor import CustomCombinedExtractor
5 | policy_kwargs = dict(
6 | features_extractor_class=CustomCombinedExtractor,
7 | features_extractor_kwargs=dict(cnn_output_dim=2046),
8 |
9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
10 | )
11 |
12 | action_space = {'high': 2.6,
13 | 'low': -4.5}
14 | image_shape = (200, 768, 3)
15 | obsspaces = {
16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
17 | 'velocity': Box(low=0, high=70, shape=(7,)),
18 | 'xPos': Box(low=-100, high=400, shape=(7,)),
19 | 'yPos': Box(low=-100, high=400, shape=(7,)),
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)),
21 | 'latSpeed': Box(low=0, high=70, shape=(7,)),
22 | }
23 |
24 | weights = {'alphasl0': 0.5,
25 | 'alphasl1': 0.5,
26 | 'rSuccess': 150,
27 | 'alphaO': 0.1,
28 | 'rTimeAlpha': 0.01,
29 | 'alphaD': 0.25,
30 | 'rC': -150,
31 | 'alphaDistance': 0.5,
32 | 'alphaP': 0.5,
33 | 'alphaJ': 0.8,
34 | 'noise_level': 0.50,
35 | }
36 | sumoParameters = {'maxSpeed': 30,
37 | 'episodeLength': 180
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: SB3_training
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - _openmp_mutex=4.5=1_gnu
7 | - ca-certificates=2022.3.29=h06a4308_0
8 | - certifi=2021.10.8=py38h06a4308_2
9 | - ld_impl_linux-64=2.35.1=h7274673_9
10 | - libffi=3.3=he6710b0_2
11 | - libgcc-ng=9.3.0=h5101ec6_17
12 | - libgomp=9.3.0=h5101ec6_17
13 | - libstdcxx-ng=9.3.0=hd4cf53a_17
14 | - ncurses=6.3=h7f8727e_2
15 | - openssl=1.1.1n=h7f8727e_0
16 | - pip=21.2.4=py38h06a4308_0
17 | - python=3.8.13=h12debd9_0
18 | - readline=8.1.2=h7f8727e_1
19 | - setuptools=58.0.4=py38h06a4308_0
20 | - sqlite=3.38.2=hc218d9a_0
21 | - tk=8.6.11=h1ccaba5_0
22 | - wheel=0.37.1=pyhd3eb1b0_0
23 | - xz=5.2.5=h7b6447c_0
24 | - zlib=1.2.11=h7f8727e_4
25 | - pip:
26 | - absl-py==1.0.0
27 | - ale-py==0.7.4
28 | - autorom==0.4.2
29 | - autorom-accept-rom-license==0.4.2
30 | - cachetools==5.0.0
31 | - charset-normalizer==2.0.12
32 | - click==8.1.2
33 | - cloudpickle==2.0.0
34 | - cycler==0.11.0
35 | - docker-pycreds==0.4.0
36 | - fonttools==4.31.2
37 | - gitdb==4.0.9
38 | - gitpython==3.1.27
39 | - google-auth==2.6.2
40 | - google-auth-oauthlib==0.4.6
41 | - grpcio==1.44.0
42 | - gym==0.21.0
43 | - idna==3.3
44 | - importlib-metadata==4.11.3
45 | - importlib-resources==5.6.0
46 | - kiwisolver==1.4.2
47 | - markdown==3.3.6
48 | - matplotlib==3.5.1
49 | - numpy==1.22.3
50 | - oauthlib==3.2.0
51 | - opencv-python==4.5.5.64
52 | - packaging==21.3
53 | - pandas==1.4.2
54 | - pathtools==0.1.2
55 | - pettingzoo==1.17.0
56 | - pillow==9.1.0
57 | - promise==2.3
58 | - protobuf==3.20.0
59 | - psutil==5.9.0
60 | - pyasn1==0.4.8
61 | - pyasn1-modules==0.2.8
62 | - pyparsing==3.0.7
63 | - python-dateutil==2.8.2
64 | - pytz==2022.1
65 | - pyvirtualdisplay==3.0
66 | - pyyaml==6.0
67 | - requests==2.27.1
68 | - requests-oauthlib==1.3.1
69 | - rsa==4.8
70 | - sentry-sdk==1.5.9
71 | - setproctitle==1.2.2
72 | - shortuuid==1.0.8
73 | - six==1.16.0
74 | - smmap==5.0.0
75 | - stable-baselines3==1.5.0
76 | - tensorboard==2.8.0
77 | - tensorboard-data-server==0.6.1
78 | - tensorboard-plugin-wit==1.8.1
79 | - torch==1.11.0
80 | - tqdm==4.64.0
81 | - typing-extensions==4.1.1
82 | - urllib3==1.26.9
83 | - wandb==0.12.15
84 | - werkzeug==2.1.1
85 | - zipp==3.8.0
86 |
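87 | # (Usage note, added for clarity) Recreate and activate this environment with:
88 | #   conda env create -f environment.yml
89 | #   conda activate SB3_training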
--------------------------------------------------------------------------------
/custom_envs/sample_env.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import random
3 | from gym.spaces import Discrete, Box
4 | import numpy as np
5 | from gym import Env
6 | gym.logger.set_level(40)
7 | class customEnv(Env):
8 | ''' four key functions:
9 | init
10 | step
11 | render
12 | reset
13 | '''
14 | # env for shower temperature https://www.youtube.com/watch?v=bD6V3rcr_54
15 | def __init__(self):
16 | # action space, for example: down, stay, up
17 | self.actionSpace = Discrete(3)
18 | # observation space allows continuous values over the given range
19 | self.observationSpace = Box(low=np.array([0]), high=np.array([100]))
20 |
21 | #state is the information from the env
22 |
23 | self.state = 38 - random.randint(-3,3)
24 | self.episodeLength = 60 # secs
25 |
26 | def step(self, action):
27 | # how we take the action
28 |
29 | # apply action
30 | # here the actions are 0, 1, 2: reduce the temp, keep it the same, or increase it
31 | # if 0: state = state + 0 - 1, reducing the temp by 1
32 | # if 1: state = state + 1 - 1, keeping the same temp
33 | # if 2: state = state + 2 - 1, increasing the temp by 1
34 |
35 | self.state += action - 1
36 |
37 | # reduce the episode length
38 | self.episodeLength -= 1  # sec
39 |
40 | # calc reward
41 | # the aim is for the temp to stay between 17 and 39, so we give a positive reward
42 | # inside this range and a negative reward otherwise
43 |
44 | if self.state >= 17 and self.state <= 39:
45 | reward = 1
46 | else:
47 | reward = -1
48 |
49 | # check if shower is done
50 | if self.episodeLength == 0: done = True
51 | else: done = False
52 |
53 | # some random noise to the state
54 | self.state += random.randint(-1,1)
55 |
56 | info ={}
57 |
58 | # how open ai requires
59 | return self.state, reward, done, info
60 |
61 | def render(self):
62 | # if to visualize
63 | pass
64 | def reset(self):
65 | self.state = 38 - random.randint(-3,3)
66 | self.episodeLength = 60
67 | return self.state
68 | env = customEnv()
69 |
70 | for episode in range(1,100):
71 | env.reset()
72 | done = False
73 | score = 0
74 | while not done:
75 | env.render()
76 | action = env.actionSpace.sample()
77 | state_, reward, done, info = env.step(action)
78 |
79 | score += reward
80 | print(f'Episode {episode} score {score} ')
81 |
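82 | # (Added note, illustrative alias only) Standard Gym tooling expects the
83 | # attribute names `action_space` and `observation_space`; the camelCase names
84 | # above work here only because this script samples env.actionSpace directly.
85 | # A minimal fix in __init__ would be:
86 | #     self.action_space = self.actionSpace
87 | #     self.observation_space = self.observationSpace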
--------------------------------------------------------------------------------
/custom_envs/trainRamp.py:
--------------------------------------------------------------------------------
1 | import ray
2 | from ramp_env3 import SumoRampEnv
3 | from ray.rllib.agents.ppo import PPOTrainer
4 |
5 |
6 |
7 | import ray.rllib.agents.ppo as ppo
8 | from ray.tune.logger import pretty_print
9 |
10 |
11 | '''
12 | https://docs.ray.io/en/latest/rllib/rllib-models.html
13 |
14 | the vision network case, you’ll probably have to configure conv_filters, if your environment observations have custom
15 | sizes. For example, "model": {"dim": 42, "conv_filters": [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1]]} for
16 | 42x42 observations. Thereby, always make sure that the last Conv2D output has an output shape of [B, 1, 1, X]
17 | ([B, X, 1, 1] for PyTorch), where B=batch and X=last Conv2D layer’s number of filters, so that RLlib can flatten it.
18 | An informative error will be thrown if this is not the case.
19 | '''
20 | config = ppo.DEFAULT_CONFIG.copy()
21 |
22 | config = {
23 | # this is a dict
24 | # "env": SumoRampEnv,
25 | "num_workers": 1,
26 | # "framework" : "tf2",
27 | "num_gpus": 1,
28 | "model": {
29 | "dim": 512,
30 | "conv_filters": [ # [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1], [1000, 1, 512]],#, [1000,512, 1]],
31 | [96, 11, 4], # 126
32 | [256, 5, 2], # 61
33 | [384, 3, 2], # 30
34 | [384, 3, 2], # 14
35 | [256, 3, 2], # 6
36 | [256, 3, 2], # 2
37 | [256, 1, 128],
38 |
39 | ], # lenet
40 | "post_fcnet_hiddens": [256, 256],
41 | # "post_fcnet_activation": "relu",
42 | # "fcnet_hiddens" : [10, 10 ],
43 | # "fcnet_activation" : "relu",
44 |
45 | },
46 | "evaluation_num_workers": 1,
47 | # Only for evaluation runs, render the env.
48 | "evaluation_config": {
49 | "render_env": True,
50 | }
51 |
52 | }
53 | #
54 | # from ray import tune
55 | #
56 | # def tune_func(config):
57 | # tune.util.wait_for_gpu()
58 | # train()
59 | #
60 | # tune.run(PPOTrainer, config=config, verbose=3,
61 | # # resources_per_trial={"cpu": 12, "gpu": 1} ,
62 | # reuse_actors=True,
63 | # stop={"training_iteration": 10e3})
64 |
65 |
66 |
67 |
68 |
69 | ray.init()
70 |
71 |
72 | trainer = ppo.PPOTrainer(config=config, env=SumoRampEnv)
73 |
74 | # Can optionally call trainer.restore(path) to load a checkpoint.
75 |
76 | for i in range(1000):
77 | # Perform one iteration of training the policy with PPO
78 | result = trainer.train()
79 | print(pretty_print(result))
80 |
81 | if i == 0:
82 | checkpoint = trainer.save()
83 | print("checkpoint saved at", checkpoint)
84 | ray.shutdown()
85 |
86 |
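87 | # (Added sketch) Sanity check of the conv sizes annotated in conv_filters
88 | # above, using the no-padding formula out = (in - kernel) // stride + 1:
89 | #
90 | #     def conv_out(size, kernel, stride):
91 | #         return (size - kernel) // stride + 1
92 | #
93 | #     size = 512
94 | #     for _, kernel, stride in [[96, 11, 4], [256, 5, 2], [384, 3, 2],
95 | #                               [384, 3, 2], [256, 3, 2], [256, 3, 2]]:
96 | #         size = conv_out(size, kernel, stride)  # 126, 61, 30, 14, 6, 2
97 | #
98 | # The final [256, 1, 128] filter then maps the 2x2 map to a 1x1 output,
99 | # giving the [B, 1, 1, X] shape that RLlib requires (see docstring above).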
--------------------------------------------------------------------------------
/custom_envs/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 4,
6 | "id": "61547c8f",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import numpy as np\n",
11 | "import gym\n",
12 | "import random"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 5,
18 | "id": "e2a97f0d",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "image = np.random.randint(0,255, size=(512,512,3))"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 13,
28 | "id": "2fbaa74d",
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "state_velocity = np.random.randint(-10,10, size = (7))\n",
33 | "state_pos_x = np.random.randint(-10,10, size = (7))\n",
34 | "state_pos_y = np.random.randint(-10,10, size = (7))"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 15,
40 | "id": "cf4dcf9c",
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "ename": "ValueError",
45 | "evalue": "operands could not be broadcast together with shapes (512,512,3) (7,) ",
46 | "output_type": "error",
47 | "traceback": [
48 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
49 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
50 | "\u001b[0;32m/var/folders/ph/y0swjc297dx47xlplt3w27xr0000gn/T/ipykernel_3030/3150006869.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mimage\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_pos_x\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_pos_y\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_velociy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
51 | "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (512,512,3) (7,) "
52 | ]
53 | }
54 | ],
55 | "source": [
56 | "image + state_pos_x + state_pos_y + state_velociy"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "id": "5f351282",
63 | "metadata": {},
64 | "outputs": [],
65 | "source": []
66 | }
67 | ],
68 | "metadata": {
69 | "kernelspec": {
70 | "display_name": "Python [conda env:base-flow]",
71 | "language": "python",
72 | "name": "conda-env-base-flow-py"
73 | },
74 | "language_info": {
75 | "codemirror_mode": {
76 | "name": "ipython",
77 | "version": 3
78 | },
79 | "file_extension": ".py",
80 | "mimetype": "text/x-python",
81 | "name": "python",
82 | "nbconvert_exporter": "python",
83 | "pygments_lexer": "ipython3",
84 | "version": "3.9.7"
85 | }
86 | },
87 | "nbformat": 4,
88 | "nbformat_minor": 5
89 | }
90 |
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/custom_envs/MultiMergeBSMNoise2.py:
--------------------------------------------------------------------------------
1 | import os
2 | from abc import ABC
3 |
4 | import gym
5 | from custom_envs.gymsumo import SumoRamp
6 | import traci
7 | import numpy as np
8 | from custom_envs.bsmMerge import BsmMerge, BsmMergeAllRewards
9 | from typing import Callable, Optional, Tuple, Union
10 | from scipy.ndimage.filters import gaussian_filter
11 |
12 |
13 |
14 |
15 | class MultiMerge(BsmMerge):
16 |
17 | def getObservations(self):
18 | # returns observations of the state
19 |
20 | state_speed = np.ones(7) * self.maxSpeed
21 | state_position_x = np.ones(7)
22 | state_position_y = np.ones(7)
23 | state_acc = np.zeros(7)
24 |
25 |
26 | vehicle_ids = self.getVehicleIds()
27 | state_image = np.array(self.render())
28 | # state_image = gaussian_filter(state_image , sigma=1)
29 |
30 | if vehicle_ids:
31 | obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids)
32 |
33 | for i, vehicle in enumerate(obsLane0):
34 | maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0])
35 | if vehicle:
36 |
37 | if vehicle[0] not in ["no_vehicle","", None]:
38 |
39 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
40 | state_acc[i] = traci.vehicle.getAcceleration(vehicle[0])
41 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
42 |
43 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
44 | for i, vehicle in enumerate(obsLane1, len(obsLane0)):
45 | if vehicle:
46 | if vehicle[0] not in ["no_vehicle", "", None]:
47 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
48 | state_acc[i] = traci.vehicle.getAcceleration(vehicle[0])
49 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
50 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
51 |
52 | # rl state information
53 | state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id)
54 | state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0]
55 | state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1]
56 | state_acc[-1] = traci.vehicle.getAcceleration(self.rl_car_id)
57 |
58 | #state_speed = np.clip(state_speed, 0, self.maxSpeed)
59 | #state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low),
60 | # abs(self.observation_space['xPos'].high))
61 | #state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low),
62 | # abs(self.observation_space['yPos'].high))
63 | sigmavalue = 1
64 | state_speed = gaussian_filter(state_speed, sigma=sigmavalue)
65 | state_position_x = gaussian_filter(state_position_x, sigma=sigmavalue)
66 | state_position_y = gaussian_filter(state_position_y, sigma=sigmavalue)
67 | state_acc = gaussian_filter(state_acc, sigma=sigmavalue)
68 |
69 |
70 | state = {
71 | 'image': state_image.astype(np.uint8),
72 | 'xPos': np.array(state_position_x, dtype=np.float32),
73 | 'yPos': np.array(state_position_y, dtype=np.float32),
74 | 'velocity': np.array(state_speed, dtype=np.float32),
75 | 'acceleration': np.array(state_acc, dtype=np.float32)
76 | }
77 |
78 | return state
79 |
80 |
81 |
--------------------------------------------------------------------------------
/train_Image_noBSM_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | from gym.spaces import Box
11 | from custom_envs.MultiMerge import Image_No_BSM as MultiMerge
12 |
13 | import os
14 | import wandb, glob
15 | from wandb.integration.sb3 import WandbCallback
16 | from stable_baselines3.common.monitor import Monitor
17 | import argparse
18 |
19 | parser = argparse.ArgumentParser(description='train PPO multi model')
20 | parser.add_argument("config", help="Config file")
21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
22 |
23 | args = parser.parse_args()
24 | module = __import__("config_file",fromlist= [args.config])
25 | exp_config = getattr(module, args.config)
26 |
27 | timesteps = 500000
28 |
29 | config = {
30 | "policy_type": "MultiInputPolicy",
31 | "total_timesteps": timesteps,
32 | "env_name": "SumoRamp()",
33 | }
34 | pdir = os.path.abspath('../')
35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
36 |
37 | policy_kwargs = exp_config.policy_kwargs
38 |
39 | action_space = exp_config.action_space
40 |
41 | image_shape = exp_config.image_shape
42 | obsspaces = exp_config.obsspaces
43 |
44 | weights = exp_config.weights
45 | sumoParameters = exp_config.sumoParameters
46 |
47 | min_action = -1
48 | max_action = +1
49 |
50 | video_folder = dir + '/logs/videos/'
51 | video_length = 600
52 |
53 | def make_env(env_id, rank, seed=0, monitor_dir=None):
54 | """
55 | Utility function for multiprocessed env.
56 |
57 | :param env_id: (str) the environment ID
58 | :param rank: (int) index of the subprocess
59 | :param seed: (int) the initial seed for RNG
60 | :param monitor_dir: (str) optional directory for Monitor log files
61 | """
62 |
63 | def _init():
64 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
65 | isBaseline=False,render=0)
66 | env.seed(seed + rank)
67 | env = RescaleAction(env, min_action, max_action)
68 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
69 | if monitor_path is not None:
70 | os.makedirs(monitor_dir, exist_ok=True)
71 | return env
72 | set_random_seed(seed)
73 | return _init
74 |
75 |
76 | if __name__ == '__main__':
77 | run = wandb.init(
78 | project="RMMRL-Training",
79 | name=f"Image+NoBSM_{args.config}",
80 | dir=dir,
81 | config=config,
82 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
83 | monitor_gym=True, # auto-upload the videos of agents playing the game
84 | save_code=True, # optional
85 | magic=True
86 | )
87 |
88 | env_id = "MultiMerge"
89 | num_cpu = 16  # Number of processes to use
90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
93 | env = VecMonitor(venv=env)
94 | model = PPO(config["policy_type"],
95 | env,
96 | verbose=3,
97 | policy_kwargs=policy_kwargs,
98 | gamma=0.99,
99 | n_steps=512,
100 | learning_rate=0.0003,
101 | vf_coef=0.042202,
102 | max_grad_norm=0.9,
103 | gae_lambda=0.95,
104 | n_epochs=10,
105 | clip_range=0.2,
106 | batch_size=256,
107 | tensorboard_log=f"{dir}"
108 | )
109 |
110 | model.learn(
111 | total_timesteps=int(config["total_timesteps"]),
112 | callback=WandbCallback(
113 | gradient_save_freq=5,
114 | model_save_freq=5000,
115 | model_save_path=f"{dir}/models/{run.id}",
116 | verbose=2,
117 | ), )
118 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
119 | env.save(stats_path)
120 |
121 |
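122 | # (Added sketch) To evaluate a saved run later, reload the policy together
123 | # with the normalization statistics saved above; the "model" filename is the
124 | # WandbCallback default and is an assumption here:
125 | #
126 | #     eval_env = SubprocVecEnv([make_env(env_id, 0)])
127 | #     eval_env = VecFrameStack(eval_env, n_stack=4)
128 | #     eval_env = VecNormalize.load(stats_path, eval_env)
129 | #     eval_env.training = False      # freeze the running statistics
130 | #     eval_env.norm_reward = False   # report raw rewards
131 | #     model = PPO.load(f"{dir}/models/{run.id}/model", env=eval_env)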
--------------------------------------------------------------------------------
/train_multiModelNoise_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | from gym.spaces import Box
11 | from custom_envs.MultiMerge import BSM_Noise_Image_Noise as MultiMerge
12 |
13 | import os
14 | import wandb, glob
15 | from wandb.integration.sb3 import WandbCallback
16 | from stable_baselines3.common.monitor import Monitor
17 | import argparse
18 |
19 | parser = argparse.ArgumentParser(description='train PPO multi model')
20 | parser.add_argument("config", help="Config file")
21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
22 |
23 | args = parser.parse_args()
24 | module = __import__("config_file",fromlist= [args.config])
25 | exp_config = getattr(module, args.config)
26 |
27 | timesteps = 500000
28 |
29 | config = {
30 | "policy_type": "MultiInputPolicy",
31 | "total_timesteps": timesteps,
32 | "env_name": "SumoRamp()",
33 | }
34 | pdir = os.path.abspath('../')
35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
36 |
37 | policy_kwargs = exp_config.policy_kwargs
38 |
39 | action_space = exp_config.action_space
40 |
41 | image_shape = exp_config.image_shape
42 | obsspaces = exp_config.obsspaces
43 |
44 | weights = exp_config.weights
45 | sumoParameters = exp_config.sumoParameters
46 |
47 | min_action = -1
48 | max_action = +1
49 |
50 | video_folder = dir + '/logs/videos/'
51 | video_length = 600
52 |
53 | def make_env(env_id, rank, seed=0, monitor_dir=None):
54 | """
55 | Utility function for multiprocessed env.
56 |
57 | :param env_id: (str) the environment ID
58 | :param rank: (int) index of the subprocess
59 | :param seed: (int) the initial seed for RNG
60 | :param monitor_dir: (str) optional directory for Monitor log files
61 | """
62 |
63 | def _init():
64 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
65 | isBaseline=False,render=0)
66 | env.seed(seed + rank)
67 | env = RescaleAction(env, min_action, max_action)
68 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
69 | if monitor_path is not None:
70 | os.makedirs(monitor_dir, exist_ok=True)
71 | return env
72 | set_random_seed(seed)
73 | return _init
74 |
75 | if __name__ == '__main__':
76 | run = wandb.init(
77 | project="RMMRL-Training",
78 | name=f"MultiModal_noise",
79 | dir=dir,
80 | config=config,
81 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
82 | monitor_gym=True, # auto-upload the videos of agents playing the game
83 | save_code=True, # optional
84 | magic=True
85 | )
86 |
87 | env_id = "MultiMerge"
88 | num_cpu = 16  # Number of processes to use
89 | # Create the vectorized environment
90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
93 |
94 | env = VecMonitor(venv=env)
95 | model = PPO(
96 | config["policy_type"],
97 | env,
98 | verbose=3,
99 | policy_kwargs=policy_kwargs,
100 | gamma=0.99,
101 | n_steps=512,
102 | learning_rate=0.0003,
103 | vf_coef=0.042202,
104 | max_grad_norm=0.9,
105 | gae_lambda=0.95,
106 | n_epochs=10,
107 | clip_range=0.2,
108 | batch_size=256,
109 | tensorboard_log=f"{dir}",
110 | )
111 |
112 | model.learn(
113 | total_timesteps=int(config["total_timesteps"]),
114 | callback=WandbCallback(
115 | gradient_save_freq=5,
116 | model_save_freq=5000,
117 | model_save_path=f"{dir}/models/{run.id}",
118 | verbose=2,
119 | ), )
120 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
121 | env.save(stats_path)
122 |
123 |
--------------------------------------------------------------------------------
/train_multiModal_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | from gym.spaces import Box
11 | from custom_envs.MultiMerge import MultiMergeAllRewards as MultiMerge
12 | import os
13 | import wandb, glob
14 | from wandb.integration.sb3 import WandbCallback
15 | from stable_baselines3.common.monitor import Monitor
16 | import argparse
17 |
18 | parser = argparse.ArgumentParser(description='train PPO multi model')
19 | parser.add_argument("config", help="Config file")
20 | parser.add_argument("--render", default=0 , help = "should render")
21 |
22 | args = parser.parse_args()
23 | module = __import__("config_file",fromlist= [args.config])
24 | exp_config = getattr(module, args.config)
25 |
26 | timesteps = 500000
27 |
28 | config = {
29 | "policy_type": "MultiInputPolicy",
30 | "total_timesteps": timesteps,
31 | "env_name": "SumoRamp()",
32 | }
33 | pdir = os.path.abspath('../')
34 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
35 |
36 | policy_kwargs = exp_config.policy_kwargs
37 |
38 | action_space = exp_config.action_space
39 |
40 | image_shape = exp_config.image_shape
41 | obsspaces = exp_config.obsspaces
42 |
43 | weights = exp_config.weights
44 | sumoParameters = exp_config.sumoParameters
45 |
46 | min_action = -1
47 | max_action = +1
48 |
49 | video_folder = dir + '/logs/videos/'
50 | video_length = 600
51 |
52 | def make_env(env_id, rank, seed=0, monitor_dir = None):
53 | """
54 | Utility function for multiprocessed env.
55 |
56 | :param env_id: (str) the environment ID
57 | :param rank: (int) index of the subprocess
58 | :param seed: (int) the initial seed for the RNG
59 | :param monitor_dir: (str) optional directory for Monitor log files
60 | """
61 |
62 | def _init():
63 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
64 | isBaseline=False, render=0)
65 | env.seed(seed + rank)
66 | env = RescaleAction(env, min_action, max_action)
67 | if monitor_dir is not None:
68 | os.makedirs(monitor_dir, exist_ok=True)
69 | env = Monitor(env, filename=os.path.join(monitor_dir, str(rank)))
70 | return env
71 | set_random_seed(seed)
72 | return _init
73 |
74 | if __name__ == '__main__':
75 | run = wandb.init(
76 | project="RMMRL-Training",
77 | name=f"MultiModal_NoNoise",
78 | dir=dir,
79 | config=config,
80 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
81 | monitor_gym=True, # auto-upload the videos of agents playing the game
82 | save_code=True, # optional
83 | magic=True
84 | )
85 |
86 | env_id = "MultiMerge"
87 | num_cpu = 16  # Number of processes to use
88 | # Create the vectorized environment
89 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
90 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
91 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
92 |
93 | env = VecMonitor(venv=env)
94 | model = PPO(
95 | config["policy_type"],
96 | env,
97 | verbose=3,
98 | policy_kwargs=policy_kwargs,
99 | gamma=0.99,
100 | n_steps=512,
101 | learning_rate=0.0001,
102 | vf_coef=0.042202,
103 | max_grad_norm=0.9,
104 | gae_lambda=0.95,
105 | n_epochs=10,
106 | clip_range=0.2,
107 | batch_size=256,
108 | tensorboard_log=f"{dir}",
109 | )
110 |
111 |
112 | model.learn(
113 | total_timesteps=int(config["total_timesteps"]),
114 | callback=WandbCallback(
115 | gradient_save_freq=5,
116 | model_save_freq=5000,
117 | model_save_path=f"{dir}/models/{run.id}",
118 | verbose=2,
119 | ),
120 | )
121 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
122 | env.save(stats_path)
123 |
124 |
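125 | # Note: the wrapper order above (SubprocVecEnv -> RescaleAction inside each env,
126 | # then VecFrameStack -> VecNormalize -> VecMonitor) must be reproduced exactly
127 | # when the saved vec_normalize.pkl is reloaded, since the pickled statistics
128 | # only match observations that have already been frame-stacked with n_stack=4.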
--------------------------------------------------------------------------------
/train_multiModalNoise_parallel.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | from gym.spaces import Box
11 | from custom_envs.MultiMergeParallel import BSM_Noise_Image_Noise as MultiMerge
12 |
13 | import os
14 | import wandb, glob
15 | from wandb.integration.sb3 import WandbCallback
16 | from stable_baselines3.common.monitor import Monitor
17 | import argparse
18 |
19 | parser = argparse.ArgumentParser(description='train PPO multi model')
20 | parser.add_argument("config", help="Config file")
21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
22 |
23 | args = parser.parse_args()
24 | module = __import__("config_file",fromlist= [args.config])
25 | exp_config = getattr(module, args.config)
26 |
27 | timesteps = 3000000
28 |
29 | config = {
30 | "policy_type": "MultiInputPolicy",
31 | "total_timesteps": timesteps,
32 | "env_name": "SumoRamp()",
33 | }
34 | pdir = os.path.abspath('../')
35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
36 |
37 | policy_kwargs = exp_config.policy_kwargs
38 |
39 | action_space = exp_config.action_space
40 |
41 | image_shape = exp_config.image_shape
42 | obsspaces = exp_config.obsspaces
43 |
44 | weights = exp_config.weights
45 | sumoParameters = exp_config.sumoParameters
46 |
47 | min_action = -1
48 | max_action = +1
49 |
50 | video_folder = dir + '/logs/videos/'
51 | video_length = 600
52 |
53 | def make_env(env_id, rank, seed=0, monitor_dir = None):
54 | """
55 | Utility function for multiprocessed env.
56 |
57 | :param env_id: (str) the environment ID
58 | :param rank: (int) index of the subprocess
59 | :param seed: (int) the initial seed for the RNG
60 | :param monitor_dir: (str) optional directory for Monitor log files
61 | """
62 |
63 | def _init():
64 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
65 | isBaseline=False, render=0)
66 | env.seed(seed + rank)
67 | env = RescaleAction(env, min_action, max_action)
68 | if monitor_dir is not None:
69 | os.makedirs(monitor_dir, exist_ok=True)
70 | env = Monitor(env, filename=os.path.join(monitor_dir, str(rank)))
71 | return env
72 | set_random_seed(seed)
73 | return _init
74 |
75 | if __name__ == '__main__':
76 | run = wandb.init(
77 | project="Robust-OnRampMerging-Training",
78 | name=f"ParallelMultiModal_noise",
79 | dir=dir,
80 | config=config,
81 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
82 | monitor_gym=True, # auto-upload the videos of agents playing the game
83 | save_code=True, # optional
84 | magic=True
85 | )
86 |
87 | env_id = "MultiMerge"
88 | num_cpu = 16  # Number of processes to use
89 | # Create the vectorized environment
90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
93 |
94 | env = VecMonitor(venv=env)
95 | model = PPO(
96 | config["policy_type"],
97 | env,
98 | verbose=3,
99 | policy_kwargs=policy_kwargs,
100 | gamma=0.99,
101 | n_steps=512,
102 | learning_rate=0.0003,
103 | vf_coef=0.042202,
104 | max_grad_norm=0.9,
105 | gae_lambda=0.95,
106 | n_epochs=10,
107 | clip_range=0.2,
108 | batch_size=256,
109 | tensorboard_log=f"{dir}",
110 | )
111 |
112 | model.learn(
113 | total_timesteps=int(config["total_timesteps"]),
114 | callback=WandbCallback(
115 | gradient_save_freq=5,
116 | model_save_freq=5000,
117 | model_save_path=f"{dir}/models/{run.id}",
118 | verbose=2,
119 | ), )
120 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
121 | env.save(stats_path)
122 |
123 |
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml_old:
--------------------------------------------------------------------------------
[SUMO route definitions (.rou.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/train_bsm_noImage_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | from gym.spaces import Box
11 | from custom_envs.MultiMerge import BSM_No_Image as MultiMerge
12 | import os
13 | import wandb, glob
14 | from wandb.integration.sb3 import WandbCallback
15 | from stable_baselines3.common.monitor import Monitor
16 | import argparse
17 |
18 | parser = argparse.ArgumentParser(description='train PPO multi model')
19 | parser.add_argument("config", help="Config file")
20 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma valu")
21 |
22 | args = parser.parse_args()
23 | module = __import__("config_file",fromlist= [args.config])
24 | exp_config = getattr(module, args.config)
25 |
26 | timesteps = 500000
27 |
28 | config = {
29 | "policy_type": "MultiInputPolicy",
30 | "total_timesteps": timesteps,
31 | "env_name": "SumoRamp()",
32 | }
33 | pdir = os.path.abspath('../')
34 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
35 |
36 | policy_kwargs = exp_config.policy_kwargs
37 |
38 | action_space = exp_config.action_space
39 |
40 | image_shape = exp_config.image_shape
41 | obsspaces = exp_config.obsspaces
42 |
43 | weights = exp_config.weights
44 | sumoParameters = exp_config.sumoParameters
45 |
46 | min_action = -1
47 | max_action = +1
48 |
49 | video_folder = dir + '/logs/videos/'
50 | video_length = 600
51 |
52 | def make_env(env_id, rank, seed=0, monitor_dir = None):
53 | """
54 | Utility function for multiprocessed env.
55 |
56 | :param env_id: (str) the environment ID
57 | :param rank: (int) index of the subprocess
58 | :param seed: (int) the initial seed for the RNG
59 | :param monitor_dir: (str) optional directory for Monitor log files
60 | """
61 |
62 | def _init():
63 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
64 | isBaseline=False, render=0)
65 | env.seed(seed + rank)
66 | env = RescaleAction(env, min_action, max_action)
67 | if monitor_dir is not None:
68 | os.makedirs(monitor_dir, exist_ok=True)
69 | env = Monitor(env, filename=os.path.join(monitor_dir, str(rank)))
70 | return env
71 | set_random_seed(seed)
72 | return _init
73 |
74 |
75 | if __name__ == '__main__':
76 | run = wandb.init(
77 | project="RMMRL-Training",
78 | name=f"BSM+NoImage",
79 | dir=dir,
80 | config=config,
81 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
82 | monitor_gym=True, # auto-upload the videos of agents playing the game
83 | save_code=True, # optional
84 | magic=True
85 | )
86 |
87 | env_id = "MultiMerge"
88 | num_cpu = 16  # Number of processes to use
89 | # Create the vectorized environment
90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
93 |
94 | env = VecMonitor(venv=env)
95 | model = PPO(
96 | config["policy_type"],
97 | env,
98 | verbose=3,
99 | policy_kwargs=policy_kwargs,
100 | gamma=0.99,
101 | n_steps=512,
102 | learning_rate=0.0003,
103 | vf_coef=0.042202,
104 | max_grad_norm=0.9,
105 | gae_lambda=0.95,
106 | n_epochs=10,
107 | clip_range=0.2,
108 | batch_size=256,
109 | tensorboard_log=f"{dir}",
110 | )
111 |
112 |
113 | model.learn(
114 | total_timesteps=int(config["total_timesteps"]),
115 | callback=WandbCallback(
116 | gradient_save_freq=5,
117 | model_save_freq=5000,
118 | model_save_path=f"{dir}/models/{run.id}",
119 | verbose=2,
120 | ),
121 | )
122 | model.save(f"{dir}/models/{run.id}/vecstats/")
123 |
124 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
125 | env.save(stats_path)
126 |
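127 | # Note: the policy is persisted twice -- periodically by WandbCallback
128 | # (every model_save_freq=5000 steps) and once more above via model.save();
129 | # the VecNormalize statistics are written separately to vec_normalize.pkl
130 | # and must be reloaded alongside whichever checkpoint is evaluated.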
--------------------------------------------------------------------------------
/test_multi_model.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
6 | from stable_baselines3.common.env_util import make_vec_env
7 | from stable_baselines3.common.utils import set_random_seed
8 | from gym.wrappers.rescale_action import RescaleAction
9 | from gym.spaces import Box
10 | from custom_envs.MultiMerge import MultiMergeAllRewards as MultiMerge
11 |
12 |
13 | import os
14 | import wandb, glob
15 | from wandb.integration.sb3 import WandbCallback
16 | from stable_baselines3.common.monitor import Monitor
17 | import argparse
18 |
19 | parser = argparse.ArgumentParser(description='test PPO multi model')
20 | parser.add_argument("dir", help="model path")
21 | parser.add_argument("--render", default =0, help = "should render default 0")
22 | parser.add_argument("stats_load", help="vec norm stats file")
23 |
24 | parser.add_argument("config", help="Config file")
25 | args = parser.parse_args()
26 |
27 | module = __import__("config_file",fromlist= [args.config])
28 | exp_config = getattr(module, args.config)
29 |
30 | config = {
31 | "policy_type": "MultiInputPolicy",
32 | "env_name": "SumoRamp()",
33 | }
34 |
35 |
36 | pdir = os.path.abspath('../')
37 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
38 |
39 |
40 | policy_kwargs = exp_config.policy_kwargs
41 |
42 | action_space = exp_config.action_space
43 |
44 | image_shape = exp_config.image_shape
45 | obsspaces = exp_config.obsspaces
46 |
47 | weights = exp_config.weights
48 | sumoParameters = exp_config.sumoParameters
49 |
50 | min_action = -1
51 | max_action = +1
52 |
53 | video_folder = dir + '/logs/videos/'
54 | video_length = 180
55 |
56 | def make_env(env_id, rank, seed=0, monitor_dir = None):
57 | """
58 | Utility function for multiprocessed env.
59 |
60 | :param env_id: (str) the environment ID
61 | :param rank: (int) index of the subprocess
62 | :param seed: (int) the initial seed for the RNG
63 | :param monitor_dir: (str) optional directory for Monitor log files
64 | """
65 |
66 | def _init():
67 |
68 |
69 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
70 | isBaseline=False, render=0)
71 | env.seed(seed + rank)
72 | env = RescaleAction(env, min_action, max_action)
73 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
74 | if monitor_path is not None:
75 | os.makedirs(monitor_dir, exist_ok=True)
76 | env = Monitor(env, filename=monitor_path)
77 |
78 | return env
79 |
80 | set_random_seed(seed)
81 | return _init
82 |
83 |
84 | if __name__ == '__main__':
85 | run = wandb.init(
86 | project="Robust-OnRampMerging-Testing",
87 | dir=dir,
88 | name=f"multimodal_{args.config}",
89 | config=config,
90 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
91 | monitor_gym=True, # auto-upload the videos of agents playing the game
92 | save_code=True, # optional
93 | magic=True
94 | )
95 |
96 | env_id = "MultiMerge"
97 | num_cpu = 1 # Number of processes to use
98 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
99 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
100 | env = VecNormalize.load(args.stats_load,env)
101 | env.training = False
102 | env.norm_reward = True
103 | env = VecMonitor(venv=env)
104 | model = PPO.load(args.dir, env)
105 |
106 | obs = env.reset()
107 | n_games = 10
108 | for i_games in range(n_games):
109 |
110 | done = False
111 | obs = env.reset()
112 | score = 0
113 | num_collisions = 0
114 | mergeTime = 0
115 | velocity_reward= []
116 | acc_reward = []
117 | while not done:
118 | action, _states = model.predict(obs)
119 | obs, rewards, done, info = env.step(action)
120 | if int(args.render)==1:
121 | env.render()
122 | score += rewards
123 |
124 | if int(info[0]['terminal']) == -1:
125 | num_collisions += 1
126 | if int(info[0]['terminal']) != 0:
127 | mergeTime = info[0]['mergeTime']
128 | velocity_reward.append(info[0]['velocity_reward'])
129 |
130 | acc_reward.append(info[0]['acc_reward'])
131 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}")
132 | wandb.log({
133 | "episodic score": score,
134 | "num_collisions": num_collisions,
135 | "mergeTime": mergeTime,
136 | "acc_reward": np.mean(acc_reward),
137 | "velocity_reward": np.mean(velocity_reward),
138 | }, step=i_games)
139 |
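140 | # A possible aggregate summary (sketch; total_collisions is a hypothetical
141 | # counter accumulated across the episode loop above):
142 | #
143 | #   total_collisions += num_collisions        # inside the loop, per episode
144 | #   ...
145 | #   wandb.log({"collision_rate": total_collisions / n_games})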
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_3.rou.xml:
--------------------------------------------------------------------------------
[SUMO route definitions (.rou.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_1.rou.xml:
--------------------------------------------------------------------------------
[SUMO route definitions (.rou.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.rou.xml_copy:
--------------------------------------------------------------------------------
[SUMO route definitions (.rou.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_2.rou.xml:
--------------------------------------------------------------------------------
[SUMO route definitions (.rou.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/Ramp_1.net.xml:
--------------------------------------------------------------------------------
[SUMO network definition (.net.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2_1.net.xml:
--------------------------------------------------------------------------------
[SUMO network definition (.net.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2.net.xml_copy:
--------------------------------------------------------------------------------
[SUMO network definition (.net.xml); XML content not preserved in this dump]
--------------------------------------------------------------------------------
/custom_envs/MultiMerge_1.py:
--------------------------------------------------------------------------------
1 | import os
2 | from abc import ABC
3 |
4 | import gym
5 | from custom_envs.gymsumo import SumoRamp
6 | import traci
7 | import numpy as np
8 | from custom_envs.bsmMerge import BsmMerge, BsmMergeAllRewards
9 | from typing import Callable, Optional, Tuple, Union
10 |
11 | class MultiMerge(BsmMerge):
12 | def getObservations(self):
13 | # returns observations of the state
14 |
15 | state_speed = np.ones(7) * self.maxSpeed
16 | state_position_x = np.ones(7)
17 | state_position_y = np.ones(7)
18 |
19 | vehicle_ids = self.getVehicleIds()
20 | state_image = np.array(self.render())
21 | if vehicle_ids:
22 | obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids)
23 |
24 | for i, vehicle in enumerate(obsLane0):
25 | maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0])  # unused: the observed speeds below are stored unnormalized
26 | if vehicle:
27 |
28 | if vehicle[0] not in ["no_vehicle","", None]:
29 |
30 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
31 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
32 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
33 | for i, vehicle in enumerate(obsLane1, len(obsLane0)):
34 | if vehicle:
35 | if vehicle[0] not in ["no_vehicle", "", None]:
36 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
37 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
38 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
39 |
40 | # rl state information
41 | state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id) / self.maxSpeed
42 | state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0]
43 | state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1]
44 | state_speed = np.clip(state_speed, 0, self.maxSpeed)
45 | state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low),
46 | abs(self.observation_space['xPos'].high))
47 | state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low),
48 | abs(self.observation_space['yPos'].high))
49 | state = {
50 | 'image': state_image.astype(np.uint8),
51 | 'xPos': np.array(state_position_x, dtype=np.float32),
52 | 'yPos': np.array(state_position_y, dtype=np.float32),
53 | 'velocity': np.array(state_speed, dtype=np.float32)}
54 |
55 | return state
56 |
57 | class MultiMergeAllRewards(BsmMergeAllRewards):
58 | def getObservations(self):
59 | # returns observations of the state
60 |
61 | state_speed = np.ones(7) * self.maxSpeed
62 | state_position_x = np.ones(7)
63 | state_position_y = np.ones(7)
64 |
65 | vehicle_ids = self.getVehicleIds()
66 | state_image = np.array(self.render())
67 | if vehicle_ids:
68 | for vehicle in vehicle_ids:
69 | if not "rl" in vehicle:
70 | traci.vehicle.setColor(vehicle, color=(255, 255, 255, 255)) # change vehicle color to white
71 |
72 | obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids)
73 |
74 |
75 | for i, vehicle in enumerate(obsLane0):
76 | maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0])  # unused: the observed speeds below are stored unnormalized
77 | #print(vehicle)
78 | if vehicle:
79 | if vehicle[0] not in ["no_vehicle","", None]:
80 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
81 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
82 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
83 | #traci.vehicle.setColor(vehicle[0], color=(255, 0, 255, 255)) # change vehicle color to blue
84 |
85 | for i, vehicle in enumerate(obsLane1, len(obsLane0)):
86 | #print(vehicle)
87 | if vehicle:
88 |
89 | if vehicle[0] not in ["no_vehicle","", None]:
90 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
91 |
92 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
93 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
94 | #traci.vehicle.setColor(vehicle[0], color=(255, 255, 0, 255)) # change vehicle color to blue
95 |
96 | # rl state information
97 | state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id) / self.maxSpeed
98 | state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0]
99 | state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1]
100 | state_speed = np.clip(state_speed, 0, self.maxSpeed)
101 | state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low),
102 | abs(self.observation_space['xPos'].high))
103 | state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low),
104 | abs(self.observation_space['yPos'].high))
105 | state = {
106 | 'image': state_image.astype(np.uint8),
107 | 'xPos': np.array(state_position_x, dtype=np.float32),
108 | 'yPos': np.array(state_position_y, dtype=np.float32),
109 | 'velocity': np.array(state_speed, dtype=np.float32)}
110 | return state
111 |
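112 | # Observation layout for both classes above: each 7-slot vector reserves the
113 | # leading entries for vehicles observed on lanes 0 and 1 and the last slot
114 | # ([-1]) for the RL ego vehicle. Note the asymmetry: only the ego speed is
115 | # divided by maxSpeed before the clip to [0, maxSpeed]; the other entries
116 | # remain in raw units and rely on VecNormalize downstream.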
--------------------------------------------------------------------------------
/test_parallel_model.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
6 | from stable_baselines3.common.env_util import make_vec_env
7 | from stable_baselines3.common.utils import set_random_seed
8 | from gym.wrappers.rescale_action import RescaleAction
9 | # from custom_envs.rampTaperEnv_half import SumoRamp
10 | from gym.spaces import Box
11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
12 | # from custom_envs.bsmMerge import BsmMerge
13 | # load simple cnn + bsm reward env
14 | # from custom_envs.MultiMerge import MultiMerge
15 | # load cnn + bsm all rewards env
16 | from custom_envs.MultiMergeParallel import MultiMergeAllRewards as MultiMerge
17 |
18 |
19 | import os
20 | import wandb, glob
21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN
22 | from wandb.integration.sb3 import WandbCallback
23 | from stable_baselines3.common.monitor import Monitor
24 | import argparse
25 |
26 | parser = argparse.ArgumentParser(description='test PPO multi model')
27 |
28 | parser.add_argument("dir", help="model path")
29 | parser.add_argument("--render", default =0, help = "should render default 0")
30 |
31 | parser.add_argument("stats_path", help="vec norm stats path file")
32 | parser.add_argument("config", help="Config file")
33 | args = parser.parse_args()
34 |
35 | module = __import__("config_file",fromlist= [args.config])
36 | exp_config = getattr(module, args.config)
37 |
38 |
39 | timesteps = 3e6
40 | sub_timesteps = 10000
41 |
42 | config = {
43 | "policy_type": "MultiInputPolicy",
44 | "total_timesteps": timesteps,
45 | "env_name": "SumoRamp()",
46 | "sub_timesteps": sub_timesteps
47 | }
48 |
49 |
50 | pdir = os.path.abspath('../')
51 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
52 |
53 |
54 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False)
55 | policy_kwargs = exp_config.policy_kwargs
56 |
57 | action_space = exp_config.action_space
58 |
59 | image_shape = exp_config.image_shape
60 | obsspaces = exp_config.obsspaces
61 |
62 | weights = exp_config.weights
63 | sumoParameters = exp_config.sumoParameters
64 |
65 | min_action = -1
66 | max_action = +1
67 |
68 | video_folder = dir + '/logs/videos/'
69 | video_length = 600
70 |
71 | def make_env(env_id, rank, seed=0, monitor_dir = None):
72 | """
73 | Utility function for multiprocessed env.
74 |
75 | :param env_id: (str) the environment ID
76 | :param rank: (int) index of the subprocess
77 | :param seed: (int) the initial seed for the RNG
78 | :param monitor_dir: (str) optional directory for Monitor log files
79 | """
80 |
81 | def _init():
82 |
83 |
84 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
85 | isBaseline=False, render=0)
86 | env.seed(seed + rank)
87 | env = RescaleAction(env, min_action, max_action)
88 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
89 | if monitor_path is not None:
90 | os.makedirs(monitor_dir, exist_ok=True)
91 | env = Monitor(env, filename=monitor_path)
92 |
93 | return env
94 |
95 | set_random_seed(seed)
96 | return _init
97 |
98 |
99 | if __name__ == '__main__':
100 | run = wandb.init(
101 | project="Multi_Testing",
102 | dir=dir,
103 | config=config,
104 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
105 | monitor_gym=True, # auto-upload the videos of agents playing the game
106 | save_code=True, # optional
107 | magic=True
108 | )
109 |
110 | env_id = "MultiMerge"
111 | num_cpu = 1 # Number of processes to use
112 | # Create the vectorized environment
113 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
114 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
115 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0,
116 | # video_length=300)
117 |
118 | # add vstack
119 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
120 | env = VecNormalize.load(args.stats_path,env)
121 | env = VecMonitor(venv=env)
122 |
123 |
124 | #code = wandb.Artifact('project-source', type='code')
125 | #for path in glob.glob('**/*.py', recursive=True):
126 | # code.add_file(path)
127 |
128 | #wandb.run.use_artifact(code)
129 |
130 |
131 |
132 | model = PPO.load(args.dir, env)
133 |
134 | obs = env.reset()
135 | n_games = 300
136 | for i_games in range(n_games):
137 |
138 | done = False
139 | obs = env.reset()
140 | score = 0
141 | num_collisions = 0
142 | mergeTime = 0
143 |
144 | while not done:
145 | action, _states = model.predict(obs)
146 | print('action', action)
147 | obs, rewards, done, info = env.step(action)
148 | if int(args.render)==1:
149 | env.render()
150 | score += rewards
151 |
152 | print('rewards', rewards)
153 | if int(info[0]['terminal']) == -1:
154 | num_collisions += 1
155 | if int(info[0]['terminal']) != 0:
156 | mergeTime = int(info[0]['mergeTime'])
157 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}")
158 | wandb.log({
159 | "episodic score": score,
160 | "num_collisions": num_collisions,
161 | "mergeTime": mergeTime
162 | }, step=i_games)
163 |
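164 | # Caution: unlike test_bsmNoise_PerfectImage.py, env.training is not set to
165 | # False after VecNormalize.load here, so the loaded normalization statistics
166 | # keep updating during evaluation; add env.training = False to freeze them.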
--------------------------------------------------------------------------------
/test_baseline_paralllel.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
6 | from stable_baselines3.common.env_util import make_vec_env
7 | from stable_baselines3.common.utils import set_random_seed
8 | from gym.wrappers.rescale_action import RescaleAction
9 | # from custom_envs.rampTaperEnv_half import SumoRamp
10 | from gym.spaces import Box
11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
12 | # from custom_envs.bsmMerge import BsmMerge
13 | # load simple cnn + bsm reward env
14 | # from custom_envs.MultiMerge import MultiMerge
15 | # load cnn + bsm all rewards env
16 | from custom_envs.MultiMergeParallel import MultiMergeAllRewards as MultiMerge
17 | import argparse
18 |
19 | import os
20 | import wandb, glob
21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN
22 | from wandb.integration.sb3 import WandbCallback
23 | from stable_baselines3.common.monitor import Monitor
24 |
25 | timesteps = 3e6
26 | sub_timesteps = 10000
27 |
28 | config = {
29 | "policy_type": "MultiInputPolicy",
30 | "total_timesteps": timesteps,
31 | "env_name": "SumoRamp()",
32 | "sub_timesteps": sub_timesteps
33 | }
34 |
35 | parser = argparse.ArgumentParser(description='test PPO multi model')
36 |
37 |
38 | parser.add_argument("--render",default = 0,help="should render default 0")
39 | parser.add_argument("config", help="Config file")
40 |
41 | args = parser.parse_args()
42 | module = __import__("config_file",fromlist= [args.config])
43 | exp_config = getattr(module, args.config)
44 | pdir = os.path.abspath('../')
45 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
46 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False)
47 |
48 |
49 |
50 | policy_kwargs = exp_config.policy_kwargs
51 |
52 | action_space = exp_config.action_space
53 |
54 | image_shape = exp_config.image_shape
55 | obsspaces = exp_config.obsspaces
56 |
57 | weights = exp_config.weights
58 | sumoParameters = exp_config.sumoParameters
59 |
60 |
61 | min_action = -1
62 | max_action = +1
63 |
64 | video_folder = dir + '/logs/videos/'
65 | video_length = 600
66 |
67 | def make_env(env_id, rank, seed=0, monitor_dir = None):
68 | """
69 | Utility function for multiprocessed env.
70 |
71 | :param env_id: (str) the environment ID
72 | :param rank: (int) index of the subprocess
73 | :param seed: (int) the initial seed for the RNG
74 | :param monitor_dir: (str) optional directory for Monitor log files
75 | """
76 |
77 | def _init():
78 |
79 |
80 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
81 | isBaseline=True, render=0)
82 | env.seed(seed + rank)
83 | env = RescaleAction(env, min_action, max_action)
84 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
85 | if monitor_path is not None:
86 | os.makedirs(monitor_dir, exist_ok=True)
87 | env = Monitor(env, filename=monitor_path)
88 |
89 | return env
90 |
91 | set_random_seed(seed)
92 | return _init
93 |
94 |
95 | if __name__ == '__main__':
96 | run = wandb.init(
97 | project="SB3RampTraining",
98 | name="Baseline_ppo7_multi-all-rewards",
99 | dir=dir,
100 | config=config,
101 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
102 | monitor_gym=True, # auto-upload the videos of agents playing the game
103 | save_code=True, # optional
104 | magic=True
105 | )
106 |
107 | env_id = "MultiMerge"
108 | num_cpu = 1 # Number of processes to use
109 | # Create the vectorized environment
110 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
111 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
112 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
113 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0,
114 | # video_length=300)
115 |
116 | # add vstack
117 | # env = VecFrameStack(env, n_stack=4) # stack 4 frames
118 | env = VecMonitor(venv=env)
119 |
120 |
121 | # code = wandb.Artifact('project-source', type='code')
122 | # for path in glob.glob('**/*.py', recursive=True):
123 | # code.add_file(path)
124 | #
125 | # wandb.run.use_artifact(code)
126 |
127 |
128 |
129 | # model = PPO.load(os.path.join(pdir,'trainedSBModels/multi_all_rewards/model'), env)
130 |
131 | obs = env.reset()
132 | n_games = 300
133 | for i_games in range(n_games):
134 |
135 | done = False
136 | obs = env.reset()
137 | score = 0
138 | num_collisions = 0
139 | mergeTime = 0
140 | while not done:
141 | action = env.action_space.sample()
142 | #print('action', action)
143 | obs, rewards, done, info = env.step(action)
144 | score += rewards
145 | if int(args.render) == 1:
146 |
147 | env.render()
148 | #print('rewards', rewards)
149 | if int(info[0]['terminal']) == -1:
150 | num_collisions += 1
151 | if int(info[0]['terminal']) != 0:
152 | mergeTime = int(info[0]['mergeTime'])
153 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}")
154 | wandb.log({
155 | "episodic score": score,
156 | "num_collisions": num_collisions,
157 | "mergeTime": mergeTime
158 | }, step=i_games)
159 |
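160 | # This baseline draws uniformly random actions via env.action_space.sample()
161 | # instead of a trained policy, providing a no-learning reference point for
162 | # the episodic score, collision count, and merge time logged above.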
--------------------------------------------------------------------------------
/test_bsmNoise_PerfectImage.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
6 | from stable_baselines3.common.env_util import make_vec_env
7 | from stable_baselines3.common.utils import set_random_seed
8 | from gym.wrappers.rescale_action import RescaleAction
9 | # from custom_envs.rampTaperEnv_half import SumoRamp
10 | from gym.spaces import Box
11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
12 | # from custom_envs.bsmMerge import BsmMerge
13 | # load simple cnn + bsm reward env
14 | # from custom_envs.MultiMerge import MultiMerge
15 | # load cnn + bsm all rewards env
16 | from custom_envs.MultiMerge import BSM_Noise_No_Image as MultiMerge
17 |
18 |
19 | import os
20 | import wandb, glob
21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN
22 | from wandb.integration.sb3 import WandbCallback
23 | from stable_baselines3.common.monitor import Monitor
24 | import argparse
25 |
26 | parser = argparse.ArgumentParser(description='test PPO multi model')
27 |
28 | parser.add_argument("dir", help="model path")
29 |
30 | parser.add_argument("stats_path", help="vec env stats path")
31 | parser.add_argument("--render", default =0, help = "should render default 0")
32 | parser.add_argument("config", help="Config file")
33 |
34 | args = parser.parse_args()
35 |
36 | module = __import__("config_file",fromlist= [args.config])
37 | exp_config = getattr(module, args.config)
38 |
39 |
40 | timesteps = 3e6
41 | sub_timesteps = 10000
42 |
43 | config = {
44 | "policy_type": "MultiInputPolicy",
45 | "total_timesteps": timesteps,
46 | "env_name": "SumoRamp()",
47 | "sub_timesteps": sub_timesteps
48 | }
49 |
50 |
51 | pdir = os.path.abspath('../')
52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
53 |
54 |
55 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False)
56 | policy_kwargs = exp_config.policy_kwargs
57 |
58 | action_space = exp_config.action_space
59 |
60 | image_shape = exp_config.image_shape
61 | obsspaces = exp_config.obsspaces
62 |
63 | weights = exp_config.weights
64 | sumoParameters = exp_config.sumoParameters
65 |
66 | min_action = -1
67 | max_action = +1
68 |
69 | video_folder = dir + '/logs/videos/'
70 | video_length = 600
71 |
72 | def make_env(env_id, rank, seed=0, monitor_dir = None):
73 | """
74 | Utility function for multiprocessed env.
75 |
76 | :param env_id: (str) the environment ID
77 | :param rank: (int) index of the subprocess
78 | :param seed: (int) the initial seed for the RNG
79 | :param monitor_dir: (str) optional directory for Monitor log files
80 | """
81 |
82 | def _init():
83 |
84 |
85 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
86 | isBaseline=False, render=0)
87 | env.seed(seed + rank)
88 | env = RescaleAction(env, min_action, max_action)
89 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
90 | if monitor_path is not None:
91 | os.makedirs(monitor_dir, exist_ok=True)
92 | env = Monitor(env, filename=monitor_path)
93 |
94 | return env
95 |
96 | set_random_seed(seed)
97 | return _init
98 |
99 |
100 | if __name__ == '__main__':
101 | run = wandb.init(
102 | project="Robust-OnRampMerging",
103 | name=f"Test_BSMNoise+NoImage_{args.config}",
104 | dir=dir,
105 | config=config,
106 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
107 | monitor_gym=True, # auto-upload the videos of agents playing the game
108 | save_code=True, # optional
109 | magic=True
110 | )
111 |
112 | env_id = "MultiMerge"
113 | num_cpu = 1 # Number of processes to use
114 | # Create the vectorized environment
115 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
116 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
117 | #env = VecNormalize(env, norm_obs=True, norm_reward=True, training=False)
118 |
119 | env = VecFrameStack(env, 4)
120 |
121 | env = VecNormalize.load(args.stats_path, env)
122 | env.training = False
123 | env.norm_reward = False
124 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0,
125 | # video_length=300)
126 |
127 | # add vstack
128 | env = VecMonitor(venv=env)
129 |
130 |
131 | #code = wandb.Artifact('project-source', type='code')
132 | #for path in glob.glob('**/*.py', recursive=True):
133 | # code.add_file(path)
134 |
135 | #wandb.run.use_artifact(code)
136 |
137 |
138 |
139 | model = PPO.load(args.dir, env)
140 |
141 | obs = env.reset()
142 | n_games = 300
143 | for i_games in range(n_games):
144 |
145 | done = False
146 | obs = env.reset()
147 | score = 0
148 | num_collisions = 0
149 | mergeTime = 0
150 |
151 | while not done:
152 | action, _states = model.predict(obs)
153 | obs, rewards, done, info = env.step(action)
154 | if int(args.render)==1:
155 | env.render()
156 | score += rewards
157 |
158 | if int(info[0]['terminal']) == -1:
159 | num_collisions += 1
160 | if int(info[0]['terminal']) != 0:
161 | mergeTime = int(info[0]['mergeTime'])
162 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}")
163 | wandb.log({
164 | "episodic score": score,
165 | "num_collisions": num_collisions,
166 | "mergeTime": mergeTime
167 | }, step=i_games)
168 |
--------------------------------------------------------------------------------
/train_ImageNoise_noBSM_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | # from custom_envs.rampTaperEnv_half import SumoRamp
11 | from gym.spaces import Box
12 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
13 | # from custom_envs.bsmMerge import BsmMerge
14 | # load simple cnn + bsm reward env
15 | # from custom_envs.MultiMerge import MultiMerge
16 | # load cnn + bsm all rewards env
17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge
18 | from custom_envs.MultiMerge import Image_Noise_No_BSM as MultiMerge
22 |
23 |
24 | import os
25 | import wandb, glob
26 | #from customFeatureExtractor import CustomCombinedExtractor
27 | from wandb.integration.sb3 import WandbCallback
28 | from stable_baselines3.common.monitor import Monitor
29 | #from config_file import sac_multi_config as exp_config
30 | import argparse
31 |
32 | parser = argparse.ArgumentParser(description='train PPO multi model')
33 | parser.add_argument("config", help="Config file")
34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
35 |
36 | args = parser.parse_args()
37 | module = __import__("config_file",fromlist= [args.config])
38 | exp_config = getattr(module, args.config)
39 |
40 | timesteps = 50000
42 |
43 | sub_timesteps = 10000
44 |
45 | config = {
46 | "policy_type": "MultiInputPolicy",
47 | "total_timesteps": timesteps,
48 | "env_name": "SumoRamp()",
49 | "sub_timesteps": sub_timesteps
50 | }
51 | pdir = os.path.abspath('../')
52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
53 |
54 | policy_kwargs = exp_config.policy_kwargs
55 |
56 | action_space = exp_config.action_space
57 |
58 | image_shape = exp_config.image_shape
59 | obsspaces = exp_config.obsspaces
60 |
61 | weights = exp_config.weights
62 | sumoParameters = exp_config.sumoParameters
63 |
64 | min_action = -1
65 | max_action = +1
66 |
67 | video_folder = dir + '/logs/videos/'
68 | video_length = 600
69 |
70 | def make_env(env_id, rank, seed=0, monitor_dir = None):
71 | """
72 | Utility function for multiprocessed env.
73 |
74 | :param env_id: (str) the environment ID
75 | :param rank: (int) index of the subprocess
76 | :param seed: (int) the initial seed for the RNG
77 | :param monitor_dir: (str) optional directory for Monitor log files
78 | """
79 |
80 | def _init():
81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
82 | isBaseline=False, render=0)
83 | env.seed(seed + rank)
84 | env = RescaleAction(env, min_action, max_action)
85 | if monitor_dir is not None:
86 | os.makedirs(monitor_dir, exist_ok=True)
87 | env = Monitor(env, filename=os.path.join(monitor_dir, str(rank)))
88 | return env
89 | set_random_seed(seed)
90 | return _init
91 |
92 |
93 | if __name__ == '__main__':
94 | run = wandb.init(
95 | project="Robust-OnRampMerging",
96 | name=f"ImageNoise+NoBSM_{args.config}",
97 | dir=dir,
98 | config=config,
99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
100 | monitor_gym=True, # auto-upload the videos of agents playing the game
101 | save_code=True, # optional
102 | magic=True
103 | )
104 |
105 | env_id = "MultiMerge"
106 | num_cpu = 2  # Number of processes to use
107 | # Create the vectorized environment
108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
112 |
113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}",
114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0,
115 | # video_length=300)
116 |
117 | env = VecMonitor(venv=env)
118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
119 | # isBaseline=False,render=0)
120 | #code = wandb.Artifact('project-source', type='code')
121 | #for path in glob.glob('**/*.py', recursive=True):
122 | # code.add_file(path)
123 |
124 | #wandb.run.use_artifact(code)
125 | model = PPO(config["policy_type"], env,
126 | verbose=3,
127 | gamma=0.95,
128 | n_steps=1200,
129 | ent_coef=0.0905168,
130 | learning_rate=0.005,
131 | vf_coef=0.042202,
132 | max_grad_norm=0.9,
133 | gae_lambda=0.7,
134 | n_epochs=5,
135 | clip_range=0.2,
136 | batch_size=1200,
137 | tensorboard_log=f"{dir}")
138 |
139 | model.learn(
140 | total_timesteps=int(config["total_timesteps"]),
141 | callback=WandbCallback(
142 | gradient_save_freq=5,
143 | model_save_freq=5000,
144 | model_save_path=f"{dir}/models/{run.id}",
145 | verbose=2,
146 | ), )
147 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
148 | env.save(stats_path)
149 |
150 |
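151 | # Rollout arithmetic for the settings above: n_steps=1200 per env with
152 | # num_cpu=2 gives 2400 transitions per update; batch_size=1200 then yields
153 | # two minibatches, each revisited n_epochs=5 times per update.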
--------------------------------------------------------------------------------
/train_ImageNoise_PerfectBSM.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | # from custom_envs.rampTaperEnv_half import SumoRamp
11 | from gym.spaces import Box
12 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
13 | # from custom_envs.bsmMerge import BsmMerge
14 | # load simple cnn + bsm reward env
15 | # from custom_envs.MultiMerge import MultiMerge
16 | # load cnn + bsm all rewards env
17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge
18 | from custom_envs.MultiMerge import BSM_Perfect_Noise_Image as MultiMerge
22 |
23 |
24 | import os
25 | import wandb, glob
26 | #from customFeatureExtractor import CustomCombinedExtractor
27 | from wandb.integration.sb3 import WandbCallback
28 | from stable_baselines3.common.monitor import Monitor
29 | #from config_file import sac_multi_config as exp_config
30 | import argparse
31 |
32 | parser = argparse.ArgumentParser(description='train PPO multi model')
33 | parser.add_argument("config", help="Config file")
34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
35 |
36 | args = parser.parse_args()
37 | module = __import__("config_file",fromlist= [args.config])
38 | exp_config = getattr(module, args.config)
39 |
40 | timesteps = 50000
42 |
43 | sub_timesteps = 10000
44 |
45 | config = {
46 | "policy_type": "MultiInputPolicy",
47 | "total_timesteps": timesteps,
48 | "env_name": "SumoRamp()",
49 | "sub_timesteps": sub_timesteps
50 | }
51 | pdir = os.path.abspath('../')
52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')
53 |
54 | policy_kwargs = exp_config.policy_kwargs
55 |
56 | action_space = exp_config.action_space
57 |
58 | image_shape = exp_config.image_shape
59 | obsspaces = exp_config.obsspaces
60 |
61 | weights = exp_config.weights
62 | sumoParameters = exp_config.sumoParameters
63 |
64 | min_action = -1
65 | max_action = +1
66 |
67 | video_folder = dir + '/logs/videos/'
68 | video_length = 600
69 |
70 | def make_env(env_id, rank, seed=0, monitor_dir = None):
71 | """
72 | Utility function for multiprocessed env.
73 |
74 | :param env_id: (str) the environment ID
75 | :param rank: (int) index of the subprocess
76 | :param seed: (int) the initial seed for the RNG
77 | :param monitor_dir: (str) optional directory for Monitor log files
78 | """
79 |
80 | def _init():
81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
82 | isBaseline=False, render=0)
83 | env.seed(seed + rank)
84 | env = RescaleAction(env, min_action, max_action)
85 | if monitor_dir is not None:
86 | os.makedirs(monitor_dir, exist_ok=True)
87 | env = Monitor(env, filename=os.path.join(monitor_dir, str(rank)))
88 | return env
89 | set_random_seed(seed)
90 | return _init
91 |
92 |
93 | if __name__ == '__main__':
94 | run = wandb.init(
95 | project="Robust-OnRampMerging",
96 | name=f"ImageNoise+PerfectBSM_{args.config}",
97 | dir=dir,
98 | config=config,
99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
100 | monitor_gym=True, # auto-upload the videos of agents playing the game
101 | save_code=True, # optional
102 | magic=True
103 | )
104 |
105 | env_id = "MultiMerge"
106 | num_cpu = 2  # Number of processes to use
107 | # Create the vectorized environment
108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
112 |
113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}",
114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0,
115 | # video_length=300)
116 |
117 | env = VecMonitor(venv=env)
118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
119 | # isBaseline=False,render=0)
120 | #code = wandb.Artifact('project-source', type='code')
121 | #for path in glob.glob('**/*.py', recursive=True):
122 | # code.add_file(path)
123 |
124 | #wandb.run.use_artifact(code)
125 | model = PPO(config["policy_type"], env,
126 | verbose=3,
127 | gamma=0.95,
128 | n_steps=1200,
129 | ent_coef=0.0905168,
130 | learning_rate=0.005,
131 | vf_coef=0.042202,
132 | max_grad_norm=0.9,
133 | gae_lambda=0.7,
134 | n_epochs=5,
135 | clip_range=0.2,
136 | batch_size=1200,
137 | tensorboard_log=f"{dir}")
138 |
139 | model.learn(
140 | total_timesteps=int(config["total_timesteps"]),
141 | callback=WandbCallback(
142 | gradient_save_freq=5,
143 | model_save_freq=5000,
144 | model_save_path=f"{dir}/models/{run.id}",
145 | verbose=2,
146 | ), )
147 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
148 | env.save(stats_path)
149 |
150 |
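151 | # This script mirrors train_ImageNoise_noBSM_final.py; the only substantive
152 | # difference is the environment class (BSM_Perfect_Noise_Image), which keeps
153 | # the BSM channel clean while the image observations are perturbed, isolating
154 | # the effect of camera noise.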
--------------------------------------------------------------------------------
/train_multiModelNoise_final_2.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 |
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import set_random_seed
9 | from gym.wrappers.rescale_action import RescaleAction
10 | # from custom_envs.rampTaperEnv_half import SumoRamp
11 | from gym.spaces import Box
12 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge
13 | # from custom_envs.bsmMerge import BsmMerge
14 | # load simple cnn + bsm reward env
15 | # from custom_envs.MultiMerge import MultiMerge
16 | # load cnn + bsm all rewards env
17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge
18 | from custom_envs.MultiMerge import BSM_Noise_Image_Noise as MultiMerge
22 |
23 |
24 | import os
25 | import wandb, glob
26 | #from customFeatureExtractor import CustomCombinedExtractor
27 | from wandb.integration.sb3 import WandbCallback
28 | from stable_baselines3.common.monitor import Monitor
29 | #from config_file import sac_multi_config as exp_config
30 | import argparse
31 |
32 | parser = argparse.ArgumentParser(description='train PPO multi model')
33 | parser.add_argument("config", help="Config file")
34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value")
35 |
36 | args = parser.parse_args()
37 | module = __import__("config_file",fromlist= [args.config])
38 | exp_config = getattr(module, args.config)
39 |
40 | timesteps = 500000
42 |
43 | sub_timesteps = 10000
44 |
45 | config = {
46 | "policy_type": "MultiInputPolicy",
47 | "total_timesteps": timesteps,
48 | "env_name": "SumoRamp()",
49 | "sub_timesteps": sub_timesteps
50 | }
51 | pdir = os.path.abspath('../')
52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles')  # note: shadows the built-in dir()
53 |
54 | policy_kwargs = exp_config.policy_kwargs
55 |
56 | action_space = exp_config.action_space
57 |
58 | image_shape = exp_config.image_shape
59 | obsspaces = exp_config.obsspaces
60 |
61 | weights = exp_config.weights
62 | sumoParameters = exp_config.sumoParameters
63 |
64 | min_action = -1
65 | max_action = +1
66 |
67 | video_folder = dir + '/logs/videos/'
68 | video_length = 600
69 |
70 | def make_env(env_id, rank, seed=0, monitor_dir=None):
71 | """
72 | Utility function for a multiprocessed env.
73 |
74 | :param env_id: (str) the environment ID (unused here; MultiMerge is constructed directly)
75 | :param rank: (int) index of the subprocess
76 | :param seed: (int) the initial seed for the RNG
77 | :param monitor_dir: (str) optional directory for per-worker Monitor log files
78 | """
79 |
80 | def _init():
81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
82 | isBaseline=False,render=0 )
83 | env.seed(seed + rank)
84 | env = RescaleAction(env, min_action, max_action)
85 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
86 | if monitor_path is not None:
87 | os.makedirs(monitor_dir, exist_ok=True)
88 | env = Monitor(env, monitor_path)  # bug fix: monitor_path was computed but the Monitor wrapper was never attached
89 | return env
90 | set_random_seed(seed)
91 | return _init
92 |
93 | if __name__ == '__main__':
94 | run = wandb.init(
95 | project="Robust-OnRampMerging",
96 | name="MultiModal_noise",
97 | dir=dir,
98 | config=config,
99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics
100 | monitor_gym=True, # auto-upload the videos of agents playing the game
101 | save_code=True, # optional
102 | magic=True
103 | )
104 |
105 | env_id = "MultiMerge"
106 | num_cpu = 16  # number of processes to use
107 | # Create the vectorized environment
108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)
109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames
111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)  # running-stat normalization; stats are saved to vec_normalize.pkl below
112 |
113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}",
114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0,
115 | # video_length=300)
116 |
117 | env = VecMonitor(venv=env)  # record episode returns/lengths at the vectorized level
118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
119 | # isBaseline=False,render=0)
120 | #code = wandb.Artifact('project-source', type='code')
121 | #for path in glob.glob('**/*.py', recursive=True):
122 | # code.add_file(path)
123 |
124 | #wandb.run.use_artifact(code)
125 | model = PPO(config["policy_type"], env,
126 | verbose=3,
127 | learning_rate=1e-5,
128 | n_steps=512,  # rollout length per environment
129 | batch_size=256,  # minibatch size for each gradient step
130 | n_epochs=20,
131 | gamma=0.99,
132 | gae_lambda=0.9,
133 | clip_range=0.2,
134 | clip_range_vf=None,
135 | ent_coef=0.05,
136 | vf_coef=0.5,
137 | max_grad_norm=0.5,
138 | target_kl=0.01,  # stop the epoch updates early if the approximate KL exceeds this
139 | tensorboard_log=f"{dir}")
140 | model.learn(
141 | total_timesteps=int(config["total_timesteps"]),
142 | callback=WandbCallback(
143 | gradient_save_freq=5,
144 | model_save_freq=5000,
145 | model_save_path=f"{dir}/models/{run.id}",
146 | verbose=2,
147 | ))
148 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
149 | env.save(stats_path)
150 |
151 |
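152 | # Invocation sketch (the config module name below is an assumption; any module in
153 | # the config_file package exposing the attributes read above would work):
154 | #   python train_multiModelNoise_final_2.py ppo_final_multimodal_6 --noise_sigma 0.1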
--------------------------------------------------------------------------------