├── custom_envs
│   ├── demo.py
│   ├── gymsumo.py
│   ├── bsmMerge.py
│   ├── sumo_ConfigTaper
│   │   ├── __init__.py
│   │   ├── Ramp_2.net.xml
│   │   ├── ramp_2.rou.xml
│   │   ├── ramp_2.sumocfg
│   │   ├── ramp_2_1.sumocfg
│   │   ├── ramp_2.sumocfg_copy
│   │   ├── test_traci.py
│   │   ├── ramp_2_3.rou.xml
│   │   ├── ramp_2_1.rou.xml
│   │   ├── ramp_2.rou.xml_copy
│   │   ├── ramp_2_2.rou.xml
│   │   ├── Ramp_2_1.net.xml
│   │   └── Ramp_2.net.xml_copy
│   ├── MultiMerge.py
│   ├── MultiMergeNoise.py
│   ├── MergeSingleBSMNoise.py
│   ├── MultiMergeBSMNoise.py
│   ├── MultiMergeParallel.py
│   ├── ramp_env.py
│   ├── __pycache__
│   │   ├── bsmMerge.cpython-38.pyc
│   │   ├── gymsumo.cpython-38.pyc
│   │   └── MultiMerge.cpython-38.pyc
│   ├── .ipynb_checkpoints
│   │   └── Untitled-checkpoint.ipynb
│   ├── sample_cartpole.py
│   ├── readme.md
│   ├── test_sumo_env.py
│   ├── sumo_Config
│   │   ├── ramp_11.sumocfg
│   │   ├── ramp_1.sumocfg
│   │   ├── ramp_1.rou.xml
│   │   └── Ramp_1.net.xml
│   ├── sumo_ConfigParallelRamp
│   │   ├── ramp_parallel.sumocfg
│   │   ├── ramp_parallel.rou.xml
│   │   └── ramp_parallel.rou.xml_old
│   ├── checkEnv.py
│   ├── sample_env.py
│   ├── trainRamp.py
│   ├── Untitled.ipynb
│   ├── MultiMergeBSMNoise2.py
│   └── MultiMerge_1.py
├── customFeatureExtractor.py
├── config_file
│   ├── customFeatureExtractor.py
│   ├── __pycache__
│   │   ├── ppo_18_bsm_noImage.cpython-38.pyc
│   │   ├── customFeatureExtractor.cpython-38.pyc
│   │   ├── ppo_final_Image_nobsm_6.cpython-38.pyc
│   │   ├── ppo_final_bsm_noImage_6.cpython-38.pyc
│   │   ├── ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 00_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 00_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 10_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 10_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 25_ppo_final_bsm_noImage.cpython-38.pyc
│   │   ├── 25_ppo_final_multimodal_6.cpython-38.pyc
│   │   ├── 50_ppo_final_bsm_noImage.cpython-38.pyc
│   │   └── 50_ppo_final_multimodal_6.cpython-38.pyc
│   ├── ppo_16_OnlyImagenoise.py
│   ├── ppo_18_Image_nobsm.py
│   ├── ppo_final_Image_nobsm.py
│   ├── ppo_final_Image_nobsm_5.py
│   ├── ppo_final_Image_nobsm_1.py
│   ├── ppo_16_OnlyBSMnoise.py
│   ├── ppo_4.py
│   ├── ppo_1.py
│   ├── ppo_final_Image_nobsm_6.py
│   ├── 00_ppo_final_Image_nobsm_6.py
│   ├── 10_ppo_final_Image_nobsm_6.py
│   ├── 25_ppo_final_Image_nobsm_6.py
│   ├── 50_ppo_final_Image_nobsm_6.py
│   ├── ppo_2.py
│   ├── sac_multi_config.py
│   ├── ppo_3.py
│   ├── ppo_5.py
│   ├── ppo_18_bsm_noImage.py
│   ├── ppo_final_bsm_noImage.py
│   ├── ppo_final_bsm_noImage_1.py
│   ├── ppo_6.py
│   ├── ppo_final_bsm_noImage_5.py
│   ├── ppo_15.py
│   ├── ppo_final_bsm_noImage_2.py
│   ├── ppo_final_bsm_noImage_3.py
│   ├── ppo_final_bsm_noImage_4.py
│   ├── ppo_11.py
│   ├── ppo_13.py
│   ├── ppo_14.py
│   ├── ppo_7.py
│   ├── ppo_8.py
│   ├── ppo_9.py
│   ├── ppo_10.py
│   ├── ppo_12.py
│   ├── ppo_16.py
│   ├── ppo_17.py
│   ├── ppo_17_1.py
│   ├── ppo_17_2.py
│   ├── ppo_17_3.py
│   ├── ppo_18.py
│   ├── ppo_19.py
│   ├── ppo_20.py
│   ├── ppo_final.py
│   ├── ppo_final_bsm_noImage_6.py
│   ├── 00_ppo_final_bsm_noImage.py
│   ├── 10_ppo_final_bsm_noImage.py
│   ├── 25_ppo_final_bsm_noImage.py
│   ├── 50_ppo_final_bsm_noImage.py
│   ├── ppo_final_1.py
│   ├── ppo_final_2.py
│   ├── ppo_final_multimodal_5.py
│   ├── ppo_final_multimodal_6.py
│   ├── 00_ppo_final_multimodal_6.py
│   ├── 10_ppo_final_multimodal_6.py
│   ├── 25_ppo_final_multimodal_6.py
│   └── 50_ppo_final_multimodal_6.py
├── README.md
├── environment.yml
├── train_Image_noBSM_final.py
├── train_multiModelNoise_final.py
├── train_multiModal_final.py
├── train_multiModalNoise_parallel.py
├── train_bsm_noImage_final.py
├── test_multi_model.py
├── test_parallel_model.py
├── test_baseline_paralllel.py
├── test_bsmNoise_PerfectImage.py
├── train_ImageNoise_noBSM_final.py
├── train_ImageNoise_PerfectBSM.py
└── train_multiModelNoise_final_2.py

/custom_envs/demo.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/custom_envs/gymsumo.py:
--------------------------------------------------------------------------------
gymsumo4.py
--------------------------------------------------------------------------------
/custom_envs/bsmMerge.py:
--------------------------------------------------------------------------------
bsmMerge6.py
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/custom_envs/MultiMerge.py:
--------------------------------------------------------------------------------
MultiMerge_final.py
--------------------------------------------------------------------------------
/customFeatureExtractor.py:
--------------------------------------------------------------------------------
customFeatureExtractor3.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeNoise.py:
--------------------------------------------------------------------------------
MultiMergeNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MergeSingleBSMNoise.py:
--------------------------------------------------------------------------------
MergeSingleBSMNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeBSMNoise.py:
--------------------------------------------------------------------------------
MultiMergeBSMNoise2.py
--------------------------------------------------------------------------------
/custom_envs/MultiMergeParallel.py:
--------------------------------------------------------------------------------
MultiMergeParallel_2.py
--------------------------------------------------------------------------------
/config_file/customFeatureExtractor.py:
--------------------------------------------------------------------------------
customFeatureExtractor3.py
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2.net.xml:
--------------------------------------------------------------------------------
Ramp_2_1.net.xml
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.rou.xml:
--------------------------------------------------------------------------------
ramp_2_2.rou.xml
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.sumocfg:
--------------------------------------------------------------------------------
ramp_2_1.sumocfg
--------------------------------------------------------------------------------
/custom_envs/ramp_env.py:
--------------------------------------------------------------------------------
from rampTaperEnv import SumoRampEnv

from gym.wrappers.rescale_action import RescaleAction
--------------------------------------------------------------------------------
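
Note: both customFeatureExtractor.py entries above resolve to customFeatureExtractor3.py, whose source is not captured in this dump. For orientation only, a combined extractor for Dict observation spaces in stable-baselines3 is typically written like the sketch below; the class name, layer sizes, and per-key handling are illustrative assumptions, not the repository's actual CustomCombinedExtractor.

    import gym
    import numpy as np
    import torch as th
    from torch import nn
    from stable_baselines3.common.torch_layers import BaseFeaturesExtractor


    class CombinedExtractorSketch(BaseFeaturesExtractor):
        """Illustrative multimodal extractor: a small CNN for the 'image' key,
        plain flattening for the low-dimensional BSM keys (velocity, xPos, ...)."""

        def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 2046):
            extractors = {}
            total_size = 0
            for key, subspace in observation_space.spaces.items():
                if key == "image":
                    n_channels = subspace.shape[2]  # configs store images as HxWxC uint8
                    extractors[key] = nn.Sequential(
                        nn.Conv2d(n_channels, 32, kernel_size=8, stride=4), nn.ReLU(),
                        nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),
                        nn.Flatten(),
                        nn.LazyLinear(cnn_output_dim), nn.ReLU(),
                    )
                    total_size += cnn_output_dim
                else:
                    extractors[key] = nn.Flatten()
                    total_size += int(np.prod(subspace.shape))
            super().__init__(observation_space, features_dim=total_size)
            self.extractors = nn.ModuleDict(extractors)

        def forward(self, observations) -> th.Tensor:
            encoded = []
            for key, extractor in self.extractors.items():
                x = observations[key]
                if key == "image":
                    x = x.permute(0, 3, 1, 2).float() / 255.0  # HWC -> CHW, rescale
                encoded.append(extractor(x))
            return th.cat(encoded, dim=1)

The per-key encode-then-concatenate pattern matches how the config files use it: the concatenated feature vector feeds the net_arch MLP declared in policy_kwargs.
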
/custom_envs/__pycache__/bsmMerge.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/bsmMerge.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/__pycache__/gymsumo.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/gymsumo.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/__pycache__/MultiMerge.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/custom_envs/__pycache__/MultiMerge.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_18_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_18_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/customFeatureExtractor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/customFeatureExtractor.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_Image_nobsm_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_Image_nobsm_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_bsm_noImage_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_bsm_noImage_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/00_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/00_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/00_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/00_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/10_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/10_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/10_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/10_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/25_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/25_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/25_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/25_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/50_ppo_final_bsm_noImage.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/50_ppo_final_bsm_noImage.cpython-38.pyc
--------------------------------------------------------------------------------
/config_file/__pycache__/50_ppo_final_multimodal_6.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/grbagwe/RAMRL/HEAD/config_file/__pycache__/50_ppo_final_multimodal_6.cpython-38.pyc
--------------------------------------------------------------------------------
/custom_envs/sample_cartpole.py:
--------------------------------------------------------------------------------
import gym

# NOTE: despite the file name, this sample loads the MuJoCo Hopper environment.
env = gym.make('Hopper-v2')

env.reset()

for _ in range(100):
    env.render()
    env.step(env.action_space.sample())
env.close()
--------------------------------------------------------------------------------
/custom_envs/readme.md:
--------------------------------------------------------------------------------
# Custom Env
Gym ships with a set of predefined environments. To define our own environment, we implement Gym's environment interface, i.e. the custom environment inherits from the `gym.Env` class; a minimal sketch is given below.

Screenshots of the SUMO GUI are requested through TraCI's internal command interface, e.g.:

    self._setCmd(tc.VAR_SCREENSHOT, viewID, "tsii", 3, filename, width, height)
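
The sketch below is written against the pre-0.26 `gym` API used throughout this repo (`reset()` returns only the observation, `step()` returns a 4-tuple). The class name and the placeholder dynamics are illustrative, not the repo's actual ramp environment:

```python
import gym
import numpy as np
from gym import spaces


class CustomEnv(gym.Env):
    """Minimal custom environment; inherits from gym.Env."""

    def __init__(self):
        super().__init__()
        # Acceleration command, mirroring the bounds used in the config files.
        self.action_space = spaces.Box(low=-4.5, high=3.0, shape=(1,), dtype=np.float32)
        # Seven surrounding-vehicle speeds, as in the configs' 'velocity' space.
        self.observation_space = spaces.Box(low=0.0, high=70.0, shape=(7,), dtype=np.float32)
        self.t = 0

    def reset(self):
        self.t = 0
        return self.observation_space.sample()  # placeholder observation

    def step(self, action):
        self.t += 1
        obs = self.observation_space.sample()  # placeholder observation
        reward = 0.0                           # placeholder reward
        done = self.t >= 600                   # cf. episodeLength in the configs
        return obs, reward, done, {}

    def render(self, mode="human"):
        pass
```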
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RAMRL-On-Ramp-Merging
PyTorch implementation of: Gaurav Bagwe, Xiaoyong Yuan, Xianhao Chen, Lan Zhang, "RAMRL: Towards Robust On-Ramp Merging via Augmented Multimodal Reinforcement Learning", 2023 IEEE International Conference on Mobility, Operations, Services and Technologies (MOST).
--------------------------------------------------------------------------------
/custom_envs/test_sumo_env.py:
--------------------------------------------------------------------------------
import os, sys

if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")


sumoBinary = "/home/gauravb/sumo/bin/sumo-gui"
sumoCmd = [sumoBinary, "-c", "./sumo_ConfigParallelRamp/ramp_parallel.sumocfg"]

import traci

traci.start(sumoCmd)
step = 0
while step < 1000:
    traci.simulationStep()
    step += 1

traci.close()
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_11.sumocfg:
--------------------------------------------------------------------------------
[SUMO configuration XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_1.sumocfg:
--------------------------------------------------------------------------------
[SUMO configuration XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_1.sumocfg:
--------------------------------------------------------------------------------
[SUMO configuration XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.sumocfg_copy:
--------------------------------------------------------------------------------
[SUMO configuration XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigParallelRamp/ramp_parallel.sumocfg:
--------------------------------------------------------------------------------
[SUMO configuration XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/test_traci.py:
--------------------------------------------------------------------------------
import os, sys

if 'SUMO_HOME' in os.environ:
    SUMO_HOME = os.environ['SUMO_HOME']
    tools = os.path.join(SUMO_HOME, 'tools')
    sys.path.append(tools)
    print('sumo_loaded')
    print(tools)
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")

import traci
import sumolib

LIBSUMO = 'LIBSUMO_AS_TRACI' in os.environ

sumoBinary = os.path.join(SUMO_HOME, "bin/sumo-gui")


sumoConfigFile = "./custom_envs/sumo_ConfigParallelRamp/ramp_parallel.sumocfg"
sumo_cmd = [sumoBinary,
            '-n', './custom_envs/sumo_ConfigParallelRamp/ramp_parallel.net.xml',
            # route path made consistent with the -n option above
            '-r', './custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml',
            '--waiting-time-memory', '10000',
            '--time-to-teleport', '-1', '--random']
traci.start(sumo_cmd)
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/ramp_1.rou.xml:
--------------------------------------------------------------------------------
[SUMO route XML; element tags were lost in extraction]
--------------------------------------------------------------------------------
/config_file/ppo_16_OnlyImagenoise.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.008}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_18_Image_nobsm.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_5.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_1.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_16_OnlyBSMnoise.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.008}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_4.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 250, 'alphaO': 0.1,
           'rTimeAlpha': 1, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_1.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 250, 'alphaO': 0.1,
           'rTimeAlpha': 0.05, 'alphaD': 0.5, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8, 'noise_level': 1}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 180}
--------------------------------------------------------------------------------
/config_file/00_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8, 'noise_level': 0.00}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 180}
--------------------------------------------------------------------------------
/config_file/10_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8, 'noise_level': 0.10}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 180}
--------------------------------------------------------------------------------
/config_file/25_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8, 'noise_level': 0.25}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 180}
--------------------------------------------------------------------------------
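
All of these config modules expose the same module-level names (policy_kwargs, action_space, obsspaces, weights, sumoParameters), so a training script can import one of them and hand the pieces over to stable-baselines3. The wiring below is a hedged sketch only: the chosen config, the environment class, and its constructor arguments are assumptions, since the actual train_*.py scripts are not included in this dump.

    from stable_baselines3 import PPO

    import config_file.ppo_final_Image_nobsm_6 as cfg   # any config module can be swapped in
    from custom_envs.MultiMerge import SumoRampEnv      # assumed env class and location

    # Assumed constructor: the env is handed the observation spec, reward
    # weights, and SUMO parameters defined by the config; the real signature
    # may differ.
    env = SumoRampEnv(obsspaces=cfg.obsspaces, weights=cfg.weights,
                      sumoParameters=cfg.sumoParameters)

    # MultiInputPolicy handles Dict observation spaces; policy_kwargs carries
    # the custom feature extractor and net_arch declared in the config.
    model = PPO("MultiInputPolicy", env, policy_kwargs=cfg.policy_kwargs, verbose=1)
    model.learn(total_timesteps=100_000)
    model.save("ppo_ramp_merge")
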
/config_file/50_ppo_final_Image_nobsm_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8, 'noise_level': 0.50}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 180}
--------------------------------------------------------------------------------
/config_file/ppo_2.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.05, 'alphasl1': 0.05, 'rSuccess': 250, 'alphaO': 0.1,
           'rTimeAlpha': 0.05, 'alphaD': 0.05, 'rC': -250, 'alphaDistance': 0.3,
           'alphaP': 0.25, 'alphaJ': 0.3}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/sac_multi_config.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 250, 'alphaO': 0.1,
           'rTimeAlpha': 0.05, 'alphaD': 0.5, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_3.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
import sys

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, 512, dict(vf=[512, 128, 64, 8], pi=[512, 128, 64, 8])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.05, 'alphasl1': 0.05, 'rSuccess': 250, 'alphaO': 0.1,
           'rTimeAlpha': 0.05, 'alphaD': 0.05, 'rC': -250, 'alphaDistance': 0.3,
           'alphaP': 0.25, 'alphaJ': 0.3}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/custom_envs/checkEnv.py:
--------------------------------------------------------------------------------
from ramp_env3 import SumoRampEnv
import os
from gym.wrappers.rescale_action import RescaleAction
from gym.wrappers.resize_observation import ResizeObservation  # import target restored; the original line was cut off
# simpath = "/home/gauravb/Documents/MichiganTech/Programming/CustomRampTraining/custom_envs/sumo_Config/ramp_1.sumocfg"
# simpath = os.getcwd()+"/custom_envs/sumo_Config/ramp_1.sumocfg"
from gym.spaces import Box

env = SumoRampEnv()
min_action = -1
max_action = +1
print('before \n ', env.action_space.high, 'high', env.action_space.low, 'low')
env = RescaleAction(env, min_action, max_action)
for i in range(0, 100):
    print(env.action_space.sample())

print('after \n ', env.action_space.high, 'high', env.action_space.low, 'low')

from gym.utils.env_checker import check_env

check_env(env)
#
# for episode in range(1, 10):
#     env.reset()
#     done = False
#     score = 0
#     while not done:
#         env.render()
#         action = +30  # env.action_space.sample()
#         state_, reward, done, info = env.step(action)
#
#         score += reward
#     print(f'Episode {episode} score {score} ')
--------------------------------------------------------------------------------
/config_file/ppo_5.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 5, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_18_bsm_noImage.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_1.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_6.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 10, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_5.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
    'latSpeed': Box(low=0, high=70, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_15.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 1, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 1}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_2.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
    'latSpeed': Box(low=0, high=70, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_3.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
    'latSpeed': Box(low=0, high=70, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.00001, 'alphaD': 0.25, 'rC': -150, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_final_bsm_noImage_4.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 2.6, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
    'latSpeed': Box(low=0, high=70, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 150, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -300, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.08}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_11.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 300, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_13.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_14.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 2.5,
           'alphaP': 0.5, 'alphaJ': 1}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_7.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 0.1, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_8.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 0.01, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_9.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_10.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 100, 'alphaO': 0.1,
           'rTimeAlpha': 0.0001, 'alphaD': 0.25, 'rC': -250, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
/config_file/ppo_12.py:
--------------------------------------------------------------------------------
from gym.spaces import Box
import numpy as np
from config_file.customFeatureExtractor import CustomCombinedExtractor
#from customFeatureExtractor import CustomCombinedExtractor

policy_kwargs = dict(
    #features_extractor_class=CustomCombinedExtractor,
    features_extractor_kwargs=dict(cnn_output_dim=2046),
    net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])],
)

action_space = {'high': 3, 'low': -4.5}
image_shape = (200, 768, 3)
obsspaces = {
    'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
    'velocity': Box(low=0, high=70, shape=(7,)),
    'xPos': Box(low=-100, high=400, shape=(7,)),
    'yPos': Box(low=-100, high=400, shape=(7,)),
    'acceleration': Box(low=-4.5, high=3, shape=(7,)),
}

weights = {'alphasl0': 0.5, 'alphasl1': 0.5, 'rSuccess': 500, 'alphaO': 0.1,
           'rTimeAlpha': 0.001, 'alphaD': 0.25, 'rC': -400, 'alphaDistance': 0.5,
           'alphaP': 0.5, 'alphaJ': 0.8}
sumoParameters = {'maxSpeed': 30, 'episodeLength': 600}
--------------------------------------------------------------------------------
#features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 500, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -400, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_16.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 500, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -400, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.008 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_17.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 100, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -100, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | 
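(These numbered config modules differ only in their reward weights and action bounds. The training scripts later in this dump select one of them by name at run time; the sketch below shows that loading pattern using importlib, which is equivalent to the __import__ call the scripts actually use. The module name ppo_17 here is only an example.)

import importlib

# Load one experiment config from the config_file package; every module
# exposes the same names: policy_kwargs, action_space, obsspaces,
# weights and sumoParameters. "ppo_17" is an illustrative choice.
exp_config = importlib.import_module("config_file.ppo_17")
weights = exp_config.weights                  # reward-shaping coefficients
sumoParameters = exp_config.sumoParameters    # e.g. maxSpeed, episodeLength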
-------------------------------------------------------------------------------- /config_file/ppo_17_1.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 100, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 1, 29 | 'alphaD': 0.25, 30 | 'rC': -100, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_17_2.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 100, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.1, 29 | 'alphaD': 0.25, 30 | 'rC': -100, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_17_3.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 
20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 100, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -100, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_18.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_19.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 200, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -200, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_20.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | 
net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 100, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.001, 29 | 'alphaD': 0.25, 30 | 'rC': -100, 31 | 'alphaDistance': 2, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.008 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_final.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -250, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 600 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /config_file/ppo_final_bsm_noImage_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'velocity': Box(low=0, high=70, shape=(7,)), 17 | 'xPos': Box(low=-100, high=400, shape=(7,)), 18 | 'yPos': Box(low=-100, high=400, shape=(7,)), 19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 20 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 21 | } 22 | 23 | weights = {'alphasl0': 0.5, 24 | 'alphasl1': 0.5, 25 | 'rSuccess': 150, 26 | 'alphaO': 0.1, 27 | 'rTimeAlpha': 0.01, 28 | 'alphaD': 0.25, 29 | 'rC': -150, 30 | 'alphaDistance': 0.5, 31 | 'alphaP': 0.5, 32 | 'alphaJ': 0.8, 33 | 'noise_level': 1 34 | } 35 | sumoParameters = {'maxSpeed':30 , 36 | 'episodeLength': 180 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- 
/config_file/00_ppo_final_bsm_noImage.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'velocity': Box(low=0, high=70, shape=(7,)), 17 | 'xPos': Box(low=-100, high=400, shape=(7,)), 18 | 'yPos': Box(low=-100, high=400, shape=(7,)), 19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 20 | 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -250, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08, 34 | 'noise_level': 0.0, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/10_ppo_final_bsm_noImage.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'velocity': Box(low=0, high=70, shape=(7,)), 17 | 'xPos': Box(low=-100, high=400, shape=(7,)), 18 | 'yPos': Box(low=-100, high=400, shape=(7,)), 19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 20 | 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -250, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08, 34 | 'noise_level': 0.10, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/25_ppo_final_bsm_noImage.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'velocity': Box(low=0, high=70, shape=(7,)), 17 | 'xPos': Box(low=-100, high=400, shape=(7,)), 18 | 'yPos': Box(low=-100, high=400, shape=(7,)), 19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 20 | 21 | 'latSpeed': Box(low=0, 
high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -250, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08, 34 | 'noise_level': 0.25, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/50_ppo_final_bsm_noImage.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 3, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'velocity': Box(low=0, high=70, shape=(7,)), 17 | 'xPos': Box(low=-100, high=400, shape=(7,)), 18 | 'yPos': Box(low=-100, high=400, shape=(7,)), 19 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 20 | 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -250, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08, 34 | 'noise_level': 0.50, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/ppo_final_1.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level' :1 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/ppo_final_2.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | 
features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 22 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 23 | } 24 | 25 | weights = {'alphasl0': 0.5, 26 | 'alphasl1': 0.5, 27 | 'rSuccess': 150, 28 | 'alphaO': 0.1, 29 | 'rTimeAlpha': 0.01, 30 | 'alphaD': 0.25, 31 | 'rC': -150, 32 | 'alphaDistance': 0.5, 33 | 'alphaP': 0.5, 34 | 'alphaJ': 0.08 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/ppo_final_multimodal_5.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.08, 34 | 'noise_level': 0 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 600 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/ppo_final_multimodal_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 
'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level': 1 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/00_ppo_final_multimodal_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level': 0.00, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/10_ppo_final_multimodal_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level': 0.010 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/25_ppo_final_multimodal_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 
128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level': 0.25, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /config_file/50_ppo_final_multimodal_6.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | import numpy as np 3 | from config_file.customFeatureExtractor import CustomCombinedExtractor 4 | #from customFeatureExtractor import CustomCombinedExtractor 5 | policy_kwargs = dict( 6 | features_extractor_class=CustomCombinedExtractor, 7 | features_extractor_kwargs=dict(cnn_output_dim=2046), 8 | 9 | net_arch=[1024, dict(vf=[512, 128, 32], pi=[512, 128, 32])], 10 | ) 11 | 12 | action_space = {'high': 2.6, 13 | 'low': -4.5} 14 | image_shape = (200, 768,3) 15 | obsspaces = { 16 | 'image': Box(low=0, high=255, shape=image_shape, dtype=np.uint8), 17 | 'velocity': Box(low=0, high=70, shape=(7,)), 18 | 'xPos': Box(low=-100, high=400, shape=(7,)), 19 | 'yPos': Box(low=-100, high=400, shape=(7,)), 20 | 'acceleration': Box(low=-4.5, high=3, shape=(7,)), 21 | 'latSpeed': Box(low=0, high=70, shape=(7,)), 22 | } 23 | 24 | weights = {'alphasl0': 0.5, 25 | 'alphasl1': 0.5, 26 | 'rSuccess': 150, 27 | 'alphaO': 0.1, 28 | 'rTimeAlpha': 0.01, 29 | 'alphaD': 0.25, 30 | 'rC': -150, 31 | 'alphaDistance': 0.5, 32 | 'alphaP': 0.5, 33 | 'alphaJ': 0.8, 34 | 'noise_level': 0.50, 35 | } 36 | sumoParameters = {'maxSpeed':30 , 37 | 'episodeLength': 180 38 | } 39 | 40 | 41 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: SB3_training 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _openmp_mutex=4.5=1_gnu 7 | - ca-certificates=2022.3.29=h06a4308_0 8 | - certifi=2021.10.8=py38h06a4308_2 9 | - ld_impl_linux-64=2.35.1=h7274673_9 10 | - libffi=3.3=he6710b0_2 11 | - libgcc-ng=9.3.0=h5101ec6_17 12 | - libgomp=9.3.0=h5101ec6_17 13 | - libstdcxx-ng=9.3.0=hd4cf53a_17 14 | - ncurses=6.3=h7f8727e_2 15 | - openssl=1.1.1n=h7f8727e_0 16 | - pip=21.2.4=py38h06a4308_0 17 | - python=3.8.13=h12debd9_0 18 | - readline=8.1.2=h7f8727e_1 19 | - setuptools=58.0.4=py38h06a4308_0 20 | - sqlite=3.38.2=hc218d9a_0 21 | - tk=8.6.11=h1ccaba5_0 22 | - wheel=0.37.1=pyhd3eb1b0_0 23 | - xz=5.2.5=h7b6447c_0 24 | - zlib=1.2.11=h7f8727e_4 25 | - pip: 26 | - absl-py==1.0.0 27 | - ale-py==0.7.4 28 | - autorom==0.4.2 29 | - autorom-accept-rom-license==0.4.2 30 | - cachetools==5.0.0 31 | - charset-normalizer==2.0.12 32 | - click==8.1.2 33 | - cloudpickle==2.0.0 34 | - cycler==0.11.0 35 | - docker-pycreds==0.4.0 36 | - fonttools==4.31.2 37 | - gitdb==4.0.9 38 | - gitpython==3.1.27 39 | - google-auth==2.6.2 40 | - google-auth-oauthlib==0.4.6 41 | - 
grpcio==1.44.0 42 | - gym==0.21.0 43 | - idna==3.3 44 | - importlib-metadata==4.11.3 45 | - importlib-resources==5.6.0 46 | - kiwisolver==1.4.2 47 | - markdown==3.3.6 48 | - matplotlib==3.5.1 49 | - numpy==1.22.3 50 | - oauthlib==3.2.0 51 | - opencv-python==4.5.5.64 52 | - packaging==21.3 53 | - pandas==1.4.2 54 | - pathtools==0.1.2 55 | - pettingzoo==1.17.0 56 | - pillow==9.1.0 57 | - promise==2.3 58 | - protobuf==3.20.0 59 | - psutil==5.9.0 60 | - pyasn1==0.4.8 61 | - pyasn1-modules==0.2.8 62 | - pyparsing==3.0.7 63 | - python-dateutil==2.8.2 64 | - pytz==2022.1 65 | - pyvirtualdisplay==3.0 66 | - pyyaml==6.0 67 | - requests==2.27.1 68 | - requests-oauthlib==1.3.1 69 | - rsa==4.8 70 | - sentry-sdk==1.5.9 71 | - setproctitle==1.2.2 72 | - shortuuid==1.0.8 73 | - six==1.16.0 74 | - smmap==5.0.0 75 | - stable-baselines3==1.5.0 76 | - tensorboard==2.8.0 77 | - tensorboard-data-server==0.6.1 78 | - tensorboard-plugin-wit==1.8.1 79 | - torch==1.11.0 80 | - tqdm==4.64.0 81 | - typing-extensions==4.1.1 82 | - urllib3==1.26.9 83 | - wandb==0.12.15 84 | - werkzeug==2.1.1 85 | - zipp==3.8.0 86 | -------------------------------------------------------------------------------- /custom_envs/sample_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import random 3 | from gym.spaces import Discrete, Box 4 | import numpy as np 5 | from gym import Env 6 | gym.logger.set_level(40) 7 | class customEnv(Env): 8 | ''' four key functions 9 | init 10 | step 11 | render 12 | reset 13 | ''' 14 | # env for shower temperature, based on https://www.youtube.com/watch?v=bD6V3rcr_54 15 | def __init__(self): 16 | # action space, for example: down, stay, up 17 | self.action_space = Discrete(3) 18 | # observation space allows continuous values over the temperature range 19 | self.observation_space = Box(low=np.array([0]), high=np.array([100])) 20 | 21 | # state is the information from the env 22 | 23 | self.state = 38 - random.randint(-3,3) 24 | self.episodeLength = 60 # secs 25 | 26 | def step(self, action): 27 | # how we take the action 28 | 29 | # apply action 30 | # the actions are 0, 1, 2 to decrease the temp, keep it unchanged, or increase it 31 | # if 0 : state = state + 0 - 1 to reduce the state by 1 32 | # if 1 : state = state + 1 - 1 to keep the same state 33 | # if 2 : state = state + 2 - 1 to increase the temp by 1 34 | 35 | self.state += action - 1 36 | 37 | # reduce the episode length 38 | self.episodeLength -= 1 # sec 39 | 40 | # calc reward 41 | # the aim is to keep the temperature between 17 and 39, so we give a positive reward inside this range 42 | # and a negative reward otherwise 43 | 44 | if self.state >= 17 and self.state <= 39: 45 | reward = 1 46 | else: 47 | reward = -1 48 | 49 | # check if shower is done 50 | if self.episodeLength == 0: done = True 51 | else: done = False 52 | 53 | # some random noise to the state 54 | self.state += random.randint(-1,1) 55 | 56 | info = {} 57 | 58 | # the tuple layout the OpenAI Gym API requires 59 | return self.state, reward, done, info 60 | 61 | def render(self): 62 | # visualization hook, unused here 63 | pass 64 | def reset(self): 65 | self.state = 38 - random.randint(-3,3) 66 | self.episodeLength = 60 67 | return self.state 68 | env = customEnv() 69 | 70 | for episode in range(1,100): 71 | env.reset() 72 | done = False 73 | score = 0 74 | while not done: 75 | env.render() 76 | action = env.action_space.sample() 77 | state_, reward, done, info = env.step(action) 78 | 79 | score += reward 80 | print(f'Episode {episode} score {score}') 81 |
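(The toy environment above follows the standard Gym interface, so it can be sanity-checked with the env checker bundled in the stable-baselines3 release pinned in environment.yml. A minimal sketch; note that check_env is strict and will flag that reset() and step() return a plain int where the declared (1,)-shaped Box expects a NumPy array.)

from stable_baselines3.common.env_checker import check_env

env = customEnv()
# Raises or warns on Gym API violations: observations not contained in the
# declared observation_space, wrong reset()/step() return types, and so on.
check_env(env, warn=True)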
-------------------------------------------------------------------------------- /custom_envs/trainRamp.py: -------------------------------------------------------------------------------- 1 | import ray 2 | from ramp_env3 import SumoRampEnv 3 | from ray.rllib.agents.ppo import PPOTrainer 4 | 5 | 6 | import ray 7 | import ray.rllib.agents.ppo as ppo 8 | from ray.tune.logger import pretty_print 9 | 10 | 11 | '''' 12 | https://docs.ray.io/en/latest/rllib/rllib-models.html 13 | 14 | the vision network case, you’ll probably have to configure conv_filters, if your environment observations have custom 15 | sizes. For example, "model": {"dim": 42, "conv_filters": [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1]]} for 16 | 42x42 observations. Thereby, always make sure that the last Conv2D output has an output shape of [B, 1, 1, X] 17 | ([B, X, 1, 1] for PyTorch), where B=batch and X=last Conv2D layer’s number of filters, so that RLlib can flatten it. 18 | An informative error will be thrown if this is not the case. 19 | ''' 20 | config = ppo.DEFAULT_CONFIG.copy() 21 | 22 | config = { 23 | # this is a dict 24 | # "env": SumoRampEnv, 25 | "num_workers": 1, 26 | # "framework" : "tf2", 27 | "num_gpus": 1, 28 | "model": { 29 | "dim": 512, 30 | "conv_filters": [ # [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1], [1000, 1, 512]],#, [1000,512, 1]], 31 | [96, 11, 4], # 126 32 | [256, 5, 2], # 61 33 | [384, 3, 2], # 30 34 | [384, 3, 2], # 14 35 | [256, 3, 2], # 6 36 | [256, 3, 2], # 2 37 | [256, 1, 128], 38 | 39 | ], # lenet 40 | "post_fcnet_hiddens": [256, 256], 41 | # "post_fcnet_activation": "relu", 42 | # "fcnet_hiddens" : [10, 10 ], 43 | # "fcnet_activation" : "relu", 44 | 45 | }, 46 | "evaluation_num_workers": 1, 47 | # Only for evaluation runs, render the env. 48 | "evaluation_config": { 49 | "render_env": True, 50 | } 51 | 52 | } 53 | # 54 | # from ray import tune 55 | # 56 | # def tune_func(config): 57 | # tune.util.wait_for_gpu() 58 | # train() 59 | # 60 | # tune.run(PPOTrainer, config=config, verbose=3, 61 | # # resources_per_trial={"cpu": 12, "gpu": 1} , 62 | # reuse_actors=True, 63 | # stop={"training_iteration": 10e3}) 64 | 65 | 66 | 67 | 68 | 69 | ray.init() 70 | 71 | 72 | trainer = ppo.PPOTrainer(config=config, env=SumoRampEnv) 73 | 74 | # Can optionally call trainer.restore(path) to load a checkpoint. 
75 | 76 | for i in range(1000): 77 | # Perform one iteration of training the policy with PPO 78 | result = trainer.train() 79 | print(pretty_print(result)) 80 | 81 | if i == 0: 82 | checkpoint = trainer.save() 83 | print("checkpoint saved at", checkpoint) 84 | ray.shutdown() 85 | 86 |
-------------------------------------------------------------------------------- /custom_envs/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "id": "61547c8f", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import gym\n", 12 | "import random" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 5, 18 | "id": "e2a97f0d", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "image = np.random.randint(0,255, size=(512,512,3))" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 13, 28 | "id": "2fbaa74d", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "state_velocity = np.random.randint(-10,10, size = (7))\n", 33 | "state_pos_x = np.random.randint(-10,10, size = (7))\n", 34 | "state_pos_y = np.random.randint(-10,10, size = (7))" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 15, 40 | "id": "cf4dcf9c", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "ename": "ValueError", 45 | "evalue": "operands could not be broadcast together with shapes (512,512,3) (7,) ", 46 | "output_type": "error", 47 | "traceback": [ 48 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 49 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 50 | "\u001b[0;32m/var/folders/ph/y0swjc297dx47xlplt3w27xr0000gn/T/ipykernel_3030/3150006869.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mimage\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_pos_x\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_pos_y\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstate_velociy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 51 | "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (512,512,3) (7,) " 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "image + state_pos_x + state_pos_y + state_velociy" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "id": "5f351282", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [] 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python [conda env:base-flow]", 71 | "language": "python", 72 | "name": "conda-env-base-flow-py" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.9.7" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 5 89 | } 90 |
-------------------------------------------------------------------------------- /custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml: -------------------------------------------------------------------------------- 1 | <!-- SUMO route definitions (vTypes, routes, and flows for the parallel-ramp network); the XML markup itself did not survive this text dump -->
-------------------------------------------------------------------------------- /custom_envs/MultiMergeBSMNoise2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from abc import ABC 3 | 4 | import gym 5 | from custom_envs.gymsumo import SumoRamp 6 | import traci 7 | import numpy as np 8 | from custom_envs.bsmMerge import BsmMerge, BsmMergeAllRewards 9 | from typing import Callable, Optional, Tuple, Union 10 | from scipy.ndimage.filters import gaussian_filter 11 | 12 | 13 | 14 | 15 | class MultiMerge(BsmMerge): 16 | 17 | def getObservations(self): 18 | # returns observations of the state 19 | 20 | state_speed = np.ones(7) * self.maxSpeed 21 | state_position_x = np.ones(7) 22 | state_position_y = np.ones(7) 23 | state_acc = np.zeros(7) 24 | 25 | 26 | vehicle_ids = self.getVehicleIds() 27 | state_image = np.array(self.render()) 28 | # state_image = gaussian_filter(state_image , sigma=1) 29 | 30 | if vehicle_ids: 31 | obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids) 32 | 33 | for i, vehicle in enumerate(obsLane0): 34 | maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0]) 35 | if vehicle: 36 | 37 | if vehicle[0] not in ["no_vehicle","", None]: 38 | 39 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0]) 40 | state_acc[i] = traci.vehicle.getAcceleration(vehicle[0]) 41 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0] 42 | 43 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1] 44 | for i, vehicle in enumerate(obsLane1, len(obsLane0)): 45 | if vehicle: 46 | if vehicle[0] not in ["no_vehicle","", None]:  # check the vehicle id, as in the lane-0 loop above 47 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0]) 48 | state_acc[i] = traci.vehicle.getAcceleration(vehicle[0]) 49 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0] 50 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1] 51 | 52 | # rl state information 53 | state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id) 54 | state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0] 55 | state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1] 56 | state_acc[-1] = traci.vehicle.getAcceleration(self.rl_car_id) 57 | 58 | #state_speed = np.clip(state_speed, 0, self.maxSpeed) 59 | #state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low), 60 | # abs(self.observation_space['xPos'].high)) 61 | #state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low), 62 | # abs(self.observation_space['yPos'].high)) 63 | sigmavalue = 1 64 | state_speed = gaussian_filter(state_speed, sigma=sigmavalue) 65 | state_position_x = gaussian_filter(state_position_x, sigma=sigmavalue) 66 | state_position_y = gaussian_filter(state_position_y, sigma=sigmavalue) 67 | state_acc = gaussian_filter(state_acc, sigma=sigmavalue) 68 | 69 | 70 | state = { 71 | 'image': state_image.astype(np.uint8), 72 | 'xPos': np.array(state_position_x, dtype=np.float32), 73 | 'yPos': np.array(state_position_y, dtype=np.float32), 74 | 'velocity': np.array(state_speed, dtype=np.float32), 75 | 'acceleration' : np.array(state_acc, dtype= np.float32) 76 | } 77 | 78 | return state 79 | 80 | 81 |
-------------------------------------------------------------------------------- /train_Image_noBSM_final.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from
stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | from gym.spaces import Box 11 | from custom_envs.MultiMerge import Image_No_BSM as MultiMerge 12 | 13 | import os 14 | import wandb, glob 15 | from wandb.integration.sb3 import WandbCallback 16 | from stable_baselines3.common.monitor import Monitor 17 | import argparse 18 | 19 | parser = argparse.ArgumentParser(description='train PPO multi model') 20 | parser.add_argument("config", help="Config file") 21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 22 | 23 | args = parser.parse_args() 24 | module = __import__("config_file",fromlist= [args.config]) 25 | exp_config = getattr(module, args.config) 26 | 27 | timesteps = 500000 28 | 29 | config = { 30 | "policy_type": "MultiInputPolicy", 31 | "total_timesteps": timesteps, 32 | "env_name": "SumoRamp()", 33 | } 34 | pdir = os.path.abspath('../') 35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 36 | 37 | policy_kwargs = exp_config.policy_kwargs 38 | 39 | action_space = exp_config.action_space 40 | 41 | image_shape = exp_config.image_shape 42 | obsspaces = exp_config.obsspaces 43 | 44 | weights = exp_config.weights 45 | sumoParameters = exp_config.sumoParameters 46 | 47 | min_action = -1 48 | max_action = +1 49 | 50 | video_folder = dir + '/logs/videos/' 51 | video_length = 600 52 | 53 | def make_env(env_id, rank, seed=0, monitor_dir = None): 54 | """ 55 | Utility function for multiprocessed env. 
56 | 57 | :param env_id: (str) the environment ID 58 | :param num_env: (int) the number of environments you wish to have in subprocesses 59 | :param seed: (int) the inital seed for RNG 60 | :param rank: (int) index of the subprocess 61 | """ 62 | 63 | def _init(): 64 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 65 | isBaseline=False,render=0) 66 | env.seed(seed + rank) 67 | env = RescaleAction(env, min_action, max_action) 68 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 69 | if monitor_path is not None: 70 | os.makedirs(monitor_dir, exist_ok=True) 71 | return env 72 | set_random_seed(seed) 73 | return _init 74 | 75 | 76 | if __name__ == '__main__': 77 | run = wandb.init( 78 | project="RMMRL-Training", 79 | name=f"Image+NoBSM_{args.config}", 80 | dir=dir, 81 | config=config, 82 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 83 | monitor_gym=True, # auto-upload the videos of agents playing the game 84 | save_code=True, # optional 85 | magic=True 86 | ) 87 | 88 | env_id = "MultiMerge" 89 | num_cpu =16# Number of processes to use 90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 93 | env = VecMonitor(venv=env) 94 | model = PPO(config["policy_type"], 95 | env, 96 | verbose=3, 97 | policy_kwargs=policy_kwargs, 98 | gamma=0.99, 99 | n_steps=512, 100 | learning_rate=0.0003, 101 | vf_coef=0.042202, 102 | max_grad_norm=0.9, 103 | gae_lambda=0.95, 104 | n_epochs=10, 105 | clip_range=0.2, 106 | batch_size=256, 107 | tensorboard_log=f"{dir}" 108 | ) 109 | 110 | model.learn( 111 | total_timesteps=int(config["total_timesteps"]), 112 | callback=WandbCallback( 113 | gradient_save_freq=5, 114 | model_save_freq=5000, 115 | model_save_path=f"{dir}/models/{run.id}", 116 | verbose=2, 117 | ), ) 118 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 119 | env.save(stats_path) 120 | 121 | -------------------------------------------------------------------------------- /train_multiModelNoise_final.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | from gym.spaces import Box 11 | from custom_envs.MultiMerge import BSM_Noise_Image_Noise as MultiMerge 12 | 13 | import os 14 | import wandb, glob 15 | from wandb.integration.sb3 import WandbCallback 16 | from stable_baselines3.common.monitor import Monitor 17 | import argparse 18 | 19 | parser = argparse.ArgumentParser(description='train PPO multi model') 20 | parser.add_argument("config", help="Config file") 21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 22 | 23 | args = parser.parse_args() 24 | module = __import__("config_file",fromlist= [args.config]) 25 | exp_config = getattr(module, args.config) 26 | 27 | timesteps = 500000 28 | 29 | config = { 30 | "policy_type": "MultiInputPolicy", 31 | "total_timesteps": timesteps, 32 
| "env_name": "SumoRamp()", 33 | } 34 | pdir = os.path.abspath('../') 35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 36 | 37 | policy_kwargs = exp_config.policy_kwargs 38 | 39 | action_space = exp_config.action_space 40 | 41 | image_shape = exp_config.image_shape 42 | obsspaces = exp_config.obsspaces 43 | 44 | weights = exp_config.weights 45 | sumoParameters = exp_config.sumoParameters 46 | 47 | min_action = -1 48 | max_action = +1 49 | 50 | video_folder = dir + '/logs/videos/' 51 | video_length = 600 52 | 53 | def make_env(env_id, rank, seed=0, monitor_dir = None): 54 | """ 55 | Utility function for multiprocessed env. 56 | 57 | :param env_id: (str) the environment ID 58 | :param num_env: (int) the number of environments you wish to have in subprocesses 59 | :param seed: (int) the inital seed for RNG 60 | :param rank: (int) index of the subprocess 61 | """ 62 | 63 | def _init(): 64 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 65 | isBaseline=False,render=0) 66 | env.seed(seed + rank) 67 | env = RescaleAction(env, min_action, max_action) 68 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 69 | if monitor_path is not None: 70 | os.makedirs(monitor_dir, exist_ok=True) 71 | return env 72 | set_random_seed(seed) 73 | return _init 74 | 75 | if __name__ == '__main__': 76 | run = wandb.init( 77 | project="RMMRL-Training", 78 | name=f"MultiModal_noise", 79 | dir=dir, 80 | config=config, 81 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 82 | monitor_gym=True, # auto-upload the videos of agents playing the game 83 | save_code=True, # optional 84 | magic=True 85 | ) 86 | 87 | env_id = "MultiMerge" 88 | num_cpu = 16# Number of processes to use 89 | # Create the vectorized environment 90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 93 | 94 | env = VecMonitor(venv=env) 95 | model = PPO( 96 | config["policy_type"], 97 | env, 98 | verbose=3, 99 | policy_kwargs=policy_kwargs, 100 | gamma=0.99, 101 | n_steps=512, 102 | learning_rate=0.0003, 103 | vf_coef=0.042202, 104 | max_grad_norm=0.9, 105 | gae_lambda=0.95, 106 | n_epochs=10, 107 | clip_range=0.2, 108 | batch_size=256, 109 | tensorboard_log=f"{dir}", 110 | ) 111 | 112 | model.learn( 113 | total_timesteps=int(config["total_timesteps"]), 114 | callback=WandbCallback( 115 | gradient_save_freq=5, 116 | model_save_freq=5000, 117 | model_save_path=f"{dir}/models/{run.id}", 118 | verbose=2, 119 | ), ) 120 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 121 | env.save(stats_path) 122 | 123 | -------------------------------------------------------------------------------- /train_multiModal_final.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | from gym.spaces import Box 11 | from custom_envs.MultiMerge import MultiMergeAllRewards as 
MultiMerge 12 | import os 13 | import wandb, glob 14 | from wandb.integration.sb3 import WandbCallback 15 | from stable_baselines3.common.monitor import Monitor 16 | import argparse 17 | 18 | parser = argparse.ArgumentParser(description='train PPO multi model') 19 | parser.add_argument("config", help="Config file") 20 | parser.add_argument("--render", default=0 , help = "should render") 21 | 22 | args = parser.parse_args() 23 | module = __import__("config_file",fromlist= [args.config]) 24 | exp_config = getattr(module, args.config) 25 | 26 | timesteps = 500000 27 | 28 | config = { 29 | "policy_type": "MultiInputPolicy", 30 | "total_timesteps": timesteps, 31 | "env_name": "SumoRamp()", 32 | } 33 | pdir = os.path.abspath('../') 34 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 35 | 36 | policy_kwargs = exp_config.policy_kwargs 37 | 38 | action_space = exp_config.action_space 39 | 40 | image_shape = exp_config.image_shape 41 | obsspaces = exp_config.obsspaces 42 | 43 | weights = exp_config.weights 44 | sumoParameters = exp_config.sumoParameters 45 | 46 | min_action = -1 47 | max_action = +1 48 | 49 | video_folder = dir + '/logs/videos/' 50 | video_length = 600 51 | 52 | def make_env(env_id, rank, seed=0, monitor_dir = None): 53 | """ 54 | Utility function for multiprocessed env. 55 | 56 | :param env_id: (str) the environment ID 57 | :param num_env: (int) the number of environments you wish to have in subprocesses 58 | :param seed: (int) the inital seed for RNG 59 | :param rank: (int) index of the subprocess 60 | """ 61 | 62 | def _init(): 63 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 64 | isBaseline=False,render=0) 65 | env.seed(seed + rank) 66 | env = RescaleAction(env, min_action, max_action) 67 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 68 | if monitor_path is not None: 69 | os.makedirs(monitor_dir, exist_ok=True) 70 | return env 71 | set_random_seed(seed) 72 | return _init 73 | 74 | if __name__ == '__main__': 75 | run = wandb.init( 76 | project="RMMRL-Training", 77 | name=f"MultiModal_NoNoise", 78 | dir=dir, 79 | config=config, 80 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 81 | monitor_gym=True, # auto-upload the videos of agents playing the game 82 | save_code=True, # optional 83 | magic=True 84 | ) 85 | 86 | env_id = "MultiMerge" 87 | num_cpu = 16# Number of processes to use 88 | # Create the vectorized environment 89 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 90 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 91 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 92 | 93 | env = VecMonitor(venv=env) 94 | model = PPO( 95 | config["policy_type"], 96 | env, 97 | verbose=3, 98 | policy_kwargs=policy_kwargs, 99 | gamma=0.99, 100 | n_steps=512, 101 | learning_rate=0.0001, 102 | vf_coef=0.042202, 103 | max_grad_norm=0.9, 104 | gae_lambda=0.95, 105 | n_epochs=10, 106 | clip_range=0.2, 107 | batch_size=256, 108 | tensorboard_log=f"{dir}", 109 | ) 110 | 111 | 112 | model.learn( 113 | total_timesteps=int(config["total_timesteps"]), 114 | callback=WandbCallback( 115 | gradient_save_freq=5, 116 | model_save_freq=5000, 117 | model_save_path=f"{dir}/models/{run.id}", 118 | verbose=2, 119 | ), 120 | ) 121 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 122 | env.save(stats_path) 123 | 124 | 
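(All of the training scripts in this dump end the same way: WandbCallback periodically saves the PPO weights under models/<run_id>/ and env.save() writes the VecNormalize statistics next to them. Below is a sketch of reloading both for evaluation; the <run_id> paths are placeholders, and model.zip is assumed to be the file name the wandb callback writes.)

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv, VecFrameStack, VecNormalize

# Rebuild the wrapper stack in the same order as during training,
# then restore the saved normalisation statistics on top of it.
eval_env = SubprocVecEnv([make_env("MultiMerge", rank=0)])
eval_env = VecFrameStack(eval_env, n_stack=4)
eval_env = VecNormalize.load("models/<run_id>/vec_normalize.pkl", eval_env)
eval_env.training = False      # freeze the running mean/var statistics
eval_env.norm_reward = False   # report raw episode rewards during evaluation

model = PPO.load("models/<run_id>/model.zip", env=eval_env)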
-------------------------------------------------------------------------------- /train_multiModalNoise_parallel.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | from gym.spaces import Box 11 | from custom_envs.MultiMergeParallel import BSM_Noise_Image_Noise as MultiMerge 12 | 13 | import os 14 | import wandb, glob 15 | from wandb.integration.sb3 import WandbCallback 16 | from stable_baselines3.common.monitor import Monitor 17 | import argparse 18 | 19 | parser = argparse.ArgumentParser(description='train PPO multi model') 20 | parser.add_argument("config", help="Config file") 21 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 22 | 23 | args = parser.parse_args() 24 | module = __import__("config_file",fromlist= [args.config]) 25 | exp_config = getattr(module, args.config) 26 | 27 | timesteps = 3000000 28 | 29 | config = { 30 | "policy_type": "MultiInputPolicy", 31 | "total_timesteps": timesteps, 32 | "env_name": "SumoRamp()", 33 | } 34 | pdir = os.path.abspath('../') 35 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 36 | 37 | policy_kwargs = exp_config.policy_kwargs 38 | 39 | action_space = exp_config.action_space 40 | 41 | image_shape = exp_config.image_shape 42 | obsspaces = exp_config.obsspaces 43 | 44 | weights = exp_config.weights 45 | sumoParameters = exp_config.sumoParameters 46 | 47 | min_action = -1 48 | max_action = +1 49 | 50 | video_folder = dir + '/logs/videos/' 51 | video_length = 600 52 | 53 | def make_env(env_id, rank, seed=0, monitor_dir = None): 54 | """ 55 | Utility function for multiprocessed env. 
56 | 
57 |     :param env_id: (str) the environment ID
58 |     :param num_env: (int) the number of environments you wish to have in subprocesses
59 |     :param seed: (int) the initial seed for RNG
60 |     :param rank: (int) index of the subprocess
61 |     """
62 | 
63 |     def _init():
64 |         env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
65 |                          isBaseline=False, render=0)
66 |         env.seed(seed + rank)
67 |         env = RescaleAction(env, min_action, max_action)
68 |         monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
69 |         if monitor_path is not None:
70 |             os.makedirs(monitor_dir, exist_ok=True)
71 |         return env
72 |     set_random_seed(seed)
73 |     return _init
74 | 
75 | if __name__ == '__main__':
76 |     run = wandb.init(
77 |         project="Robust-OnRampMerging-Training",
78 |         name=f"ParallelMultiModal_noise",
79 |         dir=dir,
80 |         config=config,
81 |         sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
82 |         monitor_gym=True,  # auto-upload the videos of agents playing the game
83 |         save_code=True,  # optional
84 |         magic=True
85 |     )
86 | 
87 |     env_id = "MultiMerge"
88 |     num_cpu = 16  # Number of processes to use
89 |     # Create the vectorized environment
90 |     env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
91 |     env = VecFrameStack(env, n_stack=4)  # stack 4 frames
92 |     env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True)
93 | 
94 |     env = VecMonitor(venv=env)
95 |     model = PPO(
96 |         config["policy_type"],
97 |         env,
98 |         verbose=3,
99 |         policy_kwargs=policy_kwargs,
100 |         gamma=0.99,
101 |         n_steps=512,
102 |         learning_rate=0.0003,
103 |         vf_coef=0.042202,
104 |         max_grad_norm=0.9,
105 |         gae_lambda=0.95,
106 |         n_epochs=10,
107 |         clip_range=0.2,
108 |         batch_size=256,
109 |         tensorboard_log=f"{dir}",
110 |     )
111 | 
112 |     model.learn(
113 |         total_timesteps=int(config["total_timesteps"]),
114 |         callback=WandbCallback(
115 |             gradient_save_freq=5,
116 |             model_save_freq=5000,
117 |             model_save_path=f"{dir}/models/{run.id}",
118 |             verbose=2,
119 |         ), )
120 |     stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl")
121 |     env.save(stats_path)
122 | 
123 | 
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigParallelRamp/ramp_parallel.rou.xml_old:
--------------------------------------------------------------------------------
(SUMO route file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/train_bsm_noImage_final.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import numpy as np
3 | 
4 | from stable_baselines3 import PPO
5 | from stable_baselines3.common.evaluation import evaluate_policy
6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack
7 | from stable_baselines3.common.env_util import make_vec_env
8 | from stable_baselines3.common.utils import
set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | from gym.spaces import Box 11 | from custom_envs.MultiMerge import BSM_No_Image as MultiMerge 12 | import os 13 | import wandb, glob 14 | from wandb.integration.sb3 import WandbCallback 15 | from stable_baselines3.common.monitor import Monitor 16 | import argparse 17 | 18 | parser = argparse.ArgumentParser(description='train PPO multi model') 19 | parser.add_argument("config", help="Config file") 20 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma valu") 21 | 22 | args = parser.parse_args() 23 | module = __import__("config_file",fromlist= [args.config]) 24 | exp_config = getattr(module, args.config) 25 | 26 | timesteps = 500000 27 | 28 | config = { 29 | "policy_type": "MultiInputPolicy", 30 | "total_timesteps": timesteps, 31 | "env_name": "SumoRamp()", 32 | } 33 | pdir = os.path.abspath('../') 34 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 35 | 36 | policy_kwargs = exp_config.policy_kwargs 37 | 38 | action_space = exp_config.action_space 39 | 40 | image_shape = exp_config.image_shape 41 | obsspaces = exp_config.obsspaces 42 | 43 | weights = exp_config.weights 44 | sumoParameters = exp_config.sumoParameters 45 | 46 | min_action = -1 47 | max_action = +1 48 | 49 | video_folder = dir + '/logs/videos/' 50 | video_length = 600 51 | 52 | def make_env(env_id, rank, seed=0, monitor_dir = None): 53 | """ 54 | Utility function for multiprocessed env. 55 | 56 | :param env_id: (str) the environment ID 57 | :param num_env: (int) the number of environments you wish to have in subprocesses 58 | :param seed: (int) the inital seed for RNG 59 | :param rank: (int) index of the subprocess 60 | """ 61 | 62 | def _init(): 63 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 64 | isBaseline=False,render=0) 65 | env.seed(seed + rank) 66 | env = RescaleAction(env, min_action, max_action) 67 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 68 | if monitor_path is not None: 69 | os.makedirs(monitor_dir, exist_ok=True) 70 | return env 71 | set_random_seed(seed) 72 | return _init 73 | 74 | 75 | if __name__ == '__main__': 76 | run = wandb.init( 77 | project="RMMRL-Training", 78 | name=f"BSM+NoImage", 79 | dir=dir, 80 | config=config, 81 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 82 | monitor_gym=True, # auto-upload the videos of agents playing the game 83 | save_code=True, # optional 84 | magic=True 85 | ) 86 | 87 | env_id = "MultiMerge" 88 | num_cpu = 16# Number of processes to use 89 | # Create the vectorized environment 90 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 91 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 92 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 93 | 94 | env = VecMonitor(venv=env) 95 | model = PPO( 96 | config["policy_type"], 97 | env, 98 | verbose=3, 99 | policy_kwargs=policy_kwargs, 100 | gamma=0.99, 101 | n_steps=512, 102 | learning_rate=0.0003, 103 | vf_coef=0.042202, 104 | max_grad_norm=0.9, 105 | gae_lambda=0.95, 106 | n_epochs=10, 107 | clip_range=0.2, 108 | batch_size=256, 109 | tensorboard_log=f"{dir}", 110 | ) 111 | 112 | 113 | model.learn( 114 | total_timesteps=int(config["total_timesteps"]), 115 | callback=WandbCallback( 116 | gradient_save_freq=5, 117 | model_save_freq=5000, 118 | model_save_path=f"{dir}/models/{run.id}", 119 | verbose=2, 120 | ), 121 | ) 122 | 
model.save(f"{dir}/models/{run.id}/vecstats/") 123 | 124 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 125 | env.save(stats_path) 126 | -------------------------------------------------------------------------------- /test_multi_model.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 6 | from stable_baselines3.common.env_util import make_vec_env 7 | from stable_baselines3.common.utils import set_random_seed 8 | from gym.wrappers.rescale_action import RescaleAction 9 | from gym.spaces import Box 10 | from custom_envs.MultiMerge import MultiMergeAllRewards as MultiMerge 11 | 12 | 13 | import os 14 | import wandb, glob 15 | from wandb.integration.sb3 import WandbCallback 16 | from stable_baselines3.common.monitor import Monitor 17 | import argparse 18 | 19 | parser = argparse.ArgumentParser(description='test PPO multi model') 20 | parser.add_argument("dir", help="model path") 21 | parser.add_argument("--render", default =0, help = "should render default 0") 22 | parser.add_argument("stats_load", help="vec norm stats file") 23 | 24 | parser.add_argument("config", help="Config file") 25 | args = parser.parse_args() 26 | 27 | module = __import__("config_file",fromlist= [args.config]) 28 | exp_config = getattr(module, args.config) 29 | 30 | config = { 31 | "policy_type": "MultiInputPolicy", 32 | "env_name": "SumoRamp()", 33 | } 34 | 35 | 36 | pdir = os.path.abspath('../') 37 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 38 | 39 | 40 | policy_kwargs = exp_config.policy_kwargs 41 | 42 | action_space = exp_config.action_space 43 | 44 | image_shape = exp_config.image_shape 45 | obsspaces = exp_config.obsspaces 46 | 47 | weights = exp_config.weights 48 | sumoParameters = exp_config.sumoParameters 49 | 50 | min_action = -1 51 | max_action = +1 52 | 53 | video_folder = dir + '/logs/videos/' 54 | video_length = 180 55 | 56 | def make_env(env_id, rank, seed=0, monitor_dir = None): 57 | """ 58 | Utility function for multiprocessed env. 
59 | 
60 |     :param env_id: (str) the environment ID
61 |     :param num_env: (int) the number of environments you wish to have in subprocesses
62 |     :param seed: (int) the initial seed for RNG
63 |     :param rank: (int) index of the subprocess
64 |     """
65 | 
66 |     def _init():
67 | 
68 | 
69 |         env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights,
70 |                          isBaseline=False, render=0)
71 |         env.seed(seed + rank)
72 |         env = RescaleAction(env, min_action, max_action)
73 |         monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
74 |         if monitor_path is not None:
75 |             os.makedirs(monitor_dir, exist_ok=True)
76 |             env = Monitor(env, filename=monitor_path)
77 | 
78 |         return env
79 | 
80 |     set_random_seed(seed)
81 |     return _init
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     run = wandb.init(
86 |         project="Robust-OnRampMerging-Testing",
87 |         dir=dir,
88 |         name=f"multimodal_{args.config}",
89 |         config=config,
90 |         sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
91 |         monitor_gym=True,  # auto-upload the videos of agents playing the game
92 |         save_code=True,  # optional
93 |         magic=True
94 |     )
95 | 
96 |     env_id = "MultiMerge"
97 |     num_cpu = 1  # Number of processes to use
98 |     env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])
99 |     env = VecFrameStack(env, n_stack=4)  # stack 4 frames
100 |     env = VecNormalize.load(args.stats_load, env)
101 |     env.training = False
102 |     env.norm_reward = True
103 |     env = VecMonitor(venv=env)
104 |     model = PPO.load(args.dir, env)
105 | 
106 |     obs = env.reset()
107 |     n_games = 10
108 |     for i_games in range(n_games):
109 | 
110 |         done = False
111 |         obs = env.reset()
112 |         score = 0
113 |         num_collisions = 0
114 |         mergeTime = 0
115 |         velocity_reward = []
116 |         acc_reward = []
117 |         while not done:
118 |             action, _states = model.predict(obs)
119 |             obs, rewards, done, info = env.step(action)
120 |             if int(args.render) == 1:
121 |                 env.render()
122 |             score += rewards
123 | 
124 |             if int(info[0]['terminal']) == -1:
125 |                 num_collisions += 1
126 |             if int(info[0]['terminal']) != 0:
127 |                 mergeTime = info[0]['mergeTime']
128 |             velocity_reward.append(info[0]['velocity_reward'])
129 | 
130 |             acc_reward.append((info[0]['acc_reward']))
131 |         print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}")
132 |         wandb.log({
133 |             "episodic score": score,
134 |             "num_collisions": num_collisions,
135 |             "mergeTime": mergeTime,
136 |             "acc_reward": np.mean(acc_reward),
137 |             "velocity_reward": np.mean(velocity_reward),
138 |         }, step=i_games)
139 | 
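# --- Hedged follow-up sketch (not part of the original test script) ---
# The loop above logs one wandb point per episode. If a single summary over
# all n_games is also wanted, the per-episode values can be collected in lists
# and reduced once at the end; the list names below are assumptions, not
# variables defined by the script.
import numpy as np

def summarize_episodes(scores, merge_times, collision_flags):
    merged = [t for t in merge_times if t > 0]   # episodes that actually merged
    return {
        "mean_score": float(np.mean(scores)),
        "std_score": float(np.std(scores)),
        "mean_merge_time": float(np.mean(merged)) if merged else 0.0,
        "collision_rate": float(np.mean(collision_flags)),
    }

# wandb.log(summarize_episodes(all_scores, all_merge_times, all_collisions))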
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_3.rou.xml:
--------------------------------------------------------------------------------
(SUMO route file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_1.rou.xml:
--------------------------------------------------------------------------------
(SUMO route file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2.rou.xml_copy:
--------------------------------------------------------------------------------
(SUMO route file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/ramp_2_2.rou.xml:
--------------------------------------------------------------------------------
(SUMO route file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_Config/Ramp_1.net.xml:
--------------------------------------------------------------------------------
(SUMO network file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2_1.net.xml:
--------------------------------------------------------------------------------
(SUMO network file; XML content not captured in this dump)
--------------------------------------------------------------------------------
/custom_envs/sumo_ConfigTaper/Ramp_2.net.xml_copy:
--------------------------------------------------------------------------------
(SUMO network file; XML content not captured in this dump)
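# --- Hedged sketch: how the SUMO config files above fit together ---
# Each .sumocfg references a .net.xml (road network) and a .rou.xml (vehicle
# types, routes and flows); their XML bodies were not captured in this dump.
# The environments drive the simulation through TraCI, roughly as in this
# minimal standalone session (assumes the "sumo" binary is on PATH):
import os
import traci

cfg = os.path.join("custom_envs", "sumo_ConfigTaper", "ramp_2.sumocfg")
traci.start(["sumo", "-c", cfg])     # launch headless SUMO on the taper-ramp config
for _ in range(100):
    traci.simulationStep()           # advance the simulation by one step
    ids = traci.vehicle.getIDList()  # vehicles currently in the network
traci.close()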
--------------------------------------------------------------------------------
/custom_envs/MultiMerge_1.py:
--------------------------------------------------------------------------------
1 | import os
2 | from abc import ABC
3 | 
4 | import gym
5 | from custom_envs.gymsumo import SumoRamp
6 | import traci
7 | import numpy as np
8 | from custom_envs.bsmMerge import BsmMerge, BsmMergeAllRewards
9 | from typing import Callable, Optional, Tuple, Union
10 | 
11 | class MultiMerge(BsmMerge):
12 |     def getObservations(self):
13 |         # returns observations of the state
14 | 
15 |         state_speed = np.ones(7) * self.maxSpeed
16 |         state_position_x = np.ones(7)
17 |         state_position_y = np.ones(7)
18 | 
19 |         vehicle_ids = self.getVehicleIds()
20 |         state_image = np.array(self.render())
21 |         if vehicle_ids:
22 |             obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids)
23 | 
24 |             for i, vehicle in enumerate(obsLane0):
25 |                 maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0])
26 |                 if vehicle:
27 | 
28 |                     if vehicle[0] not in ["no_vehicle", "", None]:
29 | 
30 |                         state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
31 |                         state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
32 |                         state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
33 |             for i, vehicle in enumerate(obsLane1, len(obsLane0)):
34 |                 if vehicle:
35 |                     if vehicle[0] not in ["no_vehicle", "", None]:
36 |                         state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
37 |                         state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0]
38 |                         state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1]
39 | 
40 |             # rl state information
41 |             state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id) / self.maxSpeed
42 |             state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0]
43 |             state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1]
44 |         state_speed = np.clip(state_speed, 0, self.maxSpeed)
45 |         state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low),
46 |                                     abs(self.observation_space['xPos'].high))
47 |         state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low),
48 |                                     abs(self.observation_space['yPos'].high))
49 |         state = {
50 |             'image': state_image.astype(np.uint8),
51 |             'xPos': np.array(state_position_x, dtype=np.float32),
52 |             'yPos': np.array(state_position_y, dtype=np.float32),
53 |             'velocity': np.array(state_speed, dtype=np.float32)}
54 | 
55 |         return state
56 | 
57 | class MultiMergeAllRewards(BsmMergeAllRewards):
58 |     def getObservations(self):
59 |         # returns observations of the state
60 | 
61 |         state_speed = np.ones(7) * self.maxSpeed
62 |         state_position_x = np.ones(7)
63 |         state_position_y = np.ones(7)
64 | 
65 |         vehicle_ids = self.getVehicleIds()
66 |         state_image = np.array(self.render())
67 |         if vehicle_ids:
68 |             for vehicle in vehicle_ids:
69 |                 if not "rl" in vehicle:
70 |                     traci.vehicle.setColor(vehicle, color=(255, 255, 255, 255))  # change vehicle color to white
71 | 
72 |             obsLane0, obsLane1 = self.getobservedVehicles(vehicle_ids)
73 | 
74 | 
75 |             for i, vehicle in enumerate(obsLane0):
76 |                 maxSpeed = traci.vehicle.getMaxSpeed(vehicle_ids[0])
77 |                 #print(vehicle)
78 |                 if vehicle:
79 |                     if vehicle[0] not in ["no_vehicle", "", None]:
80 |                         state_speed[i] = traci.vehicle.getSpeed(vehicle[0])
81 |                         state_position_x[i]
= traci.vehicle.getPosition(vehicle[0])[0] 82 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1] 83 | #traci.vehicle.setColor(vehicle[0], color=(255, 0, 255, 255)) # change vehicle color to blue 84 | 85 | for i, vehicle in enumerate(obsLane1, len(obsLane0)): 86 | #print(vehicle) 87 | if vehicle: 88 | 89 | if vehicle[0] not in ["no_vehicle","", None]: 90 | state_speed[i] = traci.vehicle.getSpeed(vehicle[0]) 91 | 92 | state_position_x[i] = traci.vehicle.getPosition(vehicle[0])[0] 93 | state_position_y[i] = traci.vehicle.getPosition(vehicle[0])[1] 94 | #traci.vehicle.setColor(vehicle[0], color=(255, 255, 0, 255)) # change vehicle color to blue 95 | 96 | # rl state information 97 | state_speed[-1] = traci.vehicle.getSpeed(self.rl_car_id) / self.maxSpeed 98 | state_position_x[-1] = traci.vehicle.getPosition(self.rl_car_id)[0] 99 | state_position_y[-1] = traci.vehicle.getPosition(self.rl_car_id)[1] 100 | state_speed = np.clip(state_speed, 0, self.maxSpeed) 101 | state_position_x = np.clip(state_position_x, -abs(self.observation_space['xPos'].low), 102 | abs(self.observation_space['xPos'].high)) 103 | state_position_y = np.clip(state_position_y, -abs(self.observation_space['yPos'].low), 104 | abs(self.observation_space['yPos'].high)) 105 | state = { 106 | 'image': state_image.astype(np.uint8), 107 | 'xPos': np.array(state_position_x, dtype=np.float32), 108 | 'yPos': np.array(state_position_y, dtype=np.float32), 109 | 'velocity': np.array(state_speed, dtype=np.float32)} 110 | return state 111 | -------------------------------------------------------------------------------- /test_parallel_model.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 6 | from stable_baselines3.common.env_util import make_vec_env 7 | from stable_baselines3.common.utils import set_random_seed 8 | from gym.wrappers.rescale_action import RescaleAction 9 | # from custom_envs.rampTaperEnv_half import SumoRamp 10 | from gym.spaces import Box 11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 12 | # from custom_envs.bsmMerge import BsmMerge 13 | # load simple cnn + bsm reward env 14 | # from custom_envs.MultiMerge import MultiMerge 15 | # load cnn + bsm all rewards env 16 | from custom_envs.MultiMergeParallel import MultiMergeAllRewards as MultiMerge 17 | 18 | 19 | import os 20 | import wandb, glob 21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN 22 | from wandb.integration.sb3 import WandbCallback 23 | from stable_baselines3.common.monitor import Monitor 24 | import argparse 25 | 26 | parser = argparse.ArgumentParser(description='test PPO multi model') 27 | 28 | parser.add_argument("dir", help="model path") 29 | parser.add_argument("--render", default =0, help = "should render default 0") 30 | 31 | parser.add_argument("stats_path", help="vec norm stats path file") 32 | parser.add_argument("config", help="Config file") 33 | args = parser.parse_args() 34 | 35 | module = __import__("config_file",fromlist= [args.config]) 36 | exp_config = getattr(module, args.config) 37 | 38 | 39 | timesteps = 3e6 40 | sub_timesteps = 10000 41 | 42 | config = { 43 | "policy_type": "MultiInputPolicy", 44 | "total_timesteps": timesteps, 45 | "env_name": "SumoRamp()", 46 | "sub_timesteps": sub_timesteps 47 | } 48 | 49 | 50 | pdir = 
os.path.abspath('../') 51 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 52 | 53 | 54 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False) 55 | policy_kwargs = exp_config.policy_kwargs 56 | 57 | action_space = exp_config.action_space 58 | 59 | image_shape = exp_config.image_shape 60 | obsspaces = exp_config.obsspaces 61 | 62 | weights = exp_config.weights 63 | sumoParameters = exp_config.sumoParameters 64 | 65 | min_action = -1 66 | max_action = +1 67 | 68 | video_folder = dir + '/logs/videos/' 69 | video_length = 600 70 | 71 | def make_env(env_id, rank, seed=0, monitor_dir = None): 72 | """ 73 | Utility function for multiprocessed env. 74 | 75 | :param env_id: (str) the environment ID 76 | :param num_env: (int) the number of environments you wish to have in subprocesses 77 | :param seed: (int) the inital seed for RNG 78 | :param rank: (int) index of the subprocess 79 | """ 80 | 81 | def _init(): 82 | 83 | 84 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 85 | isBaseline=False,render=0) 86 | env.seed(seed + rank) 87 | env = RescaleAction(env, min_action, max_action) 88 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 89 | if monitor_path is not None: 90 | os.makedirs(monitor_dir, exist_ok=True) 91 | env = Monitor(env, filename=monitor_path) 92 | 93 | return env 94 | 95 | set_random_seed(seed) 96 | return _init 97 | 98 | 99 | if __name__ == '__main__': 100 | run = wandb.init( 101 | project="Multi_Testing", 102 | dir=dir, 103 | config=config, 104 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 105 | monitor_gym=True, # auto-upload the videos of agents playing the game 106 | save_code=True, # optional 107 | magic=True 108 | ) 109 | 110 | env_id = "MultiMerge" 111 | num_cpu = 1 # Number of processes to use 112 | # Create the vectorized environment 113 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 114 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 115 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0, 116 | # video_length=300) 117 | 118 | # add vstack 119 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 120 | env = VecNormalize.load(args.stats_path,env) 121 | env = VecMonitor(venv=env) 122 | 123 | 124 | #code = wandb.Artifact('project-source', type='code') 125 | #for path in glob.glob('**/*.py', recursive=True): 126 | # code.add_file(path) 127 | 128 | #wandb.run.use_artifact(code) 129 | 130 | 131 | 132 | model = PPO.load(args.dir, env) 133 | 134 | obs = env.reset() 135 | n_games = 300 136 | for i_games in range(n_games): 137 | 138 | done = False 139 | obs = env.reset() 140 | score = 0 141 | num_collisions = 0 142 | mergeTime = 0 143 | 144 | while not done: 145 | action, _states = model.predict(obs) 146 | print('action', action) 147 | obs, rewards, done, info = env.step(action) 148 | if int(args.render)==1: 149 | env.render() 150 | score += rewards 151 | 152 | print('rewards', rewards) 153 | if int(info[0]['terminal']) == -1: 154 | num_collisions += 1 155 | if int(info[0]['terminal']) != 0: 156 | mergeTime = int(info[0]['mergeTime']) 157 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}") 158 | wandb.log({ 159 | "episodic score": score, 160 | "num_collisions": num_collisions, 161 | "mergeTime": mergeTime 162 | }, 
step=i_games) 163 | -------------------------------------------------------------------------------- /test_baseline_paralllel.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 6 | from stable_baselines3.common.env_util import make_vec_env 7 | from stable_baselines3.common.utils import set_random_seed 8 | from gym.wrappers.rescale_action import RescaleAction 9 | # from custom_envs.rampTaperEnv_half import SumoRamp 10 | from gym.spaces import Box 11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 12 | # from custom_envs.bsmMerge import BsmMerge 13 | # load simple cnn + bsm reward env 14 | # from custom_envs.MultiMerge import MultiMerge 15 | # load cnn + bsm all rewards env 16 | from custom_envs.MultiMergeParallel import MultiMergeAllRewards as MultiMerge 17 | import argparse 18 | 19 | import os 20 | import wandb, glob 21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN 22 | from wandb.integration.sb3 import WandbCallback 23 | from stable_baselines3.common.monitor import Monitor 24 | 25 | timesteps = 3e6 26 | sub_timesteps = 10000 27 | 28 | config = { 29 | "policy_type": "MultiInputPolicy", 30 | "total_timesteps": timesteps, 31 | "env_name": "SumoRamp()", 32 | "sub_timesteps": sub_timesteps 33 | } 34 | 35 | parser = argparse.ArgumentParser(description='test PPO multi model') 36 | 37 | 38 | parser.add_argument("--render",default = 0,help="should render default 0") 39 | parser.add_argument("config", help="Config file") 40 | 41 | args = parser.parse_args() 42 | module = __import__("config_file",fromlist= [args.config]) 43 | exp_config = getattr(module, args.config) 44 | pdir = os.path.abspath('../') 45 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 46 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False) 47 | 48 | 49 | 50 | policy_kwargs = exp_config.policy_kwargs 51 | 52 | action_space = exp_config.action_space 53 | 54 | image_shape = exp_config.image_shape 55 | obsspaces = exp_config.obsspaces 56 | 57 | weights = exp_config.weights 58 | sumoParameters = exp_config.sumoParameters 59 | 60 | 61 | min_action = -1 62 | max_action = +1 63 | 64 | video_folder = dir + '/logs/videos/' 65 | video_length = 600 66 | 67 | def make_env(env_id, rank, seed=0, monitor_dir = None): 68 | """ 69 | Utility function for multiprocessed env. 
70 | 71 | :param env_id: (str) the environment ID 72 | :param num_env: (int) the number of environments you wish to have in subprocesses 73 | :param seed: (int) the inital seed for RNG 74 | :param rank: (int) index of the subprocess 75 | """ 76 | 77 | def _init(): 78 | 79 | 80 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 81 | isBaseline=True,render=0) 82 | env.seed(seed + rank) 83 | env = RescaleAction(env, min_action, max_action) 84 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 85 | if monitor_path is not None: 86 | os.makedirs(monitor_dir, exist_ok=True) 87 | env = Monitor(env, filename=monitor_path) 88 | 89 | return env 90 | 91 | set_random_seed(seed) 92 | return _init 93 | 94 | 95 | if __name__ == '__main__': 96 | run = wandb.init( 97 | project="SB3RampTraining", 98 | name="Baseline_ppo7_multi-all-rewards", 99 | dir=dir, 100 | config=config, 101 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 102 | monitor_gym=True, # auto-upload the videos of agents playing the game 103 | save_code=True, # optional 104 | magic=True 105 | ) 106 | 107 | env_id = "MultiMerge" 108 | num_cpu = 1 # Number of processes to use 109 | # Create the vectorized environment 110 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 111 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 112 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 113 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0, 114 | # video_length=300) 115 | 116 | # add vstack 117 | # env = VecFrameStack(env, n_stack=4) # stack 4 frames 118 | env = VecMonitor(venv=env) 119 | 120 | 121 | # code = wandb.Artifact('project-source', type='code') 122 | # for path in glob.glob('**/*.py', recursive=True): 123 | # code.add_file(path) 124 | # 125 | # wandb.run.use_artifact(code) 126 | 127 | 128 | 129 | # model = PPO.load(os.path.join(pdir,'trainedSBModels/multi_all_rewards/model'), env) 130 | 131 | obs = env.reset() 132 | n_games = 300 133 | for i_games in range(n_games): 134 | 135 | done = False 136 | obs = env.reset() 137 | score = 0 138 | num_collisions = 0 139 | mergeTime = 0 140 | while not done: 141 | action = env.action_space.sample() 142 | #print('action', action) 143 | obs, rewards, done, info = env.step(action) 144 | score += rewards 145 | if int(args.render) == int(1): 146 | 147 | env.render() 148 | #print('rewards', rewards) 149 | if int(info[0]['terminal']) == -1: 150 | num_collisions += 1 151 | if int(info[0]['terminal']) != 0: 152 | mergeTime = int(info[0]['mergeTime']) 153 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}") 154 | wandb.log({ 155 | "episodic score": score, 156 | "num_collisions": num_collisions, 157 | "mergeTime": mergeTime 158 | }, step=i_games) 159 | -------------------------------------------------------------------------------- /test_bsmNoise_PerfectImage.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 6 | from stable_baselines3.common.env_util import make_vec_env 7 | from stable_baselines3.common.utils import set_random_seed 8 | from gym.wrappers.rescale_action import 
RescaleAction 9 | # from custom_envs.rampTaperEnv_half import SumoRamp 10 | from gym.spaces import Box 11 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 12 | # from custom_envs.bsmMerge import BsmMerge 13 | # load simple cnn + bsm reward env 14 | # from custom_envs.MultiMerge import MultiMerge 15 | # load cnn + bsm all rewards env 16 | from custom_envs.MultiMerge import BSM_Noise_No_Image as MultiMerge 17 | 18 | 19 | import os 20 | import wandb, glob 21 | #from customFeatureExtractor import CustomCombinedExtractor, CustomNatureCNN 22 | from wandb.integration.sb3 import WandbCallback 23 | from stable_baselines3.common.monitor import Monitor 24 | import argparse 25 | 26 | parser = argparse.ArgumentParser(description='test PPO multi model') 27 | 28 | parser.add_argument("dir", help="model path") 29 | 30 | parser.add_argument("stats_path", help="vec env stats path") 31 | parser.add_argument("--render", default =0, help = "should render default 0") 32 | parser.add_argument("config", help="Config file") 33 | 34 | args = parser.parse_args() 35 | 36 | module = __import__("config_file",fromlist= [args.config]) 37 | exp_config = getattr(module, args.config) 38 | 39 | 40 | timesteps = 3e6 41 | sub_timesteps = 10000 42 | 43 | config = { 44 | "policy_type": "MultiInputPolicy", 45 | "total_timesteps": timesteps, 46 | "env_name": "SumoRamp()", 47 | "sub_timesteps": sub_timesteps 48 | } 49 | 50 | 51 | pdir = os.path.abspath('../') 52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 53 | 54 | 55 | # env = SumoRamp(action_space=action_space, obsspaces=obsspaces, sumoParameters = sumoParameters, weights= weights, isBaseline=False) 56 | policy_kwargs = exp_config.policy_kwargs 57 | 58 | action_space = exp_config.action_space 59 | 60 | image_shape = exp_config.image_shape 61 | obsspaces = exp_config.obsspaces 62 | 63 | weights = exp_config.weights 64 | sumoParameters = exp_config.sumoParameters 65 | 66 | min_action = -1 67 | max_action = +1 68 | 69 | video_folder = dir + '/logs/videos/' 70 | video_length = 600 71 | 72 | def make_env(env_id, rank, seed=0, monitor_dir = None): 73 | """ 74 | Utility function for multiprocessed env. 
75 | 76 | :param env_id: (str) the environment ID 77 | :param num_env: (int) the number of environments you wish to have in subprocesses 78 | :param seed: (int) the inital seed for RNG 79 | :param rank: (int) index of the subprocess 80 | """ 81 | 82 | def _init(): 83 | 84 | 85 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 86 | isBaseline=False,render=0) 87 | env.seed(seed + rank) 88 | env = RescaleAction(env, min_action, max_action) 89 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 90 | if monitor_path is not None: 91 | os.makedirs(monitor_dir, exist_ok=True) 92 | env = Monitor(env, filename=monitor_path) 93 | 94 | return env 95 | 96 | set_random_seed(seed) 97 | return _init 98 | 99 | 100 | if __name__ == '__main__': 101 | run = wandb.init( 102 | project="Robust-OnRampMerging", 103 | name=f"Test_BSMNoise+NoImage_{args.config}", 104 | dir=dir, 105 | config=config, 106 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 107 | monitor_gym=True, # auto-upload the videos of agents playing the game 108 | save_code=True, # optional 109 | magic=True 110 | ) 111 | 112 | env_id = "MultiMerge" 113 | num_cpu = 1 # Number of processes to use 114 | # Create the vectorized environment 115 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 116 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 117 | #env = VecNormalize(env, norm_obs=True, norm_reward=True, training=False) 118 | 119 | env = VecFrameStack(env, 4) 120 | 121 | env = VecNormalize.load(args.stats_path, env) 122 | env.training = False 123 | env.norm_reward = False 124 | # env = VecVideoRecorder(env, video_folder=f"{dir}/videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0, 125 | # video_length=300) 126 | 127 | # add vstack 128 | env = VecMonitor(venv=env) 129 | 130 | 131 | #code = wandb.Artifact('project-source', type='code') 132 | #for path in glob.glob('**/*.py', recursive=True): 133 | # code.add_file(path) 134 | 135 | #wandb.run.use_artifact(code) 136 | 137 | 138 | 139 | model = PPO.load(args.dir, env) 140 | 141 | obs = env.reset() 142 | n_games = 300 143 | for i_games in range(n_games): 144 | 145 | done = False 146 | obs = env.reset() 147 | score = 0 148 | num_collisions = 0 149 | mergeTime = 0 150 | 151 | while not done: 152 | action, _states = model.predict(obs) 153 | obs, rewards, done, info = env.step(action) 154 | if int(args.render)==1: 155 | env.render() 156 | score += rewards 157 | 158 | if int(info[0]['terminal']) == -1: 159 | num_collisions += 1 160 | if int(info[0]['terminal']) != 0: 161 | mergeTime = int(info[0]['mergeTime']) 162 | print(f"score {score} num_collisions : {num_collisions} , mergetime : {mergeTime}") 163 | wandb.log({ 164 | "episodic score": score, 165 | "num_collisions": num_collisions, 166 | "mergeTime": mergeTime 167 | }, step=i_games) 168 | -------------------------------------------------------------------------------- /train_ImageNoise_noBSM_final.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import 
set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | # from custom_envs.rampTaperEnv_half import SumoRamp 11 | from gym.spaces import Box 12 | # from custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 13 | # from custom_envs.bsmMerge import BsmMerge 14 | # load simple cnn + bsm reward env 15 | # from custom_envs.MultiMerge import MultiMerge 16 | # load cnn + bsm all rewards env 17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 18 | from custom_envs.MultiMerge import Image_Noise_No_BSM as MultiMerge 19 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 20 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 21 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 22 | 23 | 24 | import os 25 | import wandb, glob 26 | #from customFeatureExtractor import CustomCombinedExtractor 27 | from wandb.integration.sb3 import WandbCallback 28 | from stable_baselines3.common.monitor import Monitor 29 | #from config_file import sac_multi_config as exp_config 30 | import argparse 31 | 32 | parser = argparse.ArgumentParser(description='train PPO multi model') 33 | parser.add_argument("config", help="Config file") 34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 35 | 36 | args = parser.parse_args() 37 | module = __import__("config_file",fromlist= [args.config]) 38 | exp_config = getattr(module, args.config) 39 | 40 | timesteps = 50000 41 | subtimesteps = 10000 42 | 43 | sub_timesteps = 10000 44 | 45 | config = { 46 | "policy_type": "MultiInputPolicy", 47 | "total_timesteps": timesteps, 48 | "env_name": "SumoRamp()", 49 | "sub_timesteps": sub_timesteps 50 | } 51 | pdir = os.path.abspath('../') 52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 53 | 54 | policy_kwargs = exp_config.policy_kwargs 55 | 56 | action_space = exp_config.action_space 57 | 58 | image_shape = exp_config.image_shape 59 | obsspaces = exp_config.obsspaces 60 | 61 | weights = exp_config.weights 62 | sumoParameters = exp_config.sumoParameters 63 | 64 | min_action = -1 65 | max_action = +1 66 | 67 | video_folder = dir + '/logs/videos/' 68 | video_length = 600 69 | 70 | def make_env(env_id, rank, seed=0, monitor_dir = None): 71 | """ 72 | Utility function for multiprocessed env. 
73 | 74 | :param env_id: (str) the environment ID 75 | :param num_env: (int) the number of environments you wish to have in subprocesses 76 | :param seed: (int) the inital seed for RNG 77 | :param rank: (int) index of the subprocess 78 | """ 79 | 80 | def _init(): 81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 82 | isBaseline=False,render=0) 83 | env.seed(seed + rank) 84 | env = RescaleAction(env, min_action, max_action) 85 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 86 | if monitor_path is not None: 87 | os.makedirs(monitor_dir, exist_ok=True) 88 | return env 89 | set_random_seed(seed) 90 | return _init 91 | 92 | 93 | if __name__ == '__main__': 94 | run = wandb.init( 95 | project="Robust-OnRampMerging", 96 | name=f"ImageNoise+NoBSM_{args.config}", 97 | dir=dir, 98 | config=config, 99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 100 | monitor_gym=True, # auto-upload the videos of agents playing the game 101 | save_code=True, # optional 102 | magic=True 103 | ) 104 | 105 | env_id = "MultiMerge" 106 | num_cpu = 2# Number of processes to use 107 | # Create the vectorized environment 108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 112 | 113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}", 114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0, 115 | # video_length=300) 116 | 117 | env = VecMonitor(venv=env) 118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 119 | # isBaseline=False,render=0) 120 | #code = wandb.Artifact('project-source', type='code') 121 | #for path in glob.glob('**/*.py', recursive=True): 122 | # code.add_file(path) 123 | 124 | #wandb.run.use_artifact(code) 125 | model = PPO(config["policy_type"], env, 126 | verbose=3, 127 | gamma=0.95, 128 | n_steps=1200, 129 | ent_coef=0.0905168, 130 | learning_rate=0.005, 131 | vf_coef=0.042202, 132 | max_grad_norm=0.9, 133 | gae_lambda=0.7, 134 | n_epochs=5, 135 | clip_range=0.2, 136 | batch_size=1200, 137 | tensorboard_log=f"{dir}") 138 | 139 | model.learn( 140 | total_timesteps=int(config["total_timesteps"]), 141 | callback=WandbCallback( 142 | gradient_save_freq=5, 143 | model_save_freq=5000, 144 | model_save_path=f"{dir}/models/{run.id}", 145 | verbose=2, 146 | ), ) 147 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 148 | env.save(stats_path) 149 | 150 | -------------------------------------------------------------------------------- /train_ImageNoise_PerfectBSM.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | # from custom_envs.rampTaperEnv_half import SumoRamp 11 | from gym.spaces import Box 12 | # from 
custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 13 | # from custom_envs.bsmMerge import BsmMerge 14 | # load simple cnn + bsm reward env 15 | # from custom_envs.MultiMerge import MultiMerge 16 | # load cnn + bsm all rewards env 17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 18 | from custom_envs.MultiMerge import BSM_Perfect_Noise_Image as MultiMerge 19 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 20 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 21 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 22 | 23 | 24 | import os 25 | import wandb, glob 26 | #from customFeatureExtractor import CustomCombinedExtractor 27 | from wandb.integration.sb3 import WandbCallback 28 | from stable_baselines3.common.monitor import Monitor 29 | #from config_file import sac_multi_config as exp_config 30 | import argparse 31 | 32 | parser = argparse.ArgumentParser(description='train PPO multi model') 33 | parser.add_argument("config", help="Config file") 34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 35 | 36 | args = parser.parse_args() 37 | module = __import__("config_file",fromlist= [args.config]) 38 | exp_config = getattr(module, args.config) 39 | 40 | timesteps = 50000 41 | subtimesteps = 10000 42 | 43 | sub_timesteps = 10000 44 | 45 | config = { 46 | "policy_type": "MultiInputPolicy", 47 | "total_timesteps": timesteps, 48 | "env_name": "SumoRamp()", 49 | "sub_timesteps": sub_timesteps 50 | } 51 | pdir = os.path.abspath('../') 52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 53 | 54 | policy_kwargs = exp_config.policy_kwargs 55 | 56 | action_space = exp_config.action_space 57 | 58 | image_shape = exp_config.image_shape 59 | obsspaces = exp_config.obsspaces 60 | 61 | weights = exp_config.weights 62 | sumoParameters = exp_config.sumoParameters 63 | 64 | min_action = -1 65 | max_action = +1 66 | 67 | video_folder = dir + '/logs/videos/' 68 | video_length = 600 69 | 70 | def make_env(env_id, rank, seed=0, monitor_dir = None): 71 | """ 72 | Utility function for multiprocessed env. 
73 | 74 | :param env_id: (str) the environment ID 75 | :param num_env: (int) the number of environments you wish to have in subprocesses 76 | :param seed: (int) the inital seed for RNG 77 | :param rank: (int) index of the subprocess 78 | """ 79 | 80 | def _init(): 81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 82 | isBaseline=False,render=0) 83 | env.seed(seed + rank) 84 | env = RescaleAction(env, min_action, max_action) 85 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 86 | if monitor_path is not None: 87 | os.makedirs(monitor_dir, exist_ok=True) 88 | return env 89 | set_random_seed(seed) 90 | return _init 91 | 92 | 93 | if __name__ == '__main__': 94 | run = wandb.init( 95 | project="Robust-OnRampMerging", 96 | name=f"ImageNoise+PerfectBSM_{args.config}", 97 | dir=dir, 98 | config=config, 99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 100 | monitor_gym=True, # auto-upload the videos of agents playing the game 101 | save_code=True, # optional 102 | magic=True 103 | ) 104 | 105 | env_id = "MultiMerge" 106 | num_cpu = 2# Number of processes to use 107 | # Create the vectorized environment 108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 112 | 113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}", 114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0, 115 | # video_length=300) 116 | 117 | env = VecMonitor(venv=env) 118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 119 | # isBaseline=False,render=0) 120 | #code = wandb.Artifact('project-source', type='code') 121 | #for path in glob.glob('**/*.py', recursive=True): 122 | # code.add_file(path) 123 | 124 | #wandb.run.use_artifact(code) 125 | model = PPO(config["policy_type"], env, 126 | verbose=3, 127 | gamma=0.95, 128 | n_steps=1200, 129 | ent_coef=0.0905168, 130 | learning_rate=0.005, 131 | vf_coef=0.042202, 132 | max_grad_norm=0.9, 133 | gae_lambda=0.7, 134 | n_epochs=5, 135 | clip_range=0.2, 136 | batch_size=1200, 137 | tensorboard_log=f"{dir}") 138 | 139 | model.learn( 140 | total_timesteps=int(config["total_timesteps"]), 141 | callback=WandbCallback( 142 | gradient_save_freq=5, 143 | model_save_freq=5000, 144 | model_save_path=f"{dir}/models/{run.id}", 145 | verbose=2, 146 | ), ) 147 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 148 | env.save(stats_path) 149 | 150 | -------------------------------------------------------------------------------- /train_multiModelNoise_final_2.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | from stable_baselines3 import PPO 5 | from stable_baselines3.common.evaluation import evaluate_policy 6 | from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecVideoRecorder, VecMonitor, VecFrameStack 7 | from stable_baselines3.common.env_util import make_vec_env 8 | from stable_baselines3.common.utils import set_random_seed 9 | from gym.wrappers.rescale_action import RescaleAction 10 | # from custom_envs.rampTaperEnv_half import SumoRamp 11 | from gym.spaces import Box 12 | # from 
custom_envs.bsmMerge import BsmMergeAllRewards as BsmMerge 13 | # from custom_envs.bsmMerge import BsmMerge 14 | # load simple cnn + bsm reward env 15 | # from custom_envs.MultiMerge import MultiMerge 16 | # load cnn + bsm all rewards env 17 | #from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 18 | from custom_envs.MultiMerge import BSM_Noise_Image_Noise as MultiMerge 19 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 20 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 21 | # from custom_envs.MergeSingleBSMNoise import MultiMergeAllRewards as BSMMerge 22 | 23 | 24 | import os 25 | import wandb, glob 26 | #from customFeatureExtractor import CustomCombinedExtractor 27 | from wandb.integration.sb3 import WandbCallback 28 | from stable_baselines3.common.monitor import Monitor 29 | #from config_file import sac_multi_config as exp_config 30 | import argparse 31 | 32 | parser = argparse.ArgumentParser(description='train PPO multi model') 33 | parser.add_argument("config", help="Config file") 34 | parser.add_argument("--noise_sigma",default=0.1 , help="Image noise sigma value") 35 | 36 | args = parser.parse_args() 37 | module = __import__("config_file",fromlist= [args.config]) 38 | exp_config = getattr(module, args.config) 39 | 40 | timesteps = 500000 41 | subtimesteps = 10000 42 | 43 | sub_timesteps = 10000 44 | 45 | config = { 46 | "policy_type": "MultiInputPolicy", 47 | "total_timesteps": timesteps, 48 | "env_name": "SumoRamp()", 49 | "sub_timesteps": sub_timesteps 50 | } 51 | pdir = os.path.abspath('../') 52 | dir = os.path.join(pdir, 'SBRampSavedFiles/wandbsavedfiles') 53 | 54 | policy_kwargs = exp_config.policy_kwargs 55 | 56 | action_space = exp_config.action_space 57 | 58 | image_shape = exp_config.image_shape 59 | obsspaces = exp_config.obsspaces 60 | 61 | weights = exp_config.weights 62 | sumoParameters = exp_config.sumoParameters 63 | 64 | min_action = -1 65 | max_action = +1 66 | 67 | video_folder = dir + '/logs/videos/' 68 | video_length = 600 69 | 70 | def make_env(env_id, rank, seed=0, monitor_dir = None): 71 | """ 72 | Utility function for multiprocessed env. 
73 | 74 | :param env_id: (str) the environment ID 75 | :param num_env: (int) the number of environments you wish to have in subprocesses 76 | :param seed: (int) the inital seed for RNG 77 | :param rank: (int) index of the subprocess 78 | """ 79 | 80 | def _init(): 81 | env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 82 | isBaseline=False,render=0 ) 83 | env.seed(seed + rank) 84 | env = RescaleAction(env, min_action, max_action) 85 | monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None 86 | if monitor_path is not None: 87 | os.makedirs(monitor_dir, exist_ok=True) 88 | return env 89 | set_random_seed(seed) 90 | return _init 91 | 92 | 93 | if __name__ == '__main__': 94 | run = wandb.init( 95 | project="Robust-OnRampMerging", 96 | name=f"MultiModal_noise", 97 | dir=dir, 98 | config=config, 99 | sync_tensorboard=True, # auto-upload sb3's tensorboard metrics 100 | monitor_gym=True, # auto-upload the videos of agents playing the game 101 | save_code=True, # optional 102 | magic=True 103 | ) 104 | 105 | env_id = "MultiMerge" 106 | num_cpu = 16# Number of processes to use 107 | # Create the vectorized environment 108 | # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) 109 | env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) 110 | env = VecFrameStack(env, n_stack=4) # stack 4 frames 111 | env = VecNormalize(env, norm_obs=True, norm_reward=True, training=True) 112 | 113 | # env = VecVideoRecorder(env, video_folder=f"./videos/{run.id}", 114 | # record_video_trigger=lambda x: x % config["sub_timesteps"] == 0, 115 | # video_length=300) 116 | 117 | env = VecMonitor(venv=env) 118 | # eval_env = MultiMerge(action_space=action_space, obsspaces=obsspaces, sumoParameters=sumoParameters, weights=weights, 119 | # isBaseline=False,render=0) 120 | #code = wandb.Artifact('project-source', type='code') 121 | #for path in glob.glob('**/*.py', recursive=True): 122 | # code.add_file(path) 123 | 124 | #wandb.run.use_artifact(code) 125 | model = PPO(config["policy_type"], env, 126 | verbose=3, 127 | learning_rate= 1e-5, 128 | n_steps = 512, 129 | batch_size = 256, 130 | n_epochs = 20, 131 | gamma = 0.99, 132 | gae_lambda = 0.9, 133 | clip_range = 0.2, 134 | clip_range_vf = None, 135 | ent_coef = 0.05, 136 | vf_coef= 0.5, 137 | max_grad_norm = 0.5, 138 | target_kl = 0.01, 139 | tensorboard_log=f"{dir}") 140 | model.learn( 141 | total_timesteps=int(config["total_timesteps"]), 142 | callback=WandbCallback( 143 | gradient_save_freq=5, 144 | model_save_freq=5000, 145 | model_save_path=f"{dir}/models/{run.id}", 146 | verbose=2, 147 | ), ) 148 | stats_path = os.path.join(f"{dir}/models/{run.id}/", "vec_normalize.pkl") 149 | env.save(stats_path) 150 | 151 | --------------------------------------------------------------------------------
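# --- Hedged illustration (appended; not a file from this repository) ---
# The *_Noise environment variants (BSM_Noise_No_Image, BSM_Noise_Image_Noise,
# Image_Noise_No_BSM, ...) and the --noise_sigma CLI flag used throughout the
# training scripts suggest Gaussian perturbation of the BSM-style observation
# channels. Their actual implementation is not shown in this dump; the sketch
# below is only a plausible reading, with sigma defaulting to the scripts'
# argparse default of 0.1. The dict keys match getObservations() in
# custom_envs/MultiMerge_1.py.
import numpy as np

def add_bsm_noise(state, sigma=0.1, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    noisy = dict(state)                          # shallow copy; 'image' left untouched
    for key in ("velocity", "xPos", "yPos"):     # BSM-style channels only
        noisy[key] = (state[key] + rng.normal(0.0, sigma, size=state[key].shape)).astype(np.float32)
    return noisy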