├── .gitignore ├── LICENSE ├── README.md ├── assets ├── .gitkeep ├── ICRA23_Video_Submission.mp4 ├── highway-env.gif ├── self_improvement.png └── system_overview.png ├── experiments ├── README.md ├── agents │ ├── main_agent.py │ └── searcher.py ├── algorithms │ ├── ams_search.py │ ├── bayesian_search.py │ ├── ce_search.py │ ├── grid_search.py │ ├── monte_carlo_search.py │ └── slurm_verification.sh ├── analyses │ ├── .gitkeep │ ├── algorithm_results.ipynb │ ├── bar_chart_plots.ipynb │ ├── connect_verifications.ipynb │ ├── distribution_plots.ipynb │ ├── evaluation_results.ipynb │ ├── search_space_3d.ipynb │ ├── training_results.ipynb │ └── trajectory_plots.ipynb ├── configs │ ├── ams_search.yaml │ ├── bayesian_search.yaml │ ├── ce_search.yaml │ ├── env_config.yaml │ ├── evaluation_config.yaml │ ├── grid_search.yaml │ ├── monte_carlo_search.yaml │ ├── ppo_config.yaml │ ├── reward_tuning.yaml │ ├── sac_config.yaml │ ├── self_improvement.yaml │ └── train_config.yaml ├── evaluation │ ├── evaluate_model.py │ └── slurm_eval.sh ├── models │ └── custom_torch_model.py ├── results │ └── .gitkeep ├── training │ ├── ppo_train.py │ ├── sac_train.py │ ├── self_improvement.py │ └── slurm_train.sh └── utils │ ├── reward_tuning.py │ ├── reward_tuning_slurm.sh │ ├── scenarios.py │ ├── training_utils.py │ └── validation_utils.py ├── highway_environment ├── README.md ├── highway_environment │ ├── __init__.py │ ├── create_env.py │ ├── default_configs │ │ ├── env_config.yaml │ │ └── train_config.yaml │ ├── envs │ │ ├── __init__.py │ │ ├── environment.py │ │ └── observation.py │ └── test_train.py └── setup.py ├── requirements.txt └── slides ├── .gitkeep ├── ICRA23PaperPresentation.pptx ├── ICRA23VideoPresentation.pptx └── ICRA23_Poster_Presentation.pdf /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/README.md -------------------------------------------------------------------------------- /assets/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/ICRA23_Video_Submission.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/ICRA23_Video_Submission.mp4 -------------------------------------------------------------------------------- /assets/highway-env.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/highway-env.gif -------------------------------------------------------------------------------- /assets/self_improvement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/self_improvement.png -------------------------------------------------------------------------------- /assets/system_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/system_overview.png -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/README.md -------------------------------------------------------------------------------- /experiments/agents/main_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/agents/main_agent.py -------------------------------------------------------------------------------- /experiments/agents/searcher.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/agents/searcher.py -------------------------------------------------------------------------------- /experiments/algorithms/ams_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/ams_search.py -------------------------------------------------------------------------------- /experiments/algorithms/bayesian_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/bayesian_search.py -------------------------------------------------------------------------------- /experiments/algorithms/ce_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/ce_search.py -------------------------------------------------------------------------------- /experiments/algorithms/grid_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/grid_search.py -------------------------------------------------------------------------------- /experiments/algorithms/monte_carlo_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/monte_carlo_search.py -------------------------------------------------------------------------------- /experiments/algorithms/slurm_verification.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/slurm_verification.sh -------------------------------------------------------------------------------- /experiments/analyses/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/analyses/algorithm_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/algorithm_results.ipynb -------------------------------------------------------------------------------- /experiments/analyses/bar_chart_plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/bar_chart_plots.ipynb -------------------------------------------------------------------------------- /experiments/analyses/connect_verifications.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/connect_verifications.ipynb -------------------------------------------------------------------------------- /experiments/analyses/distribution_plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/distribution_plots.ipynb -------------------------------------------------------------------------------- /experiments/analyses/evaluation_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/evaluation_results.ipynb -------------------------------------------------------------------------------- /experiments/analyses/search_space_3d.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/search_space_3d.ipynb -------------------------------------------------------------------------------- /experiments/analyses/training_results.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/training_results.ipynb -------------------------------------------------------------------------------- /experiments/analyses/trajectory_plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/trajectory_plots.ipynb -------------------------------------------------------------------------------- /experiments/configs/ams_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ams_search.yaml -------------------------------------------------------------------------------- /experiments/configs/bayesian_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/bayesian_search.yaml -------------------------------------------------------------------------------- /experiments/configs/ce_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ce_search.yaml -------------------------------------------------------------------------------- /experiments/configs/env_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/env_config.yaml -------------------------------------------------------------------------------- /experiments/configs/evaluation_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/evaluation_config.yaml -------------------------------------------------------------------------------- /experiments/configs/grid_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/grid_search.yaml -------------------------------------------------------------------------------- /experiments/configs/monte_carlo_search.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/monte_carlo_search.yaml -------------------------------------------------------------------------------- /experiments/configs/ppo_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ppo_config.yaml -------------------------------------------------------------------------------- /experiments/configs/reward_tuning.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/reward_tuning.yaml -------------------------------------------------------------------------------- /experiments/configs/sac_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/sac_config.yaml -------------------------------------------------------------------------------- /experiments/configs/self_improvement.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/self_improvement.yaml -------------------------------------------------------------------------------- /experiments/configs/train_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/train_config.yaml -------------------------------------------------------------------------------- /experiments/evaluation/evaluate_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/evaluation/evaluate_model.py -------------------------------------------------------------------------------- /experiments/evaluation/slurm_eval.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/evaluation/slurm_eval.sh -------------------------------------------------------------------------------- /experiments/models/custom_torch_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/models/custom_torch_model.py -------------------------------------------------------------------------------- /experiments/results/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/training/ppo_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/ppo_train.py -------------------------------------------------------------------------------- /experiments/training/sac_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/sac_train.py -------------------------------------------------------------------------------- /experiments/training/self_improvement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/self_improvement.py -------------------------------------------------------------------------------- /experiments/training/slurm_train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/slurm_train.sh -------------------------------------------------------------------------------- /experiments/utils/reward_tuning.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/reward_tuning.py -------------------------------------------------------------------------------- /experiments/utils/reward_tuning_slurm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/reward_tuning_slurm.sh -------------------------------------------------------------------------------- /experiments/utils/scenarios.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/scenarios.py -------------------------------------------------------------------------------- /experiments/utils/training_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/training_utils.py -------------------------------------------------------------------------------- /experiments/utils/validation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/validation_utils.py -------------------------------------------------------------------------------- /highway_environment/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/README.md -------------------------------------------------------------------------------- /highway_environment/highway_environment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/__init__.py -------------------------------------------------------------------------------- /highway_environment/highway_environment/create_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/create_env.py -------------------------------------------------------------------------------- /highway_environment/highway_environment/default_configs/env_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/default_configs/env_config.yaml -------------------------------------------------------------------------------- /highway_environment/highway_environment/default_configs/train_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/default_configs/train_config.yaml -------------------------------------------------------------------------------- /highway_environment/highway_environment/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/__init__.py -------------------------------------------------------------------------------- /highway_environment/highway_environment/envs/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/environment.py -------------------------------------------------------------------------------- /highway_environment/highway_environment/envs/observation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/observation.py -------------------------------------------------------------------------------- /highway_environment/highway_environment/test_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/test_train.py -------------------------------------------------------------------------------- /highway_environment/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/setup.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/requirements.txt -------------------------------------------------------------------------------- /slides/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /slides/ICRA23PaperPresentation.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23PaperPresentation.pptx -------------------------------------------------------------------------------- /slides/ICRA23VideoPresentation.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23VideoPresentation.pptx -------------------------------------------------------------------------------- /slides/ICRA23_Poster_Presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23_Poster_Presentation.pdf --------------------------------------------------------------------------------