├── .gitignore
├── LICENSE
├── README.md
├── assets
    ├── .gitkeep
    ├── ICRA23_Video_Submission.mp4
    ├── highway-env.gif
    ├── self_improvement.png
    └── system_overview.png
├── experiments
    ├── README.md
    ├── agents
    │   ├── main_agent.py
    │   └── searcher.py
    ├── algorithms
    │   ├── ams_search.py
    │   ├── bayesian_search.py
    │   ├── ce_search.py
    │   ├── grid_search.py
    │   ├── monte_carlo_search.py
    │   └── slurm_verification.sh
    ├── analyses
    │   ├── .gitkeep
    │   ├── algorithm_results.ipynb
    │   ├── bar_chart_plots.ipynb
    │   ├── connect_verifications.ipynb
    │   ├── distribution_plots.ipynb
    │   ├── evaluation_results.ipynb
    │   ├── search_space_3d.ipynb
    │   ├── training_results.ipynb
    │   └── trajectory_plots.ipynb
    ├── configs
    │   ├── ams_search.yaml
    │   ├── bayesian_search.yaml
    │   ├── ce_search.yaml
    │   ├── env_config.yaml
    │   ├── evaluation_config.yaml
    │   ├── grid_search.yaml
    │   ├── monte_carlo_search.yaml
    │   ├── ppo_config.yaml
    │   ├── reward_tuning.yaml
    │   ├── sac_config.yaml
    │   ├── self_improvement.yaml
    │   └── train_config.yaml
    ├── evaluation
    │   ├── evaluate_model.py
    │   └── slurm_eval.sh
    ├── models
    │   └── custom_torch_model.py
    ├── results
    │   └── .gitkeep
    ├── training
    │   ├── ppo_train.py
    │   ├── sac_train.py
    │   ├── self_improvement.py
    │   └── slurm_train.sh
    └── utils
    │   ├── reward_tuning.py
    │   ├── reward_tuning_slurm.sh
    │   ├── scenarios.py
    │   ├── training_utils.py
    │   └── validation_utils.py
├── highway_environment
    ├── README.md
    ├── highway_environment
    │   ├── __init__.py
    │   ├── create_env.py
    │   ├── default_configs
    │   │   ├── env_config.yaml
    │   │   └── train_config.yaml
    │   ├── envs
    │   │   ├── __init__.py
    │   │   ├── environment.py
    │   │   └── observation.py
    │   └── test_train.py
    └── setup.py
├── requirements.txt
└── slides
    ├── .gitkeep
    ├── ICRA23PaperPresentation.pptx
    ├── ICRA23VideoPresentation.pptx
    └── ICRA23_Poster_Presentation.pdf


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/README.md


--------------------------------------------------------------------------------
/assets/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/assets/ICRA23_Video_Submission.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/ICRA23_Video_Submission.mp4


--------------------------------------------------------------------------------
/assets/highway-env.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/highway-env.gif


--------------------------------------------------------------------------------
/assets/self_improvement.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/self_improvement.png


--------------------------------------------------------------------------------
/assets/system_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/assets/system_overview.png


--------------------------------------------------------------------------------
/experiments/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/README.md


--------------------------------------------------------------------------------
/experiments/agents/main_agent.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/agents/main_agent.py


--------------------------------------------------------------------------------
/experiments/agents/searcher.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/agents/searcher.py


--------------------------------------------------------------------------------
/experiments/algorithms/ams_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/ams_search.py


--------------------------------------------------------------------------------
/experiments/algorithms/bayesian_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/bayesian_search.py


--------------------------------------------------------------------------------
/experiments/algorithms/ce_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/ce_search.py


--------------------------------------------------------------------------------
/experiments/algorithms/grid_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/grid_search.py


--------------------------------------------------------------------------------
/experiments/algorithms/monte_carlo_search.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/monte_carlo_search.py


--------------------------------------------------------------------------------
/experiments/algorithms/slurm_verification.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/algorithms/slurm_verification.sh


--------------------------------------------------------------------------------
/experiments/analyses/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiments/analyses/algorithm_results.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/algorithm_results.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/bar_chart_plots.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/bar_chart_plots.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/connect_verifications.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/connect_verifications.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/distribution_plots.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/distribution_plots.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/evaluation_results.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/evaluation_results.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/search_space_3d.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/search_space_3d.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/training_results.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/training_results.ipynb


--------------------------------------------------------------------------------
/experiments/analyses/trajectory_plots.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/analyses/trajectory_plots.ipynb


--------------------------------------------------------------------------------
/experiments/configs/ams_search.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ams_search.yaml


--------------------------------------------------------------------------------
/experiments/configs/bayesian_search.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/bayesian_search.yaml


--------------------------------------------------------------------------------
/experiments/configs/ce_search.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ce_search.yaml


--------------------------------------------------------------------------------
/experiments/configs/env_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/env_config.yaml


--------------------------------------------------------------------------------
/experiments/configs/evaluation_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/evaluation_config.yaml


--------------------------------------------------------------------------------
/experiments/configs/grid_search.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/grid_search.yaml


--------------------------------------------------------------------------------
/experiments/configs/monte_carlo_search.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/monte_carlo_search.yaml


--------------------------------------------------------------------------------
/experiments/configs/ppo_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/ppo_config.yaml


--------------------------------------------------------------------------------
/experiments/configs/reward_tuning.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/reward_tuning.yaml


--------------------------------------------------------------------------------
/experiments/configs/sac_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/sac_config.yaml


--------------------------------------------------------------------------------
/experiments/configs/self_improvement.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/self_improvement.yaml


--------------------------------------------------------------------------------
/experiments/configs/train_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/configs/train_config.yaml


--------------------------------------------------------------------------------
/experiments/evaluation/evaluate_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/evaluation/evaluate_model.py


--------------------------------------------------------------------------------
/experiments/evaluation/slurm_eval.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/evaluation/slurm_eval.sh


--------------------------------------------------------------------------------
/experiments/models/custom_torch_model.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/models/custom_torch_model.py


--------------------------------------------------------------------------------
/experiments/results/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiments/training/ppo_train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/ppo_train.py


--------------------------------------------------------------------------------
/experiments/training/sac_train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/sac_train.py


--------------------------------------------------------------------------------
/experiments/training/self_improvement.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/self_improvement.py


--------------------------------------------------------------------------------
/experiments/training/slurm_train.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/training/slurm_train.sh


--------------------------------------------------------------------------------
/experiments/utils/reward_tuning.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/reward_tuning.py


--------------------------------------------------------------------------------
/experiments/utils/reward_tuning_slurm.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/reward_tuning_slurm.sh


--------------------------------------------------------------------------------
/experiments/utils/scenarios.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/scenarios.py


--------------------------------------------------------------------------------
/experiments/utils/training_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/training_utils.py


--------------------------------------------------------------------------------
/experiments/utils/validation_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/experiments/utils/validation_utils.py


--------------------------------------------------------------------------------
/highway_environment/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/README.md


--------------------------------------------------------------------------------
/highway_environment/highway_environment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/__init__.py


--------------------------------------------------------------------------------
/highway_environment/highway_environment/create_env.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/create_env.py


--------------------------------------------------------------------------------
/highway_environment/highway_environment/default_configs/env_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/default_configs/env_config.yaml


--------------------------------------------------------------------------------
/highway_environment/highway_environment/default_configs/train_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/default_configs/train_config.yaml


--------------------------------------------------------------------------------
/highway_environment/highway_environment/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/__init__.py


--------------------------------------------------------------------------------
/highway_environment/highway_environment/envs/environment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/environment.py


--------------------------------------------------------------------------------
/highway_environment/highway_environment/envs/observation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/envs/observation.py


--------------------------------------------------------------------------------
/highway_environment/highway_environment/test_train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/highway_environment/test_train.py


--------------------------------------------------------------------------------
/highway_environment/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/highway_environment/setup.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/requirements.txt


--------------------------------------------------------------------------------
/slides/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/slides/ICRA23PaperPresentation.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23PaperPresentation.pptx


--------------------------------------------------------------------------------
/slides/ICRA23VideoPresentation.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23VideoPresentation.pptx


--------------------------------------------------------------------------------
/slides/ICRA23_Poster_Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/resuldagdanov/self-improving-RL/HEAD/slides/ICRA23_Poster_Presentation.pdf


--------------------------------------------------------------------------------