├── README.md
└── Safe-RL
├── AlwaysSafe
├── .gitignore
├── LICENSE
├── Pipfile
├── README.md
├── agents
│ ├── __init__.py
│ ├── abs_opt_cmdp.py
│ └── opt_cmdp.py
├── planners
│ ├── __init__.py
│ ├── abs_lp_optimistic.py
│ ├── lp.py
│ └── lp_optimistic.py
├── scripts
│ ├── .gitignore
│ ├── __init__.py
│ ├── cliff_walking.py
│ ├── factored.py
│ └── simple.py
├── tests
│ ├── __init__.py
│ ├── test_abs_opt_cmdp.py
│ ├── test_lp_agent.py
│ ├── test_lp_optimistic.py
│ ├── test_lp_optimistic_abs.py
│ ├── test_opt_cmdp.py
│ └── test_training.py
└── util
│ ├── __init__.py
│ ├── grb.py
│ ├── mdp.py
│ └── training.py
├── AutomotiveSafeRL
├── .gitignore
├── Project.toml
├── README.md
├── RNNFiltering
│ ├── RNNFiltering.jl
│ ├── bagging_training.jl
│ ├── data_generation.jl
│ ├── datagen.sh
│ ├── generate_data.sh
│ ├── generate_dataset.jl
│ ├── load_model_weights.jl
│ ├── model_loading.jl
│ ├── scp_model.sh
│ ├── train.sh
│ ├── train_single.sh
│ ├── train_tracking.jl
│ └── visualize_prediction.ipynb
├── evaluation
│ ├── evaluation.jl
│ ├── evaluation_functions.jl
│ ├── evaluation_script.sh
│ ├── helpers.jl
│ └── parallel_evaluation.jl
├── notebooks
│ ├── baseline.ipynb
│ ├── baseline_policy.ipynb
│ ├── car_mdp.ipynb
│ ├── crosswalk.ipynb
│ ├── decomposition.ipynb
│ ├── decomposition2.ipynb
│ ├── evaluation_scenarios.ipynb
│ ├── graphs.ipynb
│ ├── interactive_evaluation.ipynb
│ ├── joint_mask.ipynb
│ ├── joint_problem.ipynb
│ ├── ped_mdp.ipynb
│ ├── pedcar_mdp.ipynb
│ ├── plot_results.ipynb
│ ├── plots.ipynb
│ ├── profiling.ipynb
│ ├── qmdp_approximation.ipynb
│ ├── test.ipynb
│ └── tracking.ipynb
├── old_scripts
│ ├── accepting_states.jl
│ ├── baseline_script.jl
│ ├── carmdp_product.jl
│ ├── carmdp_script.jl
│ ├── carmdp_vi_until.jl
│ ├── evaluation_script.jl
│ ├── fast_pedcar_vi.jl
│ ├── joint_eval.jl
│ ├── jointmdp_script.jl
│ ├── pedcar_local_vi.jl
│ ├── pedcar_script.jl
│ ├── pedcar_sync.jl
│ ├── pedcar_vi.jl
│ ├── pedcar_vi_benchmark.jl
│ ├── pedcar_vi_eval.jl
│ ├── pedmdp_local_vi.jl
│ ├── pedmdp_script.jl
│ └── pedmdp_vi_until.jl
├── src
│ ├── baseline_policy.jl
│ ├── decomposed_tracking.jl
│ ├── decomposition.jl
│ ├── masked_dqn.jl
│ ├── masking.jl
│ ├── qmdp_approximation.jl
│ ├── render_helpers.jl
│ └── util.jl
├── test
│ ├── runtests.jl
│ ├── test_car_mdp.jl
│ ├── test_discretization.jl
│ ├── test_interpolation.jl
│ └── test_pedestrian_mdp.jl
└── training_scripts
│ ├── carmdp_training.jl
│ ├── dqn_jointeval.jl
│ ├── jointmdp_training.jl
│ ├── pedcar_dqn.jl
│ ├── pedcar_eval.jl
│ ├── pedcar_training.jl
│ ├── pedcar_vi.jl
│ ├── pedmdp_training.jl
│ ├── process_utility.jl
│ ├── sparse_vi.jl
│ ├── training.sh
│ ├── training.tex
│ └── until_dqn.jl
├── Constraint_RL_MPC
├── .idea
│ ├── Constraint_RL_MPC.iml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── workspace.xml
├── Abgabe
│ ├── Buffer
│ │ ├── ReplayBuffer.py
│ │ └── __pycache__
│ │ │ └── ReplayBuffer.cpython-35.pyc
│ ├── Disturbances
│ │ ├── external_disturbances_old.mat
│ │ ├── external_disturbances_randn.mat
│ │ └── external_disturbances_uniform.mat
│ ├── Model
│ │ ├── Linear_Env.py
│ │ └── __pycache__
│ │ │ └── Linear_Env.cpython-35.pyc
│ ├── Neural_Network
│ │ ├── Actor_Model.py
│ │ ├── Critic_Model.py
│ │ ├── NeuralNetwork.py
│ │ └── __pycache__
│ │ │ ├── Actor_Model.cpython-35.pyc
│ │ │ ├── Critic_Model.cpython-35.pyc
│ │ │ └── NeuralNetwork.cpython-35.pyc
│ ├── Normalize
│ │ ├── MinMax.py
│ │ └── __pycache__
│ │ │ └── MinMax.cpython-35.pyc
│ ├── Pre_training
│ │ ├── Immediate_constraint_functions.py
│ │ ├── Test_immediate_constraint_functions.py
│ │ ├── __pycache__
│ │ │ └── constraints.cpython-35.pyc
│ │ ├── constraints.py
│ │ ├── constraints_test_E_low_weights.h5f
│ │ ├── constraints_test_E_up_weights.h5f
│ │ ├── constraints_test_T_low_weights.h5f
│ │ ├── constraints_test_T_up_weights.h5f
│ │ └── readme.txt
│ ├── Training_MPC
│ │ ├── MPC.py
│ │ ├── Main_MPC.py
│ │ ├── Main_System_Identification.py
│ │ ├── SI_MPC_weights.h5f
│ │ ├── SI_MinMax.npy
│ │ ├── __pycache__
│ │ │ └── MPC.cpython-35.pyc
│ │ └── readme.txt
│ └── Training_RL
│ │ ├── DDPG.py
│ │ ├── Main_RL.py
│ │ ├── __pycache__
│ │ └── DDPG.cpython-35.pyc
│ │ ├── ddpg_Test1_5_weights_actor.h5f
│ │ ├── ddpg_Test1_5_weights_critic.h5f
│ │ ├── ddpg_Test2_5_weights_actor.h5f
│ │ ├── ddpg_Test2_5_weights_critic.h5f
│ │ ├── ddpg_Test3_5_weights_actor.h5f
│ │ ├── ddpg_Test3_5_weights_critic.h5f
│ │ └── readme.txt
└── README.md
├── LeaveNoTrace
├── .gitignore
├── .gitmodules
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── coach_util.py
├── demo.py
├── env_util.py
├── envs
│ ├── __init__.py
│ ├── assets
│ │ ├── cliff_cheetah.xml
│ │ ├── cliff_walker.xml
│ │ ├── peg_insertion.xml
│ │ └── pusher.xml
│ ├── cliff_envs.py
│ ├── frozen_lake.py
│ ├── hopper.py
│ ├── peg_insertion.py
│ └── pusher.py
├── lnt.py
└── plot.png
├── PCPO
└── iclr_2020_code_submission.zip
├── RL-Safety-Algorithms
├── LICENSE
├── README.md
├── experiments
│ ├── benchmark_circle_tasks.py
│ ├── benchmark_gather_tasks.py
│ ├── benchmark_reach_tasks.py
│ ├── benchmark_run_tasks.py
│ └── safety_settings.py
├── rl_safety_algorithms
│ ├── __init__.py
│ ├── algs
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── cpo
│ │ │ ├── __init__.py
│ │ │ ├── cpo.py
│ │ │ └── defaults.py
│ │ ├── iwpg
│ │ │ ├── __init__.py
│ │ │ ├── defaults.py
│ │ │ └── iwpg.py
│ │ ├── lag-trpo
│ │ │ ├── __init__.py
│ │ │ ├── defaults.py
│ │ │ └── lag-trpo.py
│ │ ├── npg
│ │ │ ├── __init__.py
│ │ │ ├── defaults.py
│ │ │ └── npg.py
│ │ ├── pdo
│ │ │ ├── __init__.py
│ │ │ ├── defaults.py
│ │ │ └── pdo.py
│ │ ├── trpo
│ │ │ ├── __init__.py
│ │ │ ├── defaults.py
│ │ │ └── trpo.py
│ │ ├── utils.py
│ │ └── vtrace.py
│ ├── benchmark.py
│ ├── common
│ │ ├── __init__.py
│ │ ├── experiment_analysis.py
│ │ ├── loggers.py
│ │ ├── model.py
│ │ ├── mpi_tools.py
│ │ ├── multi_processing_utils.py
│ │ ├── online_mean_std.py
│ │ ├── trainer.py
│ │ └── utils.py
│ ├── play.py
│ └── train.py
├── setup.py
└── tests
│ ├── test_algs_mpi.py
│ ├── test_algs_single_thread.py
│ ├── test_gae.py
│ ├── test_mean_std.py
│ ├── test_mean_std_mpi.py
│ └── test_trust_region_utils.py
├── Safe-MBPO
├── .gitignore
├── LICENSE
├── README.md
├── config
│ ├── ant.json
│ ├── cheetah-no-flip.json
│ ├── hopper.json
│ └── humanoid.json
├── main.py
├── requirements.txt
└── src
│ ├── __init__.py
│ ├── checkpoint.py
│ ├── cli.py
│ ├── config.py
│ ├── defaults.py
│ ├── dynamics.py
│ ├── log.py
│ ├── normalization.py
│ ├── policy.py
│ ├── sampling.py
│ ├── shared.py
│ ├── smbpo.py
│ ├── squashed_gaussian.py
│ ├── ssac.py
│ ├── torch_util.py
│ ├── train.py
│ └── util.py
├── Safe-RL-Benchmark
├── .dockerignore
├── .gitignore
├── .travis.yml
├── LICENSE
├── Makefile
├── README.rst
├── SafeRLBench
│ ├── __init__.py
│ ├── algo
│ │ ├── README.rst
│ │ ├── __init__.py
│ │ ├── a3c.py
│ │ ├── policygradient.py
│ │ ├── q_learning.py
│ │ ├── safeopt.py
│ │ └── test.py
│ ├── base.py
│ ├── bench.py
│ ├── configuration.py
│ ├── envs
│ │ ├── README.rst
│ │ ├── __init__.py
│ │ ├── _quadrocopter
│ │ │ ├── __init__.py
│ │ │ ├── quadrocopter_classes.py
│ │ │ ├── quadrotor_dynamics.py
│ │ │ ├── quaternions.py
│ │ │ └── transformations.py
│ │ ├── general_mountaincar.py
│ │ ├── gym_wrap.py
│ │ ├── linear_car.py
│ │ ├── mdp.py
│ │ ├── quadrocopter.py
│ │ └── test.py
│ ├── error.py
│ ├── measure.py
│ ├── monitor.py
│ ├── policy
│ │ ├── __init__.py
│ │ ├── controller.py
│ │ ├── linear_policy.py
│ │ ├── neural_network.py
│ │ └── test.py
│ ├── spaces
│ │ ├── __init__.py
│ │ ├── bounded_space.py
│ │ ├── discrete_space.py
│ │ ├── rd_space.py
│ │ └── test.py
│ └── test
│ │ ├── test_bench.py
│ │ ├── test_configuration.py
│ │ ├── test_integration.py
│ │ └── test_measure.py
├── docs
│ ├── Makefile
│ ├── algorithm.rst
│ ├── api
│ │ ├── algo.rst
│ │ ├── bench.rst
│ │ ├── envs.rst
│ │ ├── measure.rst
│ │ ├── misc.rst
│ │ ├── policy.rst
│ │ ├── spaces.rst
│ │ └── srb.rst
│ ├── conf.py
│ ├── environment.rst
│ ├── index.rst
│ └── toc.rst
├── examples
│ ├── GettingStarted.ipynb
│ └── SafeOpt.ipynb
├── misc
│ ├── Dockerfile.python2
│ └── Dockerfile.python3
├── requirements.txt
├── requirements_dev.txt
├── setup.py
├── test_code.sh
└── tox.ini
├── Safe-Reinforcement-Learning
└── README.md
├── Safe_reinforcement_learning
├── README.md
├── Safe_RL_LQR_experiment.m
├── iterate_calculate.m
├── poster.pdf
├── quadconstr.m
├── quadhess.m
└── quadobj.m
├── Shield-Hybrid-Systems
├── .gitignore
├── Manifest.toml
├── Project.toml
├── README.md
├── Shared Code
│ ├── BBBarbaricReachabilityFunction.jl
│ ├── BBRigorousReachabilityFunction.jl
│ ├── BBShieldSynthesis.jl
│ ├── BBSquares.jl
│ ├── Ball.jl
│ ├── CCBarbaricReachabilityFunction.jl
│ ├── Cruise.jl
│ ├── DC-DC Converter.jl
│ ├── DCShielding.jl
│ ├── ExperimentUtilities.jl
│ ├── FlatUI.jl
│ ├── Get libbbshield.jl
│ ├── Get libccshield.jl
│ ├── Get libdcshield.jl
│ ├── Get libopshield.jl
│ ├── Get librwshield.jl
│ ├── OPShielding.jl
│ ├── OilPump.jl
│ ├── PlotsDefaults.jl
│ ├── RWShieldSynthesis.jl
│ ├── RWSquares.jl
│ ├── RandomWalk.jl
│ ├── ShieldSynthesis.jl
│ ├── Squares.jl
│ ├── libbbshield
│ │ ├── shield.c
│ │ └── shield_dump (sample).c
│ ├── libccshield
│ │ ├── postshield.c
│ │ ├── preshield.c
│ │ └── shield_dump (sample).c
│ ├── libdcshield
│ │ └── shield.c
│ ├── libopshield
│ │ ├── shield.c
│ │ └── shield_dump (sample).c
│ └── librwshield
│ │ ├── shield.c
│ │ └── shield_dump (sample).c
├── fig-BBGranularityCost
│ ├── Blueprints
│ │ ├── BB__Shielded.xml
│ │ └── TrainSaveEvaluateSingle.q
│ ├── ExtractQueryResults.jl
│ ├── Figure from CSV.jl
│ ├── Get libbbshield.jl
│ ├── Run Experiment.jl
│ └── Synthesize Set of Shields.jl
├── fig-BBShieldRobustness
│ ├── Check Robustness of Shields.jl
│ ├── Get libbbshield.jl
│ ├── Run Experiment.jl
│ └── StatisticalChecking.jl
├── fig-BBShieldingResultsGroup
│ ├── All Queries.py
│ ├── Blueprints
│ │ ├── BB__PostShielded.xml
│ │ ├── BB__PreShielded.xml
│ │ ├── BB__ShieldedLayabout.xml
│ │ ├── BB__Unshielded.xml
│ │ ├── PostShielded.q
│ │ ├── PreShielded.q
│ │ ├── ShieldedLayabout.q
│ │ └── UnShielded.q
│ ├── Example.png
│ ├── ReadMe.md
│ ├── ReadResults.jl
│ └── Run Experiment.jl
├── fig-BarbaricMethodAccuracy
│ ├── Example.png
│ ├── Reliability of Barbaric Method.jl
│ └── Run Experiment.jl
├── fig-CCShieldingResultsGroup
│ ├── All Queries.py
│ ├── Blueprints
│ │ ├── CC__PostShieldedDeterministic.xml
│ │ ├── CC__PostShieldedNondeterministic.xml
│ │ ├── CC__Shielded.xml
│ │ ├── CC__Unshielded.xml
│ │ ├── LoadEvaluate.q
│ │ ├── MinimizeCostEvaluate.q
│ │ ├── MinimizeInterventionsEvaluate.q
│ │ ├── NoStrategyEvaluate.q
│ │ ├── TrainSaveEvaluate.q
│ │ └── TrainSaveEvaluateSingle.q
│ ├── Example.png
│ ├── PostShield Strategy.jl
│ ├── ReadMe.md
│ ├── ReadResults.jl
│ └── Run Experiment.jl
├── fig-DCShieldingResultsGroup
│ ├── All Queries.py
│ ├── Blueprints
│ │ ├── DC__PostShielded.xml
│ │ ├── DC__PreShielded.xml
│ │ ├── DC__ShieldedLayabout.xml
│ │ ├── DC__Unshielded.xml
│ │ ├── PostShielded.q
│ │ ├── PreShielded.q
│ │ ├── ShieldedLayabout.q
│ │ └── Unshielded.q
│ ├── ReadMe.md
│ ├── ReadResults.jl
│ └── Run Experiment.jl
├── fig-DifferenceRigorousBarbaric
│ ├── Example.png
│ └── Run Experiment.jl
├── fig-NoRecovery
│ ├── BB No Recovery.jl
│ ├── Example.png
│ └── Run Experiment.jl
├── fig-OPShieldingResultsGroup
│ ├── All Queries.py
│ ├── Blueprints
│ │ ├── OP__PostShielded.xml
│ │ ├── OP__PreShielded.xml
│ │ ├── OP__ShieldedLayabout.xml
│ │ └── OP__Unshielded.xml
│ ├── Example.png
│ ├── OPStrategyVisualisation.jl
│ ├── ReadMe.md
│ ├── ReadResults.jl
│ └── Run Experiment.jl
├── fig-RWShieldingResultsGroup
│ ├── All Queries.py
│ ├── Blueprints
│ │ ├── PostShielded.q
│ │ ├── PreShielded.q
│ │ ├── RW__PostShielded.xml
│ │ ├── RW__PreShielded.xml
│ │ ├── RW__ShieldedLayabout.xml
│ │ ├── RW__Unshielded.xml
│ │ ├── ShieldedLayabout.q
│ │ └── Unshielded.q
│ ├── Example.png
│ ├── RandomWalk Shield.jl
│ ├── ReadMe.md
│ ├── ReadResults.jl
│ └── Run Experiment.jl
├── run_all.sh
├── tab-BBSynthesis
│ ├── Blueprints
│ │ ├── BB__PreShielded.xml
│ │ └── TrainSaveCheckSafety.q
│ ├── CheckSafetyOfPreshielded.jl
│ ├── Example.png
│ ├── ReadMe.md
│ ├── Run Experiment.jl
│ ├── Statistical Checking of Shield.jl
│ ├── Synthesize Set of Shields.jl
│ └── Table from CSVs.jl
├── tab-CCSynthesis
│ ├── Blueprints
│ │ ├── CC__PreShielded.xml
│ │ └── TrainSaveCheckSafety.q
│ ├── CC Statistical Checking of Shield.jl
│ ├── CC Synthesize Set of Shields.jl
│ ├── CheckSafetyOfPreshielded.jl
│ ├── Example.png
│ ├── ReadMe.md
│ ├── Run Experiment.jl
│ └── Table from CSVs.jl
├── tab-DCSynthesis
│ ├── Blueprints
│ │ ├── DC__PreShielded.xml
│ │ └── TrainSaveCheckSafety.q
│ ├── CheckSafetyOfPreshielded.jl
│ ├── DC Statistical Checking of Shield.jl
│ ├── DC Synthesize Set of Shields.jl
│ ├── DCShield.jl
│ ├── Run Experiment.jl
│ └── Table from CSVs.jl
├── tab-OPSynthesis
│ ├── Blueprints
│ │ ├── OP__PreShielded.xml
│ │ └── TrainSaveCheckSafety.q
│ ├── CheckSafetyOfPreshielded.jl
│ ├── OP Statistical Checking of Shield.jl
│ ├── OP Synthesize Set of Shields.jl
│ ├── OPShield.jl
│ ├── Run Experiment.jl
│ └── Table from CSVs.jl
└── tab-RWSynthesis
│ ├── Blueprints
│ ├── RW__PreShielded.xml
│ └── TrainSaveCheckSafety.q
│ ├── CheckSafetyOfPreshielded.jl
│ ├── Example.png
│ ├── RW Statistical Checking of Shield.jl
│ ├── RW Synthesize Set of Shields.jl
│ ├── ReadMe.md
│ ├── Run Experiment.jl
│ └── Table from CSVs.jl
├── safe-mbrl
├── .gitignore
├── baseline
│ ├── LICENSE
│ ├── README.md
│ ├── safe_rl
│ │ ├── __init__.py
│ │ ├── pg
│ │ │ ├── agents.py
│ │ │ ├── algos.py
│ │ │ ├── buffer.py
│ │ │ ├── network.py
│ │ │ ├── run_agent.py
│ │ │ ├── trust_region.py
│ │ │ └── utils.py
│ │ ├── sac
│ │ │ ├── __init__.py
│ │ │ └── sac.py
│ │ └── utils
│ │ │ ├── load_utils.py
│ │ │ ├── logx.py
│ │ │ ├── mpi_tf.py
│ │ │ ├── mpi_tools.py
│ │ │ ├── readme.md
│ │ │ ├── run_utils.py
│ │ │ └── serialization_utils.py
│ ├── scripts
│ │ ├── experiment.py
│ │ ├── plot.py
│ │ └── test_policy.py
│ └── setup.py
├── config.yml
├── data
│ ├── cg1
│ │ ├── cpo
│ │ │ └── cpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── ensemble-cem
│ │ │ ├── ensemble-cem_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-cem_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-cem_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-random
│ │ │ ├── ensemble-random_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-random_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-random_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-rce
│ │ │ ├── ensemble-rce_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-rce_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-rce_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── trpo-Lagrangian
│ │ │ └── trpo-Lagrangian
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── trpo
│ │ │ └── trpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ └── weights
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ ├── cg2
│ │ ├── cpo
│ │ │ └── cpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── ensemble-cem
│ │ │ ├── ensemble-cem_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-cem_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-cem_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-random
│ │ │ ├── ensemble-random_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-random_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-random_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-rce
│ │ │ ├── ensemble-rce_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-rce_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-rce_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── trpo-Lagrangian
│ │ │ └── trpo-Lagrangian
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── trpo
│ │ │ └── trpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ └── weights
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ ├── figures
│ │ ├── TestFigure3.png
│ │ ├── pg1-Cost.png
│ │ ├── pg1-Reward.png
│ │ ├── pg2-Cost.png
│ │ └── pg2-Reward.png
│ ├── pg1
│ │ ├── cpo
│ │ │ └── cpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── ensemble-cem
│ │ │ ├── ensemble-cem_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-cem_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-cem_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-random
│ │ │ ├── ensemble-random_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-random_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-random_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── ensemble-rce
│ │ │ ├── ensemble-rce_s0
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ ├── ensemble-rce_s10
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ │ └── ensemble-rce_s100
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── fix-dynamic-model-compare-optimizer
│ │ │ ├── model-ensemble-with-cem
│ │ │ │ └── model-ensemble-with-cem_s1000
│ │ │ │ │ ├── config.yml
│ │ │ │ │ └── progress.txt
│ │ │ ├── model-ensemble-with-random
│ │ │ │ └── model-ensemble-with-random_s1000
│ │ │ │ │ ├── config.yml
│ │ │ │ │ └── progress.txt
│ │ │ └── model-ensemble-with-ts
│ │ │ │ └── model-ensemble-with-ts_s1000
│ │ │ │ ├── config.yml
│ │ │ │ └── progress.txt
│ │ ├── trpo-Lagrangian
│ │ │ └── trpo-Lagrangian
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ ├── trpo
│ │ │ └── trpo
│ │ │ │ ├── config.json
│ │ │ │ └── progress.txt
│ │ └── weights
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ └── pg2
│ │ ├── cpo
│ │ ├── a-target10
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── b-target7.5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── c-target5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── d-target2.5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── e-target0.5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ └── f-target0.01
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── ensemble-cem
│ │ ├── ensemble-cem_s0
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── ensemble-cem_s10
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ └── ensemble-cem_s100
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── ensemble-random
│ │ ├── ensemble-random_s0
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── ensemble-random_s10
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ └── ensemble-random_s100
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── ensemble-rce
│ │ ├── ensemble-rce_s0
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── ensemble-rce_s10
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ └── ensemble-rce_s100
│ │ │ ├── config.yml
│ │ │ └── progress.txt
│ │ ├── trpo
│ │ └── trpo
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── trpo_lagrangian
│ │ ├── a-target10
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── b-target7.5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── c-target5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── d-taget2-5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ ├── f-target0.5
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ └── g-target0.01
│ │ │ ├── config.json
│ │ │ └── progress.txt
│ │ └── weights
│ │ ├── config.yml
│ │ └── progress.txt
├── env
│ ├── LICENSE
│ ├── README.md
│ ├── build
│ │ └── lib
│ │ │ └── safety_gym
│ │ │ ├── __init__.py
│ │ │ └── random_agent.py
│ ├── dist
│ │ └── safety_gym-0.0.0-py3.6.egg
│ ├── safety_gym.png
│ ├── safety_gym
│ │ ├── __init__.py
│ │ ├── bench
│ │ │ ├── bench_utils.py
│ │ │ └── characteristic_scores.json
│ │ ├── envs
│ │ │ ├── __init__.py
│ │ │ ├── engine.py
│ │ │ ├── mujoco.py
│ │ │ ├── suite-origin.py
│ │ │ ├── suite.py
│ │ │ └── world.py
│ │ ├── random_agent.py
│ │ ├── test
│ │ │ ├── test_bench.py
│ │ │ ├── test_button.py
│ │ │ ├── test_determinism.py
│ │ │ ├── test_engine.py
│ │ │ ├── test_envs.py
│ │ │ ├── test_goal.py
│ │ │ └── test_obs.py
│ │ └── xmls
│ │ │ ├── README.md
│ │ │ ├── car-origin.xml
│ │ │ ├── car.xml
│ │ │ ├── car_vel.xml
│ │ │ ├── doggo.xml
│ │ │ ├── point-origin.xml
│ │ │ ├── point.xml
│ │ │ └── rover4We.xml
│ └── setup.py
├── mbrl
│ ├── .gitignore
│ ├── __init__.py
│ ├── controllers
│ │ ├── __init__.py
│ │ ├── mpc_controller.py
│ │ └── safe_mpc_controller.py
│ ├── models
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── constraint_model.py
│ │ ├── ensemble.py
│ │ └── model.py
│ └── optimizers
│ │ ├── __init__.py
│ │ ├── cem.py
│ │ ├── optimizer.py
│ │ ├── random.py
│ │ └── rce.py
├── media
│ ├── cg1_random.gif
│ ├── cg1_rce.gif
│ ├── cg2_random.gif
│ ├── cg2_rce.gif
│ ├── pg1_random.gif
│ ├── pg1_rce.gif
│ ├── pg1_trpo.gif
│ ├── pg1_trpol.gif
│ ├── pg2_random.gif
│ ├── pg2_rce.gif
│ ├── pg2_trpo_10.gif
│ └── pg2_trpol_10.gif
├── readme.md
├── requirements.txt
├── run.py
├── script
│ ├── count.py
│ └── plot.py
└── utils
│ ├── __init__.py
│ ├── env_utils.py
│ ├── logx.py
│ ├── mpi_pytorch.py
│ ├── mpi_tools.py
│ ├── plot.py
│ ├── run_entrypoint.py
│ ├── run_utils.py
│ ├── serialization_utils.py
│ └── user_config.py
├── safeRL
├── .gitignore
├── .gitmodules
├── HCOPE
│ ├── filter.py
│ ├── hcope.py
│ ├── hcope_debug.py
│ ├── hcope_test.py
│ └── policies.py
├── LICENSE.txt
├── README.md
├── README.md~
├── citation.cff
├── importance_sampling
│ ├── importance_sampling.png
│ └── importance_sampling.py
├── results
│ ├── IS_dist_+_0.1.png
│ ├── IS_dist_minus_0.1.png
│ ├── IS_dist_random.png
│ ├── IS_variance.png
│ ├── Result.png
│ ├── Theorem.png
│ ├── safe_actions.gif
│ ├── safe_actions_instability.gif
│ ├── safety_layer.png
│ ├── safety_optimization.png
│ └── safety_signal.png
└── safe_exploration
│ ├── filter.py
│ ├── learn_safety_function.py
│ ├── logz.py
│ ├── lqr_env.py
│ ├── optimizers.py
│ ├── plotSafetyFuct.py
│ ├── policies_safe.py
│ ├── run_policy_contrained.py
│ ├── shared_noise.py
│ ├── train_safe_explorer.py
│ └── utils.py
├── safe_learning
├── .dockerignore
├── .gitignore
├── .travis.yml
├── Dockerfile.dev
├── Dockerfile.python2
├── Dockerfile.python3
├── LICENSE
├── Makefile
├── README.rst
├── docs
│ ├── Makefile
│ ├── _templates
│ │ └── template.rst
│ ├── api.rst
│ ├── conf.py
│ ├── index.rst
│ ├── introduction.rst
│ ├── make.bat
│ └── requirements.txt
├── examples
│ ├── 1d_example.ipynb
│ ├── 1d_region_of_attraction_estimate.ipynb
│ ├── README.rst
│ ├── adaptive_safety_verification.ipynb
│ ├── basic_dynamic_programming.ipynb
│ ├── inverted_pendulum.ipynb
│ ├── lyapunov_function_learning.ipynb
│ ├── plotting.py
│ ├── reinforcement_learning_cartpole.ipynb
│ ├── reinforcement_learning_pendulum.ipynb
│ └── utilities.py
├── requirements.txt
├── requirements_dev.txt
├── safe_learning
│ ├── __init__.py
│ ├── configuration.py
│ ├── functions.py
│ ├── lyapunov.py
│ ├── reinforcement_learning.py
│ ├── tests
│ │ ├── test_functions.py
│ │ ├── test_lyapunov.py
│ │ ├── test_rl.py
│ │ └── test_utilities.py
│ └── utilities.py
├── scripts
│ ├── jupyter_output.py
│ └── test_code.sh
└── setup.py
├── safe_near_optimal_mdp
├── .gitignore
├── GPSG.png
├── LICENSE
├── README.md
├── arguments.py
├── data
│ └── simple
│ │ └── random_settings.npz
├── gp_safety_gym.py
├── main_oracle.py
├── main_safemdp.py
├── main_seo.py
├── main_sno_mdp.py
├── simple_make_rand_settings.py
├── test
│ └── test_gp_safety_gym.py
└── utils
│ ├── mdp_utilities.py
│ ├── reward_utilities.py
│ └── safety_utilities.py
├── safe_rl_papers
├── LICENSE
└── README.md
├── safety-starter-agents
├── .gitignore
├── LICENSE
├── README.md
├── safe_rl
│ ├── __init__.py
│ ├── pg
│ │ ├── agents.py
│ │ ├── algos.py
│ │ ├── buffer.py
│ │ ├── network.py
│ │ ├── run_agent.py
│ │ ├── trust_region.py
│ │ └── utils.py
│ ├── sac
│ │ ├── __init__.py
│ │ └── sac.py
│ └── utils
│ │ ├── load_utils.py
│ │ ├── logx.py
│ │ ├── mpi_tf.py
│ │ ├── mpi_tools.py
│ │ ├── readme.md
│ │ ├── run_utils.py
│ │ └── serialization_utils.py
├── scripts
│ ├── experiment.py
│ ├── plot.py
│ └── test_policy.py
└── setup.py
└── vertex-net
├── .gitignore
├── README.md
├── __init__.py
├── algos
├── __init__.py
└── ddpy.py
├── envs
├── __init__.py
├── hovercraft.py
└── pendulum.py
├── nets
├── __init__.py
├── policy_net.py
├── value_net.py
└── vertex_policy_net.py
├── run_hovercraft.py
├── run_pendulum.py
└── utils
├── __init__.py
└── replay_buffer.py
/Safe-RL/AlwaysSafe/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .idea
3 | notebooks/
4 | results/
5 | Pipfile.lock
6 | __pycache__
7 | *.pyc
8 |
9 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Thiago D. Simão
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | url = "https://pypi.python.org/simple"
3 | verify_ssl = true
4 | name = "pypi"
5 |
6 | [packages]
7 | matplotlib = "*"
8 | pandas = "*"
9 | tqdm = "*"
10 | gym = "*"
11 | cvxpy = "*"
12 |
13 | [packages.gym_factored]
14 | git = "git://github.com/tdsimao/gym-factored.git"
15 | editable = true
16 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/agents/__init__.py:
--------------------------------------------------------------------------------
1 | from .opt_cmdp import OptCMDPAgent
2 | from .abs_opt_cmdp import AbsOptCMDPAgent
3 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/planners/__init__.py:
--------------------------------------------------------------------------------
1 | from .lp import LinearProgrammingPlanner
2 | from .lp_optimistic import OptimisticLinearProgrammingPlanner
3 | from .abs_lp_optimistic import AbsOptimisticLinearProgrammingPlanner
4 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/scripts/.gitignore:
--------------------------------------------------------------------------------
1 | sandbox
2 |
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/scripts/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/tests/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/util/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/AlwaysSafe/util/grb.py:
--------------------------------------------------------------------------------
1 | try:
 2 |     from gurobipy import Model, quicksum, GRB, GurobiError
 3 |     GUROBI_FOUND = True
 4 | except ModuleNotFoundError as e:
 5 |     GUROBI_FOUND = False
 6 |
 7 |
 8 | def solve_gurobi_lp(model, verbose=False, check_if_infeasible=False):
 9 |     if not verbose:
10 |         model.Params.OutputFlag = 0
11 |     model.optimize()
12 |
13 |     if model.status == GRB.Status.INF_OR_UNBD:
14 |         # Turn presolve off to determine whether model is infeasible or unbounded
15 |         model.setParam(GRB.Param.Presolve, 0)
16 |         model.optimize()
17 |
18 |     if model.status == GRB.Status.OPTIMAL:
19 |         # model.write('model.lp')
20 |         # model.write('model.sol')
21 |         if verbose:
22 |             print('Optimal objective: {}'.format(model.objVal))
23 |         return model
24 |     elif model.status == GRB.Status.UNBOUNDED:
25 |         model.write('model_unbounded.lp')
26 |         raise GurobiError(model.status,
27 |                           'Optimization stopped (UNBOUNDED), check the file model_unbounded.lp')
28 |     elif model.status == GRB.Status.INFEASIBLE:
29 |         if check_if_infeasible:
30 |             model.write('model_infeasible.lp')
31 |             model.computeIIS()
32 |             model.write("model.ilp")
33 |             raise GurobiError(model.status,
34 |                               'Optimization stopped (INFEASIBLE), check files model_infeasible.lp and model.ilp')
35 |     return model
36 |
--------------------------------------------------------------------------------
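A quick way to sanity-check `solve_gurobi_lp` above is to feed it a toy LP. The snippet below is an illustrative sketch, not part of the repository; it assumes gurobipy is installed and that the AlwaysSafe root is on the Python path so `util.grb` is importable.

    # Illustrative only: build a small LP with gurobipy and solve it via util/grb.py.
    from gurobipy import Model, GRB
    from util.grb import solve_gurobi_lp  # assumes the AlwaysSafe root is on PYTHONPATH

    m = Model("toy_lp")
    x = m.addVar(lb=0.0, name="x")
    y = m.addVar(lb=0.0, name="y")
    m.setObjective(x + 2 * y, GRB.MAXIMIZE)
    m.addConstr(x + y <= 4.0, name="budget")
    m.addConstr(y <= 3.0, name="cap")

    solve_gurobi_lp(m, verbose=True)  # prints 'Optimal objective: 7.0'
    print(x.X, y.X)                   # optimal values: 1.0 3.0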
/Safe-RL/AutomotiveSafeRL/.gitignore:
--------------------------------------------------------------------------------
1 | *.out
2 | *.jld
3 | *.jld2
4 | **/log*
5 | **/.ipynb_checkpoints
6 | *.lab
7 | *.tra
8 | *.csv
9 | *.webm
10 | *.bson
11 | *.hoa
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/datagen.sh:
--------------------------------------------------------------------------------
1 | nohup julia1.0 generate_dataset.jl --seed=1 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen1.jodhpur.out &
2 | nohup julia1.0 generate_dataset.jl --seed=2 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen2.jodhpur.out &
3 | nohup julia1.0 generate_dataset.jl --seed=3 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen3.jodhpur.out &
4 | nohup julia1.0 generate_dataset.jl --seed=4 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen4.jodhpur.out &
5 | nohup julia1.0 generate_dataset.jl --seed=5 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen5.jodhpur.out &
6 |
7 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/generate_data.sh:
--------------------------------------------------------------------------------
1 | nohup julia1.0 generate_dataset.jl --folder=/scratch/boutonm/ --ntrain=3000 --nval=500 --seed=1 > datagen.jodhpur.out &
2 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/model_loading.jl:
--------------------------------------------------------------------------------
1 | using Flux
2 | using StaticArrays
3 | using ProgressMeter
4 | using POMDPs
5 | using POMDPToolbox
6 | using AutomotiveDrivingModels
7 | using AutomotivePOMDPs
8 | using AutomotiveSensors
9 | using PedCar
10 | using BSON: @load
11 |
12 | mdp = PedCarMDP(pos_res=2.0, vel_res=2., ped_birth=0.7, car_birth=0.7)
13 | pomdp = UrbanPOMDP(env=mdp.env,
14 | sensor = GaussianSensor(false_positive_rate=0.05,
15 | pos_noise = LinearNoise(min_noise=0.5, increase_rate=0.05),
16 | vel_noise = LinearNoise(min_noise=0.5, increase_rate=0.05)),
17 | ego_goal = LaneTag(2, 1),
18 | max_cars=1,
19 | max_peds=1,
20 | car_birth=0.7,
21 | ped_birth=0.7,
22 | obstacles=false, # no fixed obstacles
23 | lidar=false,
24 | ego_start=20,
25 | ΔT=0.5)
26 |
27 | rng = MersenneTwister(1)
28 | policy = RandomPolicy(rng, pomdp, VoidUpdater())
29 |
30 |
31 | @load "model_1.bson" model
32 | @load "weights_1.bson" weights
33 |
34 | function loss(x, y)
35 |     l = mean(Flux.mse.(model.(x), y))
36 |     truncate!(model)
37 |     reset!(model)
38 |     return l
39 | end
40 |
41 | @time mean(loss(val_X[i], val_Y[i]) for i=1:length(val_X))
42 |
43 | loss.(val_X, val_Y)
44 |
45 | xs = Flux.batchseq(val_X)
46 | ys = Flux.batchseq(val_Y)
47 | loss(xs, ys)
48 |
49 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/scp_model.sh:
--------------------------------------------------------------------------------
1 | scp boutonm@bethpage:/home/boutonm/AutomotiveSafeRL/training_scripts/RNNFiltering/*.bson .
2 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/train.sh:
--------------------------------------------------------------------------------
1 | nohup julia1.0 bagging_training.jl --resume 10 --seed 10 > nn_1.jodhpur.out &
2 | nohup julia1.0 bagging_training.jl --resume 20 --seed 20 > nn_2.jodhpur.out &
3 | nohup julia1.0 bagging_training.jl --resume 30 --seed 30 > nn_3.jodhpur.out &
4 | nohup julia1.0 bagging_training.jl --resume 40 --seed 40 > nn_4.jodhpur.out &
5 | nohup julia1.0 bagging_training.jl --resume 50 --seed 50 > nn_5.jodhpur.out &
6 | #nohup julia1.0 bagging_training.jl --seed 6 > nn_6.jodhpur.out &
7 | #nohup julia1.0 bagging_training.jl --seed 7 > nn_7.jodhpur.out &
8 | #nohup julia1.0 bagging_training.jl --seed 8 > nn_8.jodhpur.out &
9 | #nohup julia1.0 bagging_training.jl --seed 9 > nn_9.jodhpur.out &
10 | #nohup julia1.0 bagging_training.jl --seed 10 > nn_10.jodhpur.out &
11 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/RNNFiltering/train_single.sh:
--------------------------------------------------------------------------------
1 | nohup julia1.0 train_tracking.jl --seed=1 --entity=car > car1.jodhpur.out &
2 | nohup julia1.0 train_tracking.jl --seed=2 --entity=car > car2.jodhpur.out &
3 | nohup julia1.0 train_tracking.jl --seed=3 --entity=car > car3.jodhpur.out &
4 | nohup julia1.0 train_tracking.jl --seed=4 --entity=car > car4.jodhpur.out &
5 | nohup julia1.0 train_tracking.jl --seed=5 --entity=car > car5.jodhpur.out &
6 |
7 | nohup julia1.0 train_tracking.jl --seed=1 --entity=ped > ped1.jodhpur.out &
8 | nohup julia1.0 train_tracking.jl --seed=2 --entity=ped > ped2.jodhpur.out &
9 | nohup julia1.0 train_tracking.jl --seed=3 --entity=ped > ped3.jodhpur.out &
10 | nohup julia1.0 train_tracking.jl --seed=4 --entity=ped > ped4.jodhpur.out &
11 | nohup julia1.0 train_tracking.jl --seed=5 --entity=ped > ped5.jodhpur.out &
12 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_product.jl:
--------------------------------------------------------------------------------
1 | rng = MersenneTwister(1)
2 | using AutomotivePOMDPs
3 | using MDPModelChecking
4 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
5 | using DiscreteValueIteration
6 | using ProgressMeter, Parameters, JLD
7 |
8 | params = UrbanParams(nlanes_main=1,
9 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
10 | crosswalk_length = [14.0, 14., 14.0],
11 | crosswalk_width = [4.0, 4.0, 3.1],
12 | stop_line = 22.0)
13 | env = UrbanEnv(params=params);
14 |
15 | mdp = CarMDP(env = env, vel_res=2.0, pos_res=3.0);
16 |
17 | function MDPModelChecking.labels(mdp::CarMDP, s::CarMDPState)
18 |     if s.crash
19 |         return ["crash"]
20 |     elseif s.ego.posF.s >= get_end(mdp.env.roadway[mdp.ego_goal]) &&
21 |            get_lane(mdp.env.roadway, s.ego).tag == mdp.ego_goal
22 |         return ["goal"]
23 |     else
24 |         return ["!crash", "!goal"]
25 |     end
26 | end
27 |
28 | property = "!crash U goal"
29 |
30 | solver = ModelCheckingSolver(property=property, solver=ValueIterationSolver())
31 |
32 | policy = solve(solver, mdp, verbose=true)
33 |
34 | JLD.save("carmdp.jld", "policy", policy)
35 | JLD.save("car_acc_states.jld", "accepting_states", policy.mdp.accepting_states)
36 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_vi_until.jl:
--------------------------------------------------------------------------------
1 | rng = MersenneTwister(1)
2 | @everywhere begin
3 | using AutomotivePOMDPs
4 | using MDPModelChecking
5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
6 | using DiscreteValueIteration
7 | using ProgressMeter, Parameters, JLD
8 | end
9 | params = UrbanParams(nlanes_main=1,
10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
11 | crosswalk_length = [14.0, 14., 14.0],
12 | crosswalk_width = [4.0, 4.0, 3.1],
13 | stop_line = 22.0)
14 | env = UrbanEnv(params=params);
15 |
16 | mdp = CarMDP(env = env, pos_res=2., vel_res=3., car_birth=0.7)
17 |
18 | # reachability analysis
19 | mdp.collision_cost = 0.
20 | mdp.γ = 1.
21 | mdp.goal_reward = 1.
22 |
23 | solver = ParallelValueIterationSolver(n_procs=7)
24 |
25 | policy = solve(solver, mdp, verbose=true)
26 | JLD.save("car_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy)
27 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/old_scripts/pedcar_vi_benchmark.jl:
--------------------------------------------------------------------------------
1 | @everywhere begin
2 | using POMDPs, POMDPToolbox, DiscreteValueIteration
3 | using AutomotivePOMDPs, AutomotiveDrivingModels
4 | end
5 | rng = MersenneTwister(1)
6 |
7 | params = UrbanParams(nlanes_main=1,
8 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
9 | crosswalk_length = [14.0, 14., 14.0],
10 | crosswalk_width = [4.0, 4.0, 3.1],
11 | stop_line = 22.0)
12 | env = UrbanEnv(params=params);
13 |
14 | mdp = PedCarMDP(env=env, pos_res=6.0, vel_res=3.0, ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0))
15 | # reachability analysis
16 | mdp.collision_cost = 0.
17 | mdp.γ = 1.
18 | mdp.goal_reward = 1.
19 |
20 | solver = ParallelValueIterationSolver(n_procs=8, max_iterations=4, belres=1e-4)
21 | policy = solve(solver, mdp, verbose=true)
22 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/old_scripts/pedmdp_vi_until.jl:
--------------------------------------------------------------------------------
1 | rng = MersenneTwister(1)
2 | @everywhere begin
3 | using AutomotivePOMDPs
4 | using MDPModelChecking
5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
6 | using DiscreteValueIteration
7 | using ProgressMeter, Parameters, JLD
8 | end
9 | params = UrbanParams(nlanes_main=1,
10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
11 | crosswalk_length = [14.0, 14., 14.0],
12 | crosswalk_width = [4.0, 4.0, 3.1],
13 | stop_line = 22.0)
14 | env = UrbanEnv(params=params);
15 |
16 | mdp = PedMDP(env = env, pos_res=1., vel_res=1., ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0))
17 |
18 | # reachability analysis
19 | mdp.collision_cost = 0.
20 | mdp.γ = 1.
21 | mdp.goal_reward = 1.
22 |
23 | solver = ParallelValueIterationSolver(n_procs=7)
24 |
25 | policy = solve(solver, mdp, verbose=true)
26 | JLD.save("ped_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy)
27 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/test/runtests.jl:
--------------------------------------------------------------------------------
1 | using Base.Test
2 | using Parameters
3 | #dep
4 | include("../AutomotivePOMDPs/AutomotivePOMDPs.jl")
5 | using AutomotivePOMDPs
6 | using POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel
7 | using GridInterpolations, StaticArrays
8 | include("mdp_models/discretization.jl")
9 | include("mdp_models/pedestrian_mdp/pomdp_types.jl")
10 | include("mdp_models/pedestrian_mdp/state_space.jl")
11 |
12 | rng = MersenneTwister(1)
13 |
14 | include("test_discretization.jl")
15 | include("test_pedestrian_mdp.jl")
16 | include("test_interpolation.jl")
17 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/test/test_car_mdp.jl:
--------------------------------------------------------------------------------
1 |
2 | function test_stateindexing(mdp::CarMDP)
 3 |     state_space = states(mdp)
 4 |     for (i, s) in enumerate(state_space)
 5 |         if i != stateindex(mdp, s)
 6 |             return false
 7 |         end
 8 |     end
 9 |     return true
10 | end
11 |
12 |
13 |
14 | params = UrbanParams(nlanes_main=1,
15 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
16 | crosswalk_length = [10.0, 10., 10.0],
17 | crosswalk_width = [4.0, 4.0, 3.1],
18 | stop_line = 22.0)
19 | env = UrbanEnv(params=params);
20 |
21 | mdp = CarMDP(env = env);
22 |
23 | @test test_stateindexing(mdp)
24 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/test/test_pedestrian_mdp.jl:
--------------------------------------------------------------------------------
1 |
2 |
3 | function test_stateindexing(mdp::PedMDP)
 4 |     state_space = states(mdp)
 5 |     for (i, s) in enumerate(state_space)
 6 |         if i != stateindex(mdp, s)
 7 |             return false
 8 |         end
 9 |     end
10 |     return true
11 | end
12 |
13 |
14 |
15 | params = UrbanParams(nlanes_main=1,
16 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)],
17 | crosswalk_length = [10.0, 10., 10.0],
18 | crosswalk_width = [4.0, 4.0, 3.1],
19 | stop_line = 22.0)
20 | env = UrbanEnv(params=params);
21 |
22 | mdp = PedMDP(env = env);
23 |
24 | @test test_stateindexing(mdp)
25 |
--------------------------------------------------------------------------------
/Safe-RL/AutomotiveSafeRL/training_scripts/training.sh:
--------------------------------------------------------------------------------
1 | nohup julia jointmdp_training.jl --log log_nm100 --cost 2 > log100.out &
2 | nohup julia jointmdp_training.jl --log log_nm101 --cost 3 > log101.out &
3 | nohup julia jointmdp_training.jl --log log_nm102 --cost 0.5 > log102.out &
4 | nohup julia jointmdp_training.jl --log log_nm103 --cost 10 > log103.out &
5 | nohup julia jointmdp_training.jl --log log_nm104 --cost 20 > log104.out &
6 |
7 | #nohup julia jointmdp_script.jl --log log60 --goal 1 > log60.out &
8 | #nohup julia jointmdp_script.jl --log log61 --goal 1.5 > log61.out &
9 | #nohup julia jointmdp_script.jl --log log62 --goal 2 > log62.out &
10 | #nohup julia jointmdp_script.jl --log log63 --goal 3 > log63.out &
11 | #nohup julia jointmdp_script.jl --log log64 --goal 5 > log64.out &
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/.idea/Constraint_RL_MPC.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/ReplayBuffer.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import random
3 |
4 |
5 | class ReplayBuffer():
6 |
 7 |     def __init__(self, buffer_size):
 8 |         self.buffer_size = buffer_size
 9 |         self.num_experiences = 0
10 |         self.buffer = deque()
11 |
12 |     def size(self):
13 |         return self.buffer_size
14 |
15 |     def add_with_dist(self, state, action, reward, new_state, done, dist):
16 |         experience = (state, action, reward, new_state, done, dist)
17 |         if self.num_experiences < self.buffer_size:
18 |             self.buffer.append(experience)
19 |             self.num_experiences += 1
20 |         else:
21 |             self.buffer.popleft()
22 |             self.buffer.append(experience)
23 |
24 |     def add(self, state, action, reward, new_state, done):
25 |         experience = (state, action, reward, new_state, done)
26 |         if self.num_experiences < self.buffer_size:
27 |             self.buffer.append(experience)
28 |             self.num_experiences += 1
29 |         else:
30 |             self.buffer.popleft()
31 |             self.buffer.append(experience)
32 |
33 |     def sample(self, batch_size):
34 |         # Randomly sample batch_size examples
35 |         if self.num_experiences < batch_size:
36 |             return random.sample(self.buffer, self.num_experiences)
37 |         else:
38 |             return random.sample(self.buffer, batch_size)
39 |
40 |     def erase(self):
41 |         self.buffer = deque()
42 |         self.num_experiences = 0
43 |
44 |
--------------------------------------------------------------------------------
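For context, a hypothetical usage of the buffer above (not from the repository): transitions are appended one per environment step, and `sample` returns whatever is available while the buffer is still filling.

    # Illustrative only: exercising ReplayBuffer from Abgabe/Buffer/ReplayBuffer.py.
    from Buffer.ReplayBuffer import ReplayBuffer  # assumes Abgabe/ is on PYTHONPATH

    buffer = ReplayBuffer(buffer_size=10000)
    # 'dist' carries the safety/constraint signal alongside the usual transition.
    buffer.add_with_dist(state=[0.0], action=[0.1], reward=-1.0,
                         new_state=[0.1], done=False, dist=[0.02])
    batch = buffer.sample(batch_size=64)  # returns only 1 sample here, since the buffer holds 1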
/Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/MinMax.py:
--------------------------------------------------------------------------------
1 | """
2 | Min - max normalization
3 | """
4 |
5 |
 6 | def minmax_norm(x, min_x, max_x):
 7 |     """
 8 |     This function normalizes data
 9 |     :param x: input data
10 |     :param min_x: minimum value
11 |     :param max_x: maximum value
12 |     :return: normalized input data x_norm
13 |     """
14 |     x_norm = (x - min_x)/(max_x - min_x)
15 |
16 |     return x_norm
17 |
18 |
19 | def minmax_norm_back(x_norm, min_x, max_x):
20 |     """
21 |     This function denormalizes data
22 |     :param x_norm: normalized input data
23 |     :param min_x: minimum value
24 |     :param max_x: maximum value
25 |     :return: original (denormalized) data x
26 |     """
27 |     x = x_norm * (max_x - min_x) + min_x
28 |
29 |     return x
30 |
--------------------------------------------------------------------------------
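A short round-trip check of the two helpers above (illustrative, not part of the repository):

    from Normalize.MinMax import minmax_norm, minmax_norm_back  # assumes Abgabe/ is on PYTHONPATH

    x, min_x, max_x = 21.5, 18.0, 26.0           # e.g. a temperature and its bounds
    x_norm = minmax_norm(x, min_x, max_x)        # (21.5 - 18) / (26 - 18) = 0.4375
    x_back = minmax_norm_back(x_norm, min_x, max_x)
    assert abs(x_back - x) < 1e-12               # denormalization inverts normalization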
/Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | ##### Immediate_constraint_functions.py ##################################################################
3 |
 4 | Pre-training phase to learn the immediate constraint functions;
 5 | has to be run for every constraint
6 |
7 |
8 | PARAMETER:
9 |
10 | num_samples = number of samples per episode
11 | num_episodes = number of episodes
12 |
13 | state_flag = 0 -> Temperature low, 1 -> Energy low, 2 -> Temperature up, 3 -> Energy up; defines the safety signal
14 |
15 | # define network parameters
16 | num_in
17 | num_out
18 | num_hidden
19 | activation
20 | activation_out
21 | optimizer
22 |
23 | OUTPUT:
24 |
25 | the weights of the trained network are saved in the same folder
26 |
27 |
28 | ##### Test_immediate_constraint_functions.py ##############################################################
29 |
30 | evaluation of the safety layer, to make sure that the constraints are working
31 | loads the neural network weights, so the networks have to be trained beforehand
32 |
--------------------------------------------------------------------------------
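To make the parameter list above concrete, here is a minimal sketch of such an immediate-constraint network. The layer sizes and the use of plain Keras are assumptions for illustration only; the repository's actual architecture lives in Immediate_constraint_functions.py.

    # Sketch only: a small feed-forward net mapping (state, action) features to one safety signal.
    from keras.models import Sequential
    from keras.layers import Dense

    num_in, num_hidden, num_out = 4, 32, 1                 # assumed sizes
    activation, activation_out, optimizer = 'relu', 'linear', 'adam'

    model = Sequential([
        Dense(num_hidden, activation=activation, input_shape=(num_in,)),
        Dense(num_out, activation=activation_out),
    ])
    model.compile(optimizer=optimizer, loss='mse')
    # after fitting on sampled transitions for the chosen state_flag:
    # model.save_weights('constraints_test_T_low_weights.h5f')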
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | ##### Main_System_Identification.py ################################################
3 |
 4 | trains a neural network for system identification of the model;
 5 | has to be run first
6 |
7 | PARAMETER:
8 |
9 | num_samples = number of samples per episode
10 | num_episodes = number of episodes
11 |
12 | # network parameters
13 | num_hidden
14 | activation
15 | activation_out
16 | optimizer
17 |
18 | # model parameters
19 | Q
20 | R
21 |
22 | dist_flag = 0 -> train without disturbances, 1 -> train with disturbances
23 |
24 |
25 | OUTPUT:
26 |
27 | the weights of the trained network are saved in the same folder
28 | the evolution of the error is plotted
29 |
30 |
31 | ##### Main_MPC.py ##################################################################
32 |
33 | Execution of the MPC algorithm with the trained network
34 |
35 |
36 | PARAMETER:
37 |
38 | network parameters have to be the same as the SI parameters
39 |
40 | N = prediction horizon
41 | S = samples to be evaluated
42 |
43 | OUTPUT:
44 |
45 | the evolution of the states and inputs is plotted
46 |
47 |
48 |
--------------------------------------------------------------------------------
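The receding-horizon idea behind Main_MPC.py can be summarized with the following sketch. It is an assumption-laden illustration (random shooting with a learned one-step model `si_model`, a gym-style `env`, input bounds, and cost weights `Q`, `R`); the repository's actual optimizer is in MPC.py.

    import numpy as np

    N, S = 10, 500                       # prediction horizon and number of evaluated samples
    n_candidates, n_inputs = 100, 1      # assumed: random-shooting candidates and input dimension
    u_min, u_max = -1.0, 1.0             # assumed input bounds

    state = env.reset()                  # `env` and `si_model` stand in for the repository's objects
    for t in range(S):
        # sample candidate input sequences and roll them out through the learned model
        candidates = np.random.uniform(u_min, u_max, size=(n_candidates, N, n_inputs))
        costs = np.zeros(n_candidates)
        for i, u_seq in enumerate(candidates):
            s = state
            for u in u_seq:
                s = si_model.predict(np.hstack([s, u])[None, :])[0]
                costs[i] += s @ Q @ s + u @ R @ u      # quadratic stage cost with weights Q, R
        best = candidates[np.argmin(costs)][0]         # apply only the first input of the best sequence
        state = env.step(best)[0]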
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f
--------------------------------------------------------------------------------
/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | ##### Main_RL.py ##################################################################
3 |
4 | Training phase and test phase of the DDPG algorithm
5 | it can be evaluated together with MPC, if enabled
6 |
7 |
8 | PARAMETER:
9 |
10 | num_samples = number of samples per episode
11 | num_episodes = number of episodes
12 |
13 | episodesTrain = number of episodes for the training
14 | episodesTest = number of episodes for the test
15 | stepsEpisodes = number of samples per episode during training
16 | stepsEpisodes_test = number of samples per episode during the test phase
17 |
18 | future_steps_tracing = number of steps the tracing trajectory is used from the future -> 0 = None
19 | buffersize = size of replay buffer
20 |
21 | disturbance = 0 -> no disturbance, 1 -> added disturbance
22 | future_steps_dist = number of steps the disturbance is used from the future -> 0 = None
23 |
24 | # parameter of the noise process
25 | sigma
26 | theta
27 | mu
28 |
29 | constraints = flag selecting the constraint handling [None, SafetyLayer, Rewardshaping]
30 |
31 | # Environmental details
32 | Q
33 | R
34 | ENV_NAME = Name where the weights are saved
35 |
36 |
37 | # MPC parameters
38 | do_MPC = Flag whether MPC should be evaluated
39 | N = Prediction horizon for MPC result
40 |
41 | OUTPUT:
42 |
43 | network weights of the trained network are saved in the same folder
44 | evolution of states and inputs is plotted
45 |
46 |
47 |
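The noise-process parameters sigma, theta and mu listed above are the usual parameters of an Ornstein-Uhlenbeck exploration process for DDPG. A generic NumPy sketch (an editor's illustration, not the repository's implementation) is:

```python
import numpy as np

def ou_noise(n_steps, mu=0.0, theta=0.15, sigma=0.2, dt=1.0, x0=0.0):
    """Generate a 1-D Ornstein-Uhlenbeck sample path."""
    x = np.empty(n_steps)
    x_prev = x0
    for t in range(n_steps):
        x_prev = (x_prev
                  + theta * (mu - x_prev) * dt
                  + sigma * np.sqrt(dt) * np.random.randn())
        x[t] = x_prev
    return x

noise = ou_noise(n_steps=1000)  # would be added to the actor's action during training
```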
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "coach"]
2 | path = coach
3 | url = https://github.com/ben-eysenbach/coach.git
4 |
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/envs/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/envs/frozen_lake.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv as _FrozenLakeEnv
2 | from gym import spaces
3 | import numpy as np
4 |
5 |
6 | class FrozenLakeEnv(_FrozenLakeEnv):
7 | """Modified version of FrozenLake-v0.
8 |
9 | 1. Convert integer states to one hot encoding.
10 | 2. Make the goal state reversible
11 | """
12 | def __init__(self, map_name):
13 | super(FrozenLakeEnv, self).__init__(map_name=map_name,
14 | is_slippery=False)
15 | self.observation_space = spaces.Box(low=np.zeros(self.nS),
16 | high=np.ones(self.nS))
17 | # Make the goal state not terminate
18 | goal_s = self.nS - 1
19 | left_s = goal_s - 1
20 | up_s = goal_s - int(np.sqrt(self.nS))
21 |
22 | self.P[goal_s] = {
23 | 0: [(1.0, left_s, 0.0, False)],
24 | 1: [(1.0, goal_s, 1.0, True)],
25 | 2: [(1.0, goal_s, 1.0, True)],
26 | 3: [(1.0, up_s, 0.0, True)],
27 | }
28 |
29 | def _s_to_one_hot(self, s):
30 | one_hot = np.zeros(self.nS)
31 | one_hot[s] = 1.
32 | return one_hot
33 |
34 | def step(self, a):
35 | (s, r, done, info) = super(FrozenLakeEnv, self).step(a)
36 | done = (s == self.nS - 1) # Assume we can't detect dangerous states
37 | one_hot = self._s_to_one_hot(s)
38 | r -= 1 # Make the reward be in {-1, 0}
39 | return (one_hot, r, done, info)
40 |
41 | def reset(self):
42 | s = super(FrozenLakeEnv, self).reset()
43 | one_hot = self._s_to_one_hot(s)
44 | return one_hot
45 |
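A possible way to exercise the modified environment above, assuming a gym version that is compatible with this wrapper and the standard '4x4' map name:

```python
# Editor's usage sketch for the FrozenLakeEnv subclass defined above.
env = FrozenLakeEnv(map_name='4x4')
obs = env.reset()                    # one-hot encoded start state, shape (16,)
obs, r, done, info = env.step(1)     # action 1 = "down" (deterministic, not slippery)
print(obs.shape, r, done)            # e.g. (16,) -1.0 False  (rewards shifted to {-1, 0})
```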
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/envs/hopper.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco.hopper import HopperEnv as _HopperEnv
2 |
3 |
4 | class HopperEnv(_HopperEnv):
5 | """Modified version of Hopper-v1."""
6 |
7 | def step(self, action):
8 | (obs, r, done, info) = super(HopperEnv, self).step(action)
9 | return (obs, r, False, info)
10 |
--------------------------------------------------------------------------------
/Safe-RL/LeaveNoTrace/plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/plot.png
--------------------------------------------------------------------------------
/Safe-RL/PCPO/iclr_2020_code_submission.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/PCPO/iclr_2020_code_submission.zip
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Sven Gronauer, Technical University Munich (TUM)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/README.md:
--------------------------------------------------------------------------------
1 | # RL-Safety-Algorithms
2 |
3 | Algorithms for Safe Reinforcement Learning Problems that were tested and
4 | benchmarked in the
5 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym).
6 |
7 | ## Installation
8 |
9 | Install this repository with:
10 |
11 | ```
12 | git clone https://github.com/SvenGronauer/RL-Safety-Algorithms.git
13 |
14 | cd RL-Safety-Algorithms
15 |
16 | pip install -e .
17 | ```
18 |
19 |
20 | ## Getting Started
21 |
22 | Works with every environment that is compatible with the OpenAI Gym interface:
23 |
24 | ```
25 | python -m rl_safety_algorithms.train --alg trpo --env MountainCarContinuous-v0
26 | ```
27 |
28 | For an open-source framework to benchmark and test safety, we recommend the
29 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym). To train an
30 | algorithm such as Constrained Policy Optimization, run:
31 |
32 | ```
33 | python -m rl_safety_algorithms.train --alg cpo --env SafetyBallCircle-v0
34 | ```
35 |
36 | ## Benchmark
37 |
38 | In order to benchmark tasks from the
39 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym),
40 | we have prepared scripts in the `experiments` directory.
41 |
42 | ```
43 | cd experiments/
44 | python benchmark_circle_tasks.py
45 | ```
46 |
47 | In our experiments, we used a Threadripper 3990X CPU with 64 physical CPU cores;
48 | thus, we ran the experiments with the following flag for optimal MPI usage:
49 |
50 | ```
51 | python benchmark_circle_tasks.py --num-cores 64
52 | ```
53 |
54 | Plots from experiment runs can also be taken from the
55 | [Bullet-Safety-Gym Benchmarks](https://github.com/SvenGronauer/Bullet-Safety-Gym/blob/master/docs/benchmark.md)
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/experiments/benchmark_circle_tasks.py:
--------------------------------------------------------------------------------
1 | from rl_safety_algorithms.benchmark import Benchmark
2 | import bullet_safety_gym # noqa
3 | from safety_settings import alg_setup, argument_parser
4 |
5 |
6 | def main(args):
7 | env_specific_kwargs = {
8 | 'SafetyBallCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000},
9 | 'SafetyCarCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000},
10 | 'SafetyDroneCircle-v0': {'epochs': 1000, 'steps_per_epoch': 64000},
11 | 'SafetyAntCircle-v0': {'epochs': 1500, 'steps_per_epoch': 64000},
12 | }
13 | bench = Benchmark(
14 | alg_setup,
15 | env_ids=list(env_specific_kwargs.keys()),
16 | log_dir=args.log_dir,
17 | num_cores=args.num_cores,
18 | num_runs=args.num_runs,
19 | env_specific_kwargs=env_specific_kwargs,
20 | use_mpi=True,
21 | init_seed=args.seed,
22 | )
23 | bench.run()
24 |
25 |
26 | if __name__ == '__main__':
27 | args = argument_parser()
28 | main(args)
29 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/experiments/benchmark_gather_tasks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | from rl_safety_algorithms.benchmark import Benchmark
4 | import bullet_safety_gym # noqa
5 | from safety_settings import alg_setup, argument_parser
6 |
7 |
8 | def main(args):
9 | env_specific_kwargs = {
10 | 'SafetyBallGather-v0': {'epochs': 500, 'cost_limit': 0.2,
11 | 'steps_per_epoch': 32000},
12 | 'SafetyCarGather-v0': {'epochs': 500, 'cost_limit': 0.2,
13 | 'steps_per_epoch': 32000},
14 | 'SafetyDroneGather-v0': {'epochs': 1000, 'cost_limit': 0.2,
15 | 'steps_per_epoch': 64000},
16 | 'SafetyAntGather-v0': {'epochs': 1000, 'cost_limit': 0.2,
17 | 'steps_per_epoch': 64000}
18 | }
19 | bench = Benchmark(
20 | alg_setup,
21 | env_ids=list(env_specific_kwargs.keys()),
22 | log_dir=args.log_dir,
23 | num_cores=args.num_cores,
24 | num_runs=args.num_runs,
25 | env_specific_kwargs=env_specific_kwargs,
26 | use_mpi=True,
27 | init_seed=args.seed,
28 | )
29 | bench.run()
30 |
31 |
32 | if __name__ == '__main__':
33 | args = argument_parser()
34 | main(args)
35 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/experiments/benchmark_reach_tasks.py:
--------------------------------------------------------------------------------
1 | from rl_safety_algorithms.benchmark import Benchmark
2 | import bullet_safety_gym # noqa
3 | from safety_settings import alg_setup, argument_parser
4 |
5 |
6 | def main(args):
7 | env_specific_kwargs = {
8 | 'SafetyBallReach-v0': {'epochs': 500, 'steps_per_epoch': 32000,
9 | 'cost_limit': 10}, # terminates after 250 steps
10 | 'SafetyCarReach-v0': {'epochs': 1000, 'steps_per_epoch': 32000,
11 | 'cost_limit': 10}, # terminates after 500 steps
12 | 'SafetyDroneReach-v0': {'epochs': 1000, 'steps_per_epoch': 64000,
13 | 'cost_limit': 10}, # terminates after 500 steps
14 | 'SafetyAntReach-v0': {'epochs': 1500, 'steps_per_epoch': 64000},
15 | }
16 | bench = Benchmark(
17 | alg_setup,
18 | env_ids=list(env_specific_kwargs.keys()),
19 | log_dir=args.log_dir,
20 | num_cores=args.num_cores,
21 | num_runs=args.num_runs,
22 | env_specific_kwargs=env_specific_kwargs,
23 | use_mpi=True,
24 | init_seed=args.seed
25 | )
26 | bench.run()
27 |
28 |
29 | if __name__ == '__main__':
30 | args = argument_parser()
31 | main(args)
32 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/experiments/benchmark_run_tasks.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | from rl_safety_algorithms.benchmark import Benchmark
4 | import bullet_safety_gym # noqa
5 | from safety_settings import alg_setup, argument_parser
6 |
7 |
8 | def main(args):
9 | env_specific_kwargs = {
10 | 'SafetyBallRun-v0': {'epochs': 100, 'steps_per_epoch': 32000},
11 | 'SafetyCarRun-v0': {'epochs': 200, 'steps_per_epoch': 32000},
12 | 'SafetyDroneRun-v0': {'epochs': 500, 'steps_per_epoch': 64000},
13 | 'SafetyAntRun-v0': {'epochs': 500, 'steps_per_epoch': 64000},
14 | }
15 | bench = Benchmark(
16 | alg_setup,
17 | env_ids=list(env_specific_kwargs.keys()),
18 | log_dir=args.log_dir,
19 | num_cores=args.num_cores,
20 | num_runs=args.num_runs,
21 | env_specific_kwargs=env_specific_kwargs,
22 | use_mpi=True,
23 | init_seed=args.seed,
24 | )
25 | bench.run()
26 |
27 |
28 | if __name__ == '__main__':
29 | args = argument_parser()
30 | main(args)
31 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/experiments/safety_settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 |
5 | alg_setup = {
6 | 'trpo': {"target_kl": [0.001, 0.01]},
7 | 'lag-trpo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2],
8 | 'lambda_lr': [0.001, 0.01, 0.1]}, # SGD is default
9 | 'cpo': {'target_kl': [1.0e-4, 5.0e-4, 1.0e-3], 'lam_c': [0.50, 0.90, 0.95]},
10 | 'pdo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2],
11 | 'lambda_lr': [0.001, 0.01, 0.1]}, # Adam is default
12 | }
13 |
14 |
15 | def get_alg_setup():
16 | return alg_setup
17 |
18 |
19 | def argument_parser():
20 | n_cpus = os.cpu_count()
21 | parser = argparse.ArgumentParser(
22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter
23 | )
24 | parser.add_argument('--num-cores', '-c', type=int, default=n_cpus,
25 | help='Number of parallel processes generated.')
26 | parser.add_argument('--num-runs', '-r', type=int, default=4,
27 | help='Number of total runs that are executed.')
28 | parser.add_argument('--log-dir', type=str, default='/var/tmp/ga87zej',
29 | help='Define a custom directory for logging.')
30 | parser.add_argument('--seed', type=int, default=0,
31 | help='Define the initial seed.')
32 | args = parser.parse_args()
33 | return args
34 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/defaults.py:
--------------------------------------------------------------------------------
1 | def defaults():
2 | return dict(
3 | actor='mlp',
4 | ac_kwargs={
5 | 'pi': {'hidden_sizes': (64, 64),
6 | 'activation': 'tanh'},
7 | 'val': {'hidden_sizes': (64, 64),
8 | 'activation': 'tanh'}
9 | },
10 | adv_estimation_method='gae',
11 | epochs=300, # 9.8M steps
12 | gamma=0.99,
13 | lam_c=0.95,
14 | steps_per_epoch=64 * 1000, # default: 64k
15 | target_kl=0.0001,
16 | use_exploration_noise_anneal=True
17 | )
18 |
19 |
20 | def locomotion():
21 | """Default hyper-parameters for Bullet's locomotion environments."""
22 | params = defaults()
23 | params['epochs'] = 312
24 | params['max_ep_len'] = 1000
25 | params['steps_per_epoch'] = 32 * 1000
26 | params['vf_lr'] = 3e-4 # default choice is Adam
27 | return params
28 |
29 |
30 | # Hack to circumvent kwarg errors with the official PyBullet Envs
31 | def gym_locomotion_envs():
32 | params = locomotion()
33 | return params
34 |
35 |
36 | def gym_manipulator_envs():
37 | """Default hyper-parameters for Bullet's manipulation environments."""
38 | params = defaults()
39 | params['epochs'] = 312
40 | params['max_ep_len'] = 150
41 | params['steps_per_epoch'] = 32 * 1000
42 | params['vf_lr'] = 3e-4 # default choice is Adam
43 | return params
44 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/defaults.py:
--------------------------------------------------------------------------------
1 | """
2 | Define default parameters for Importance-weighted Policy Gradient (IWPG)
3 | algorithm.
4 | """
5 |
6 |
7 | def defaults():
8 | return dict(
9 | actor='mlp',
10 | ac_kwargs={
11 | 'pi': {'hidden_sizes': (64, 64),
12 | 'activation': 'tanh'},
13 | 'val': {'hidden_sizes': (64, 64),
14 | 'activation': 'tanh'}
15 | },
16 | adv_estimation_method='gae',
17 | epochs=300,
18 | gamma=0.99,
19 | steps_per_epoch=32 * 1000,
20 | # Early stopping criterion adds robustness towards hyper-parameters
21 | # see "Successful ingredients" Paper
22 | use_kl_early_stopping=True,
23 | )
24 |
25 |
26 | def locomotion():
27 | """Default hyper-parameters for Bullet's locomotion environments."""
28 | params = defaults()
29 | params['epochs'] = 312
30 | params['max_ep_len'] = 1000
31 | params['pi_lr'] = 3e-4 # default choice is Adam
32 | params['steps_per_epoch'] = 8 * 1000
33 | params['vf_lr'] = 3e-4 # default choice is Adam
34 | return params
35 |
36 |
37 | # Hack to circumvent kwarg errors with the official PyBullet Envs
38 | def gym_locomotion_envs():
39 | params = locomotion()
40 | return params
41 |
42 |
43 | def gym_manipulator_envs():
44 | """Default hyper-parameters for Bullet's manipulation environments."""
45 | params = defaults()
46 | params['epochs'] = 312
47 | params['max_ep_len'] = 150
48 | params['pi_lr'] = 3e-4 # default choice is Adam
49 | params['steps_per_epoch'] = 32 * 1000
50 | params['vf_lr'] = 3e-4 # default choice is Adam
51 | return params
52 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/defaults.py:
--------------------------------------------------------------------------------
1 | """
2 | Define default parameters for Lagrangian-TRPO algorithm.
3 | """
4 |
5 |
6 | def defaults():
7 | return dict(
8 | actor='mlp',
9 | ac_kwargs={
10 | 'pi': {'hidden_sizes': (64, 64),
11 | 'activation': 'tanh'},
12 | 'val': {'hidden_sizes': (64, 64),
13 | 'activation': 'tanh'}
14 | },
15 | adv_estimation_method='gae',
16 | epochs=300,
17 | gamma=0.99,
18 | steps_per_epoch=64 * 1000,
19 | use_exploration_noise_anneal=True
20 | )
21 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/defaults.py:
--------------------------------------------------------------------------------
1 | """
2 | Define default parameters for NPG algorithm.
3 | """
4 |
5 |
6 | def defaults():
7 | return dict(
8 | actor='mlp',
9 | ac_kwargs={
10 | 'pi': {'hidden_sizes': (64, 64),
11 | 'activation': 'tanh'},
12 | 'val': {'hidden_sizes': (64, 64),
13 | 'activation': 'tanh'}
14 | },
15 | adv_estimation_method='gae',
16 | epochs=300,
17 | gamma=0.99,
18 | steps_per_epoch=64 * 1000,
19 | target_kl=0.01,
20 | )
21 |
22 |
23 | def bullet():
24 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0."""
25 | return defaults()
26 |
27 |
28 | def gym_locomotion_envs():
29 | """Default hyper-parameters for Bullet's locomotion environments."""
30 | params = defaults()
31 | params['epochs'] = 312
32 | params['max_ep_len'] = 1000
33 | params['pi_lr'] = 1e-4 # default choice is Adam
34 | params['steps_per_epoch'] = 32 * 1000
35 | return params
36 |
37 |
38 | def gym_manipulator_envs():
39 | params = defaults()
40 | params['epochs'] = 312
41 | params['max_ep_len'] = 150
42 | params['pi_lr'] = 1e-4 # default choice is Adam
43 | params['steps_per_epoch'] = 32 * 1000
44 | return params
45 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/defaults.py:
--------------------------------------------------------------------------------
1 | def defaults():
2 | return dict(
3 | actor='mlp',
4 | ac_kwargs={
5 | 'pi': {'hidden_sizes': (64, 64),
6 | 'activation': 'tanh'},
7 | 'val': {'hidden_sizes': (64, 64),
8 | 'activation': 'tanh'}
9 | },
10 | adv_estimation_method='gae',
11 | epochs=300, # 9.8M steps
12 | gamma=0.99,
13 | lambda_lr=0.001,
14 | lambda_optimizer='Adam',
15 | steps_per_epoch=64 * 1000,
16 | target_kl=0.001,
17 | use_exploration_noise_anneal=True
18 | )
19 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/defaults.py:
--------------------------------------------------------------------------------
1 | """
2 | Define default parameters for the TRPO algorithm.
3 | """
4 |
5 |
6 | def defaults():
7 | return dict(
8 | actor='mlp',
9 | ac_kwargs={
10 | 'pi': {'hidden_sizes': (64, 64),
11 | 'activation': 'tanh'},
12 | 'val': {'hidden_sizes': (64, 64),
13 | 'activation': 'tanh'}
14 | },
15 | adv_estimation_method='gae',
16 | epochs=300,
17 | gamma=0.99,
18 | steps_per_epoch=64 * 1000, # default: 64k
19 | target_kl=0.01,
20 | use_exploration_noise_anneal=True
21 | )
22 |
23 |
24 | def bullet():
25 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0."""
26 | return defaults()
27 |
28 |
29 | def gym_locomotion_envs():
30 | """Default hyper-parameters for Bullet's locomotion environments."""
31 | params = defaults()
32 | params['epochs'] = 312
33 | params['max_ep_len'] = 1000
34 | params['steps_per_epoch'] = 64 * 1000
35 | return params
36 |
37 |
38 | def gym_manipulator_envs():
39 | params = defaults()
40 | params['epochs'] = 312
41 | params['max_ep_len'] = 150
42 | params['steps_per_epoch'] = 32 * 1000
43 | return params
44 |
45 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | import sys
3 |
4 | if sys.version_info.major != 3:
5 | raise TypeError(
6 | 'This Python is only compatible with Python 3, but you are running '
7 | 'Python {}. The installation will likely fail.'.format(
8 | sys.version_info.major))
9 |
10 | with open("README.md", "r") as fh:
11 | long_description = fh.read()
12 |
13 | setuptools.setup(
14 | name="rl_safety_algorithms", # this is the name displayed in 'pip list'
15 | version="0.1",
16 | author="Sven Gronauer",
17 | author_email="sven.gronauer@tum.de",
18 | description="Algorithms for Safe Reinforcement Learning Problems.",
19 | install_requires=[
20 | 'mpi4py', # can be skipped if you want to use single threads
21 | 'numpy',
22 | 'torch'
23 | ],
24 | long_description=long_description,
25 | long_description_content_type="text/markdown",
26 | url="https://github.com/sven.gronauer",
27 | packages=setuptools.find_packages(),
28 | classifiers=[
29 | "Programming Language :: Python :: 3",
30 | "License :: OSI Approved :: MIT License",
31 | "Operating System :: OS Independent",
32 | ],
33 | )
34 |
--------------------------------------------------------------------------------
/Safe-RL/RL-Safety-Algorithms/tests/test_algs_single_thread.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import gym
3 | import pybullet_envs # noqa
4 | import rl_safety_algorithms.common.utils as U
5 | from rl_safety_algorithms.algs import core
6 | import inspect
7 | import sys
8 | from rl_safety_algorithms.common.loggers import setup_logger_kwargs
9 |
10 |
11 | class TestAlgorithms(unittest.TestCase):
12 |
13 | @staticmethod
14 | def check_alg(alg_name, env_id):
15 | """" Run one epoch update with algorithm."""
16 | print(f'Run {alg_name}.')
17 | defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
18 | defaults['epochs'] = 1
19 | defaults['num_mini_batches'] = 4
20 | defaults['steps_per_epoch'] = 1000
21 | defaults['verbose'] = False
22 |
23 | defaults['logger_kwargs'] = setup_logger_kwargs(
24 | exp_name='unittest',
25 | seed=0,
26 | base_dir='/var/tmp/',
27 | datestamp=True,
28 | level=0,
29 | use_tensor_board=True,
30 | verbose=False)
31 | alg = U.get_alg_class(alg_name, env_id, **defaults)
32 | # sanity check of argument passing
33 | assert alg.alg == alg_name, f'Expected {alg_name} but got {alg.alg}'
34 | # return learn_fn(env_id, **defaults)
35 | ac, env = alg.learn()
36 |
37 | return ac, env
38 |
39 | def test_algorithms(self):
40 | """ Run all the specified algorithms."""
41 | algs = ['iwpg', 'npg', 'trpo', 'lag-trpo', 'pdo', 'cpo']
42 | for alg in algs:
43 | ac, env = self.check_alg(alg, 'HopperBulletEnv-v0')
44 | self.assertTrue(isinstance(env, gym.Env))
45 |
46 |
47 | if __name__ == '__main__':
48 | unittest.main()
49 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Garrett Thomas
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/README.md:
--------------------------------------------------------------------------------
1 | # Safe-MBPO
2 | Code for the NeurIPS 2021 paper "Safe Reinforcement Learning by Imagining the Near Future" by Garrett Thomas, Yuping Luo, and Tengyu Ma.
3 |
4 | Some code is borrowed from [Force](https://github.com/gwthomas/force).
5 |
6 | ## Installation
7 | We are using Python 3.8. The required packages can be installed via
8 |
9 | pip install -r requirements.txt
10 |
11 | You must also set the `ROOT_DIR` in `src/defaults.py`.
12 | This is where experiments' logs and checkpoints will be placed.
13 |
14 | Once setup is complete, run the code using the following command:
15 |
16 | python main.py -c config/ENV.json
17 |
18 | where ENV is replaced appropriately. To override a specific hyperparameter, add `-s PARAM VALUE` where `PARAM` is a string.
19 | Use `.` to specify hierarchical structure in the config, e.g. `-s alg_cfg.horizon 10`.
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/config/ant.json:
--------------------------------------------------------------------------------
1 | {
2 | "env_name": "ant",
3 | "alg_cfg": {
4 | "sac_cfg": {
5 | "target_entropy": -4.0
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/config/cheetah-no-flip.json:
--------------------------------------------------------------------------------
1 | {
2 | "env_name": "cheetah-no-flip",
3 | "alg_cfg": {
4 | "sac_cfg": {
5 | "target_entropy": -3.0
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/config/hopper.json:
--------------------------------------------------------------------------------
1 | {
2 | "env_name": "hopper",
3 | "alg_cfg": {
4 | "sac_cfg": {
5 | "target_entropy": -1.0
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/config/humanoid.json:
--------------------------------------------------------------------------------
1 | {
2 | "env_name": "humanoid",
3 | "alg_cfg": {
4 | "sac_cfg": {
5 | "target_entropy": -2.0
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | tqdm
4 | h5py
5 | opencv-python
6 | torch==1.4.0
7 | gym==0.17.2
8 | mujoco-py==2.0.2.13
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe-MBPO/src/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/src/defaults.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | PRECISION = 2
4 | OPTIMIZER = torch.optim.Adam
5 | BATCH_SIZE = 256
6 | ACTOR_LR = 3e-4
7 | CRITIC_LR = 1e-3
8 |
9 | # ROOT_DIR = None # set a path (directory) where experiments should be saved
10 | ROOT_DIR = '/tiger/u/gwthomas/data/smbpo'
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/src/normalization.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .torch_util import Module
4 |
5 |
6 | class Normalizer(Module):
7 | def __init__(self, dim, epsilon=1e-6):
8 | super().__init__()
9 | self.dim = dim
10 | self.epsilon = epsilon
11 | self.register_buffer('mean', torch.zeros(dim))
12 | self.register_buffer('std', torch.zeros(dim))
13 |
14 | def fit(self, X):
15 | assert torch.is_tensor(X)
16 | assert X.dim() == 2
17 | assert X.shape[1] == self.dim
18 | self.mean.data.copy_(X.mean(dim=0))
19 | self.std.data.copy_(X.std(dim=0))
20 |
21 | def forward(self, x):
22 | return (x - self.mean) / (self.std + self.epsilon)
23 |
24 | def unnormalize(self, normal_X):
25 | return self.mean + (self.std * normal_X)
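A brief usage sketch for the Normalizer above, on synthetic data; this assumes the custom Module base class behaves like torch.nn.Module, so the instance is callable:

```python
import torch

X = torch.randn(100, 3) * 5.0 + 2.0   # synthetic features with non-trivial mean/scale
norm = Normalizer(dim=3)               # class defined above
norm.fit(X)                            # stores per-dimension mean and std
Z = norm(X)                            # approximately zero mean, unit variance
X_back = norm.unnormalize(Z)           # recovers X up to the epsilon smoothing
```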
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/src/shared.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from gym.wrappers import RescaleAction
4 |
5 | from .sampling import SampleBuffer
6 |
7 |
8 | def get_env(env_name, wrap_torch=True):
9 | from .env.torch_wrapper import TorchWrapper
10 | from .env.hopper_no_bonus import HopperNoBonusEnv
11 | from .env.cheetah_no_flip import CheetahNoFlipEnv
12 | from .env.ant_no_bonus import AntNoBonusEnv
13 | from .env.humanoid_no_bonus import HumanoidNoBonusEnv
14 | envs = {
15 | 'hopper': HopperNoBonusEnv,
16 | 'cheetah-no-flip': CheetahNoFlipEnv,
17 | 'ant': AntNoBonusEnv,
18 | 'humanoid': HumanoidNoBonusEnv
19 | }
20 | env = envs[env_name]()
21 | if not (np.all(env.action_space.low == -1.0) and np.all(env.action_space.high == 1.0)):
22 | env = RescaleAction(env, -1.0, 1.0)
23 | if wrap_torch:
24 | env = TorchWrapper(env)
25 | return env
26 |
27 |
28 | class SafetySampleBuffer(SampleBuffer):
29 | COMPONENT_NAMES = (*SampleBuffer.COMPONENT_NAMES, 'violations')
30 |
31 | def __init__(self, *args, **kwargs):
32 | super().__init__(*args, **kwargs)
33 | self._create_buffer('violations', torch.bool, [])
--------------------------------------------------------------------------------
/Safe-RL/Safe-MBPO/src/squashed_gaussian.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch.nn.functional as F
3 | from torch import distributions as pd
4 |
5 |
6 | # Borrowed from https://github.com/denisyarats/pytorch_sac
7 |
8 | class TanhTransform(pd.transforms.Transform):
9 | domain = pd.constraints.real
10 | codomain = pd.constraints.interval(-1.0, 1.0)
11 | bijective = True
12 | sign = +1
13 |
14 | def __init__(self, cache_size=1):
15 | super().__init__(cache_size=cache_size)
16 |
17 | @staticmethod
18 | def atanh(x):
19 | return 0.5 * (x.log1p() - (-x).log1p())
20 |
21 | def __eq__(self, other):
22 | return isinstance(other, TanhTransform)
23 |
24 | def _call(self, x):
25 | return x.tanh()
26 |
27 | def _inverse(self, y):
28 | # We do not clamp to the boundary here as it may degrade the performance of certain algorithms.
29 | # one should use `cache_size=1` instead
30 | return self.atanh(y)
31 |
32 | def log_abs_det_jacobian(self, x, y):
33 | # We use a formula that is more numerically stable, see details in the following link
34 | # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7
35 | return 2. * (math.log(2.) - x - F.softplus(-2. * x))
36 |
37 |
38 | class SquashedGaussian(pd.transformed_distribution.TransformedDistribution):
39 | def __init__(self, loc, scale, validate_args=None):
40 | base_dist = pd.Normal(loc, scale)
41 | super().__init__(base_dist, TanhTransform(), validate_args=validate_args)
42 |
43 | @property
44 | def mean(self):
45 | mu = self.base_dist.loc
46 | for transform in self.transforms:
47 | mu = transform(mu)
48 | return mu
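An illustrative use of the SquashedGaussian above: samples land in (-1, 1) and log-probabilities include the tanh change-of-variables correction (this is an editor's example, not code from the repository):

```python
import torch

dist = SquashedGaussian(loc=torch.zeros(2), scale=0.5 * torch.ones(2))
a = dist.rsample()                   # reparameterized sample, each entry in (-1, 1)
logp = dist.log_prob(a).sum(-1)      # log-density including the tanh Jacobian term
mean_action = dist.mean              # tanh of the underlying Gaussian mean
```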
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/.dockerignore:
--------------------------------------------------------------------------------
1 | examples
2 | htmlcov
3 | .travis.yml
4 | .gitignore
5 | .git
6 | *.pyc
7 | .ipynb_checkpoints
8 | __pycache__
9 | SafeRLBench.egg-info
10 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .ipynb_checkpoints
3 | .DS_Store
4 | .idea
5 | .coverage
6 | covhtml
7 | MANIFEST
8 | _build
9 |
10 | *.pyc
11 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | sudo: required
4 |
5 | services:
6 | - docker
7 |
8 | env:
9 | - PYTHON=python2
10 | - PYTHON=python3
11 |
12 | # Setup docker container
13 | install:
14 | - docker build -f misc/Dockerfile.${PYTHON} -t test-image .
15 | - docker ps -a
16 | - ci_env=`bash <(curl -s https://codecov.io/env)`
17 |
18 | # Run tests
19 | script:
20 | - docker run test-image flake8 SafeRLBench --exclude "test*.py,__init__.py,_quadrocopter" --ignore=E402,W503 --show-source
21 | - docker run test-image flake8 SafeRLBench --filename="__init__.py,test*.py" --ignore=F,E402,W503 --show-source
22 | - docker run test-image pydocstyle SafeRLBench --match='(?!__init__).*\.py'
23 | - docker run $ci_env test-image /bin/bash -c "nosetests --with-doctest --with-coverage --cover-package=SafeRLBench --verbosity=2 SafeRLBench ; bash <(curl -s https://codecov.io/bash)"
24 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Felix Berkenkamp
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import logging
4 |
5 | from .configuration import SRBConfig
6 |
7 | # Initialize configuration
8 | config = SRBConfig(logging.getLogger(__name__))
9 |
10 | from .monitor import AlgoMonitor, EnvMonitor
11 | from .base import EnvironmentBase, Space, AlgorithmBase, Policy, ProbPolicy
12 | from .bench import Bench, BenchConfig
13 | from . import algo
14 | from . import envs
15 | from . import policy
16 | from . import spaces
17 | from . import error
18 | from . import measure
19 |
20 | # Add things to all
21 | __all__ = ['EnvironmentBase',
22 | 'Space',
23 | 'AlgorithmBase',
24 | 'Policy',
25 | 'ProbPolicy',
26 | 'AlgoMonitor',
27 | 'EnvMonitor',
28 | 'SRBConfig',
29 | 'Bench',
30 | 'BenchConfig',
31 | 'envs',
32 | 'algo',
33 | 'policy',
34 | 'spaces',
35 | 'measure',
36 | 'error']
37 |
38 |
39 | # Import test after __all__ (no documentation)
40 | # from numpy.testing import Tester
41 | # test = Tester().test
42 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/__init__.py:
--------------------------------------------------------------------------------
1 | """Algorithm Module.
2 |
3 | =================== =========================================
4 | Algorithm
5 | =============================================================
6 | A3C Asynchronous Actor-Critic Agents
7 | PolicyGradient Different Policy Gradient Implementations
8 | DiscreteQLearning Q-Learning using a table
9 | SafeOpt Bayesian Optimization with SafeOpt
10 | SafeOptSwarm Bayesian Optimization with SafeOptSwarm
11 | =================== =========================================
12 | """
13 |
14 | from .policygradient import PolicyGradient
15 | from .safeopt import SafeOpt, SafeOptSwarm
16 | from .a3c import A3C
17 | from .q_learning import DiscreteQLearning
18 |
19 | __all__ = ['PolicyGradient', 'SafeOpt', 'A3C', 'DiscreteQLearning',
20 | 'SafeOptSwarm']
21 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/test.py:
--------------------------------------------------------------------------------
1 | """Algorithm Tests."""
2 |
3 | from SafeRLBench.algo import PolicyGradient, A3C
4 | from SafeRLBench.envs import LinearCar
5 | from .policygradient import CentralFDEstimator, estimators
6 |
7 | from SafeRLBench.policy import NeuralNetwork
8 |
9 | from unittest2 import TestCase
10 | from mock import MagicMock, Mock
11 |
12 |
13 | class TestPolicyGradient(TestCase):
14 | """PolicyGradientTestClass."""
15 |
16 | def test_pg_init(self):
17 | """Test: POLICYGRADIENT: initialization."""
18 | env_mock = MagicMock()
19 | pol_mock = Mock()
20 |
21 | for key, item in estimators.items():
22 | pg = PolicyGradient(env_mock, pol_mock, estimator=key)
23 | self.assertIsInstance(pg.estimator, item)
24 |
25 | pg = PolicyGradient(env_mock, pol_mock, estimator=CentralFDEstimator)
26 | self.assertIsInstance(pg.estimator, CentralFDEstimator)
27 |
28 | self.assertRaises(ImportError, PolicyGradient,
29 | env_mock, pol_mock, CentralFDEstimator(env_mock))
30 |
31 |
32 | class TestA3C(TestCase):
33 | """A3C Test Class."""
34 |
35 | def test_a3c_init(self):
36 | """Test: A3C: initialization."""
37 | a3c = A3C(LinearCar(), NeuralNetwork([2, 6, 1]))
38 |
39 | fields = ['environment', 'policy', 'max_it', 'num_workers', 'rate',
40 | 'done', 'policy', 'p_net', 'v_net', 'workers', 'threads',
41 | 'global_counter', 'sess']
42 |
43 | for field in fields:
44 | assert hasattr(a3c, field)
45 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .general_mountaincar import GeneralMountainCar
4 | from .linear_car import LinearCar
5 | from .gym_wrap import GymWrap
6 | from .quadrocopter import Quadrocopter
7 | from .mdp import MDP
8 |
9 | __all__ = [
10 | 'GeneralMountainCar',
11 | 'LinearCar',
12 | 'GymWrap',
13 | 'Quadrocopter',
14 | 'MDP'
15 | ]
16 |
17 | # TODO: Envs: Add module docs in __init__ file.
18 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/_quadrocopter/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | from .quadrotor_dynamics import QuadrotorDynamics
4 | from .quadrocopter_classes import StateVector
5 |
6 | __all__ = ['QuadrotorDynamics', 'StateVector']
7 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/policy/__init__.py:
--------------------------------------------------------------------------------
1 | from .linear_policy import LinearPolicy, NoisyLinearPolicy
2 | from .linear_policy import DiscreteLinearPolicy
3 | from .neural_network import NeuralNetwork
4 | from .controller import NonLinearQuadrocopterController
5 |
6 | __all__ = [
7 | 'LinearPolicy',
8 | 'NoisyLinearPolicy',
9 | 'DiscreteLinearPolicy',
10 | 'NeuralNetwork',
11 | 'NonLinearQuadrocopterController'
12 | ]
13 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function, absolute_import
2 |
3 | from .rd_space import RdSpace
4 | from .bounded_space import BoundedSpace
5 | from .discrete_space import DiscreteSpace
6 |
7 | __all__ = ['RdSpace', 'BoundedSpace', 'DiscreteSpace']
8 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/discrete_space.py:
--------------------------------------------------------------------------------
1 | """Discrete space implementation."""
2 |
3 | from SafeRLBench import Space
4 |
5 | import numpy as np
6 |
7 |
8 | class DiscreteSpace(Space):
9 | """Discrete Space.
10 |
11 | Let d be the dimension of the space, then it will contain elements
12 | {0, 1, ... , dim-1}.
13 |
14 | Examples
15 | --------
16 | Create a `DiscreteSpace` with three states:
17 | >>> from SafeRLBench.spaces import DiscreteSpace
18 | >>> discrete_space = DiscreteSpace(3)
19 | """
20 |
21 | def __init__(self, dim):
22 | """Initialize `DiscreteSpace`.
23 |
24 | Parameters
25 | ----------
26 | dim : int
27 | Number of states.
28 | """
29 | assert dim > 0, ("If you need a discrete space without elements, you "
30 | + "do not need this class.")
31 | self._dim = dim
32 |
33 | def contains(self, x):
34 | """Check if element is part of the space."""
35 | return (isinstance(x, int) and x >= 0 and x < self._dim)
36 |
37 | def sample(self):
38 | """Sample an element of the space."""
39 | return np.random.randint(self._dim)
40 |
41 | @property
42 | def dimension(self):
43 | """Return dimension of the space."""
44 | return self._dim
45 |
46 | def __repr__(self):
47 | return 'DiscreteSpace(dim=%d)' % self._dim
48 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/rd_space.py:
--------------------------------------------------------------------------------
1 | """R^d with any shape."""
2 | import numpy as np
3 | from SafeRLBench import Space
4 |
5 |
6 | class RdSpace(Space):
7 | """R^d Vectorspace."""
8 |
9 | def __init__(self, shape):
10 | """Initialize with shape."""
11 | self.shape = shape
12 | self._dim = None
13 |
14 | def contains(self, x):
15 | """Check if element is contained."""
16 | return isinstance(x, np.ndarray) and x.shape == self.shape
17 |
18 | def sample(self):
19 | """Return arbitrary element."""
20 | return np.ones(self.shape)
21 |
22 | @property
23 | def dimension(self):
24 | """Return dimension of the space."""
25 | if self._dim is None:
26 | d = 1
27 | for i in range(len(self.shape)):
28 | d *= self.shape[i]
29 | self._dim = d
30 | return self._dim
31 |
32 | def __repr__(self):
33 | return 'RdSpace(shape=%s)' % str(self.shape)
34 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/test.py:
--------------------------------------------------------------------------------
1 | """Tests for spaces module."""
2 | from __future__ import absolute_import
3 |
4 | from functools import partial
5 | import inspect
6 |
7 | from numpy import array
8 | import SafeRLBench.spaces as spaces
9 |
10 |
11 | """Dictionary storing initialization arguments for classes."""
12 | class_arguments = {
13 | spaces.BoundedSpace: [array([-1, -2]), array([1, 0])],
14 | spaces.RdSpace: [(3, 2)],
15 | spaces.DiscreteSpace: [5]
16 | }
17 |
18 |
19 | class TestSpaces(object):
20 | """Wrap spaces tests."""
21 |
22 | classes = []
23 |
24 | @classmethod
25 | def setUpClass(cls):
26 | """Initialize classes list."""
27 | for name, c in inspect.getmembers(spaces):
28 | if inspect.isclass(c):
29 | cls.classes.append(c)
30 |
31 | def exhaustive_tests(self):
32 | """Check: Spaces tests initial values for testing."""
33 | for c in self.classes:
34 | if c not in class_arguments:
35 | assert(False)
36 |
37 | def generate_tests(self):
38 | """Generate tests for spaces implementations."""
39 | for c in self.classes:
40 | if c in class_arguments:
41 | check = partial(self.check_contains)
42 | check.description = ('Test: ' + c.__name__.upper()
43 | + ': implementation.')
44 | yield check, c
45 |
46 | def check_contains(self, c):
47 | """Check if contains and element is implemented."""
48 | space = c(*class_arguments[c])
49 | try:
50 | x = space.sample()
51 | b = space.contains(x)
52 | except NotImplementedError:
53 | assert(False)
54 | assert(b)
55 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = SafeRLBench
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/algorithm.rst:
--------------------------------------------------------------------------------
1 | Algorithms
2 | ==========
3 |
4 | .. include:: ../SafeRLBench/algo/README.rst
5 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/algo.rst:
--------------------------------------------------------------------------------
1 | Algorithm Module
2 | ================
3 |
4 | This module contains implementations of different algorithms. Please refer to
5 | the class documentation for detailed instructions on how to use them.
6 |
7 | .. contents:: Contents
8 | :local:
9 |
10 | AlgorithmBase
11 | -------------
12 |
13 | .. autoclass:: SafeRLBench.AlgorithmBase
14 | :members:
15 |
16 | A3C
17 | ---
18 |
19 | .. autoclass:: SafeRLBench.algo.A3C
20 | :members:
21 |
22 | Policy Gradient
23 | ---------------
24 |
25 | .. autoclass:: SafeRLBench.algo.PolicyGradient
26 | :members:
27 |
28 | Q-Learning
29 | ----------
30 |
31 | .. autoclass:: SafeRLBench.algo.DiscreteQLearning
32 | :members:
33 |
34 | SafeOpt
35 | -------
36 |
37 | .. autoclass:: SafeRLBench.algo.SafeOpt
38 | :members:
39 |
40 | SafeOptSwarm
41 | ------------
42 |
43 | .. autoclass:: SafeRLBench.algo.SafeOptSwarm
44 | :members:
45 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/bench.rst:
--------------------------------------------------------------------------------
1 | Benchmark
2 | =========
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | Bench
8 | -----
9 |
10 | .. autoclass:: SafeRLBench.Bench
11 | :members:
12 |
13 | BenchConfig
14 | -----------
15 |
16 | .. autoclass:: SafeRLBench.BenchConfig
17 | :members:
18 |
19 | BenchRun
20 | --------
21 |
22 | .. autoclass:: SafeRLBench.bench.BenchRun
23 | :members:
24 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/envs.rst:
--------------------------------------------------------------------------------
1 | Environment Module
2 | ==================
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | EnvironmentBase
8 | ---------------
9 |
10 | .. autoclass:: SafeRLBench.EnvironmentBase
11 | :members:
12 |
13 | GeneralMountainCar
14 | ------------------
15 |
16 | .. autoclass:: SafeRLBench.envs.GeneralMountainCar
17 | :members:
18 |
19 | GymWrap
20 | -------
21 |
22 | .. autoclass:: SafeRLBench.envs.GymWrap
23 | :members:
24 |
25 | LinearCar
26 | ---------
27 |
28 | .. autoclass:: SafeRLBench.envs.LinearCar
29 | :members:
30 |
31 | MDP
32 | ---
33 |
34 | .. autoclass:: SafeRLBench.envs.MDP
35 | :members:
36 |
37 | Quadrocopter
38 | ------------
39 |
40 | .. autoclass:: SafeRLBench.envs.Quadrocopter
41 | :members:
42 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/measure.rst:
--------------------------------------------------------------------------------
1 | Measure Module
2 | ==============
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | Measure
8 | -------
9 |
10 | .. autoclass:: SafeRLBench.measure.Measure
11 | :members:
12 |
13 | BestPerformance
14 | ---------------
15 |
16 | .. autoclass:: SafeRLBench.measure.BestPerformance
17 | :members:
18 |
19 | SafetyMeasure
20 | -------------
21 |
22 | .. autoclass:: SafeRLBench.measure.SafetyMeasure
23 | :members:
24 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/misc.rst:
--------------------------------------------------------------------------------
1 | Miscellaneous
2 | =============
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | Configuration
8 | -------------
9 |
10 | .. autoclass:: SafeRLBench.SRBConfig
11 | :members:
12 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/policy.rst:
--------------------------------------------------------------------------------
1 | Policy Module
2 | =============
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | Bases
8 | -----
9 |
10 | Deterministic Policy Base
11 | ~~~~~~~~~~~~~~~~~~~~~~~~~
12 |
13 | .. autoclass:: SafeRLBench.Policy
14 | :members:
15 |
16 | Probabilistic Policy Base
17 | ~~~~~~~~~~~~~~~~~~~~~~~~~
18 |
19 | .. autoclass:: SafeRLBench.ProbPolicy
20 | :members:
21 |
22 | Linear Policies
23 | ---------------
24 |
25 | LinearPolicy
26 | ~~~~~~~~~~~~
27 |
28 | .. autoclass:: SafeRLBench.policy.LinearPolicy
29 | :members:
30 |
31 | DiscreteLinearPolicy
32 | ~~~~~~~~~~~~~~~~~~~~
33 |
34 | .. autoclass:: SafeRLBench.policy.DiscreteLinearPolicy
35 | :members:
36 |
37 | NoisyLinearPolicy
38 | ~~~~~~~~~~~~~~~~~
39 |
40 | .. autoclass:: SafeRLBench.policy.NoisyLinearPolicy
41 | :members:
42 |
43 | NonLinearQuadrocopterController
44 | -------------------------------
45 |
46 | .. autoclass:: SafeRLBench.policy.NonLinearQuadrocopterController
47 | :members:
48 |
49 | NeuralNetwork
50 | -------------
51 |
52 | .. autoclass:: SafeRLBench.policy.NeuralNetwork
53 | :members:
54 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/spaces.rst:
--------------------------------------------------------------------------------
1 | Spaces Module
2 | =============
3 |
4 | .. contents:: Contents
5 | :local:
6 |
7 | Space
8 | -----
9 |
10 | .. autoclass:: SafeRLBench.Space
11 | :members:
12 |
13 | BoundedSpace
14 | ------------
15 |
16 | .. autoclass:: SafeRLBench.spaces.BoundedSpace
17 | :members:
18 |
19 | DiscreteSpace
20 | -------------
21 |
22 | .. autoclass:: SafeRLBench.spaces.DiscreteSpace
23 | :members:
24 |
25 | RdSpace
26 | -------
27 |
28 | .. autoclass:: SafeRLBench.spaces.RdSpace
29 | :members:
30 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/api/srb.rst:
--------------------------------------------------------------------------------
1 | API
2 | ===
3 |
4 | .. toctree::
5 |
6 | algo
7 | envs
8 | policy
9 | spaces
10 | measure
11 | bench
12 | misc
13 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/environment.rst:
--------------------------------------------------------------------------------
1 | Environments
2 | ============
3 |
4 | .. include:: ../SafeRLBench/envs/README.rst
5 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. SafeRLBench documentation master file, created by
2 | sphinx-quickstart on Mon Mar 27 16:08:01 2017.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | .. include:: ../README.rst
7 |
8 | .. include:: toc.rst
9 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/docs/toc.rst:
--------------------------------------------------------------------------------
1 | Content
2 | =======
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | algorithm
8 | environment
9 | api/srb
10 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python2:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda
2 |
3 | ENV TF_CPP_MIN_LOG_LEVEL=2
4 |
5 | # Install build essentials and clean up
6 | RUN apt-get update --quiet \
7 | && apt-get install -y --no-install-recommends --quiet build-essential \
8 | && apt-get clean
9 |
10 | # Fix matplotlib build issues.
11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \
12 | && apt-get clean
13 |
14 | # Update conda, install packages, and clean up
15 | RUN conda update conda --yes --quiet \
16 | && conda install python=2.7 pip numpy scipy nose --yes --quiet \
17 | && conda clean --yes --all \
18 | && hash -r
19 |
20 | # Get the requirements files (separate from the main body)
21 | COPY requirements.txt requirements_dev.txt /code/
22 |
23 | # Install requirements and clean up
24 | RUN pip --no-cache-dir install -r code/requirements.txt \
25 | && rm -rf /root/.cache
26 |
27 | # Install dev requirements and clean up
28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \
29 | && rm -rf /root/.cache
30 |
31 | # Install extra python2 requirements
32 | RUN pip --no-cache-dir install futures multiprocessing \
33 | && rm -rf /root/.cache
34 |
35 | # Install SafeOpt
36 | RUN git clone https://github.com/befelix/SafeOpt.git \
37 | && cd SafeOpt \
38 | && python setup.py install \
39 | && rm -rf /SafeOpt
40 |
41 | # Copy the main code
42 | COPY . /code
43 | RUN cd /code && python setup.py develop
44 |
45 | WORKDIR /code
46 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python3:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 |
3 | ENV TF_CPP_MIN_LOG_LEVEL=2
4 |
5 | # Install build essentials and clean up
6 | RUN apt-get update --quiet \
7 | && apt-get install -y --no-install-recommends --quiet build-essential \
8 | && apt-get clean
9 |
10 | # Fix matplotlib build issues.
11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \
12 | && apt-get clean
13 |
14 | # Update conda, install packages, and clean up
15 | RUN conda update conda --yes --quiet \
16 | && conda install python=3.5 pip numpy scipy nose --yes --quiet \
17 | && conda clean --yes --all \
18 | && hash -r
19 |
20 | # Get the requirements files (separate from the main body)
21 | COPY requirements.txt requirements_dev.txt /code/
22 |
23 | # Install requirements and clean up
24 | RUN pip --no-cache-dir install -r code/requirements.txt \
25 | && rm -rf /root/.cache
26 |
27 | # Install dev requirements and clean up
28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \
29 | && rm -rf /root/.cache
30 |
31 | # Install SafeOpt
32 | RUN git clone https://github.com/befelix/SafeOpt.git \
33 | && cd SafeOpt \
34 | && python setup.py install \
35 | && rm -rf /SafeOpt
36 |
37 | # Copy the main code
38 | COPY . /code
39 | RUN cd /code && python setup.py develop
40 |
41 | WORKDIR /code
42 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >= 1.7
2 | scipy >= 0.19.0
3 | six >= 1.10
4 | futures >= 3.0.5
5 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | gym >= 0.8.0
2 | tensorflow >= 1.0.0
3 | GPy >= 1.6.1
4 |
5 | # Style testing
6 | flake8 >= 3.3.0
7 | pep8 >= 1.7.0
8 | pep8-naming >= 0.4.1
9 | pydocstyle >= 1.1.1
10 |
11 | # Unittesting
12 | nose >= 1.3.7
13 | nose-exclude >= 0.5.0
14 | coverage >= 4.3.4
15 | unittest2 >= 1.1.0
16 | mock >= 2.0.0
17 |
18 | # Documentation
19 | sphinx >= 1.5.3
20 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(
4 | name='SafeRLBench',
5 | version='1.0.1',
6 | author='Nicolas Ochsner',
7 | author_email='ochsnern@student.ethz.ch',
8 | packages=[
9 | 'SafeRLBench',
10 | 'SafeRLBench.algo',
11 | 'SafeRLBench.envs',
12 | 'SafeRLBench.spaces',
13 | 'SafeRLBench.policy',
14 | ],
15 | description='Safe Reinforcement Learning Benchmark',
16 | keywords='reinforcement-learning benchmark',
17 | url='https://github.com/befelix/Safe-RL-Benchmark',
18 | install_requires=[
19 | 'numpy >= 1.7',
20 | 'scipy >= 0.19.0',
21 | 'six >= 1.10',
22 | 'futures >= 3.0.5;python_version<"3.2"'
23 | ],
24 | extras_require={
25 | 'gym': ['gym >= 0.8.0'],
26 | 'safeopt': ['GPy >= 1.6.1', 'safeopt >= 0.1'],
27 | 'neural': ['tensorflow >= 1.0.0'],
28 | },
29 | dependency_links=[
30 | 'git+https://github.com/befelix/SafeOpt/tarball/master#egg=safeopt-0.1'
31 | ],
32 | )
33 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/test_code.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | module="SafeRLBench"
4 |
5 | get_script_dir () {
6 | SOURCE="${BASH_SOURCE[0]}"
7 | # While $SOURCE is a symlink, resolve it
8 | while [ -h "$SOURCE" ]; do
9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
10 | SOURCE="$( readlink "$SOURCE" )"
11 |     # If $SOURCE was a relative symlink (no "/" prefix), resolve it relative to the directory of the symlink
12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
13 | done
14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
15 | echo "$DIR"
16 | }
17 |
18 | # tensorflow environment variable
19 | export TF_CPP_MIN_LOG_LEVEL='3'
20 |
21 | # Change to script root
22 | cd $(get_script_dir)
23 | GREEN='\033[0;32m'
24 | NC='\033[0m'
25 |
26 | BOLD=$(tput bold)
27 | NORMAL=$(tput sgr0)
28 |
29 | # Run style tests
30 | echo -e "${GREEN}${BOLD}Running style tests:${NC}"
31 | flake8 $module --exclude test*.py,__init__.py,_quadrocopter --show-source
32 |
33 | # Ignore import errors for __init__ and tests
34 | flake8 $module --filename=__init__.py,test*.py --ignore=F --show-source
35 |
36 | echo -e "${GREEN}${BOLD}Testing docstring conventions:${NC}"
37 | # Test docstring conventions
38 | pydocstyle $module --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__"
39 |
40 | echo -e "${GREEN}${BOLD}Running unit tests in current environment.${NC}"
41 | nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=$module $module 2>&1 | grep -v "^Level "
42 |
43 | # Export html
44 | coverage html
45 |
--------------------------------------------------------------------------------
/Safe-RL/Safe-RL-Benchmark/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27, py35
3 |
4 | [testenv]
5 | deps =
6 | nose
7 | numpy
8 | theano
9 | mock
10 | unittest2
11 | commands = nosetests
12 |
13 | [flake8]
14 | ignore = E402,W503,D105,D413
15 | exclude =
16 | SafeRLBench/envs/_quadrocopter*
17 |
18 | [pydocstyle]
19 | add_ignore = D203,D105,D413
20 | match_dir = '[^\.\_].*'
21 |
22 | [coverage:run]
23 | omit =
24 | */_quadrocopter*
25 |
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/README.md:
--------------------------------------------------------------------------------
1 | ## Description
2 | Code for the constrained Linear-Quadratic Regulator (LQR) experiment.
3 | ## Reference
4 | Ming Yu, Zhuoran Yang, Mladen Kolar, and Zhaoran Wang. Convergent Policy Optimization for Safe Reinforcement Learning. In NeurIPS 2019.
5 | ## Run the code
6 | Run "Safe_RL_LQR_experiment.m".
7 |
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/iterate_calculate.m:
--------------------------------------------------------------------------------
1 | function X = iterate_calculate( Init, M, N )
2 | % this function iteratively solves the following equation for X:
3 | % X = M + N'*X*N
4 | % starting from Init
5 |
6 | X = Init; diff = 1; iter = 0;
7 | while diff > 1e-3
8 | iter = iter + 1;
9 | X_old = X;
10 | X = M + N'*X*N;
11 | diff = norm(X_old - X);
12 | end
13 |
14 | end
15 |
16 |
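The same fixed-point iteration, sketched in Python/NumPy for readers who want to try it outside MATLAB (this translation is illustrative and not part of the repository):

```python
import numpy as np

def iterate_calculate(init, M, N, tol=1e-3):
    """Iteratively solve X = M + N' X N starting from `init`.

    NumPy sketch of iterate_calculate.m; the iteration converges when the
    spectral radius of N is below 1."""
    X = init
    diff = np.inf
    while diff > tol:
        X_old = X
        X = M + N.T @ X @ N
        diff = np.linalg.norm(X_old - X)
    return X

# Example with a stable N: the result is (approximately) a fixed point.
M = np.eye(2)
N = np.array([[0.5, 0.1], [0.0, 0.4]])
X = iterate_calculate(np.zeros((2, 2)), M, N)
print(np.allclose(X, M + N.T @ X @ N, atol=1e-2))  # True
```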
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe_reinforcement_learning/poster.pdf
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/quadconstr.m:
--------------------------------------------------------------------------------
1 | function [y,yeq,grady,gradyeq] = quadconstr(x,H,k,d)
2 | jj = length(H); % jj is the number of inequality constraints
3 | y = zeros(1,jj);
4 | for i = 1:jj
5 | y(i) = 1/2*x'*H{i}*x + k{i}'*x + d{i};
6 | end
7 | yeq = [];
8 |
9 | if nargout > 2
10 | grady = zeros(length(x),jj);
11 | for i = 1:jj
12 | grady(:,i) = H{i}*x + k{i};
13 | end
14 | end
15 | gradyeq = [];
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/quadhess.m:
--------------------------------------------------------------------------------
1 | function hess = quadhess(x,lambda,Q,H)
2 | hess = Q;
3 | jj = length(H); % jj is the number of inequality constraints
4 | for i = 1:jj
5 | hess = hess + lambda.ineqnonlin(i)*H{i};
6 | end
--------------------------------------------------------------------------------
/Safe-RL/Safe_reinforcement_learning/quadobj.m:
--------------------------------------------------------------------------------
1 | function [y,grady] = quadobj(x,Q,f,c)
2 | y = 1/2*x'*Q*x + f'*x + c;
3 | if nargout > 1
4 | grady = Q*x + f;
5 | end
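Together, `quadobj.m`, `quadconstr.m`, and `quadhess.m` define a quadratic objective and quadratic inequality constraints in the form expected by MATLAB's `fmincon`. For comparison, a SciPy sketch of the same kind of problem (the matrices below are made-up illustration data, not values from the experiment):

```python
import numpy as np
from scipy.optimize import NonlinearConstraint, minimize

# Objective 1/2 x'Qx + f'x + c and one constraint 1/2 x'Hx + k'x + d <= 0,
# mirroring quadobj.m and quadconstr.m above.
Q = np.diag([2.0, 2.0])
f = np.array([-2.0, -5.0])
c = 0.0
H = np.eye(2)
k = np.zeros(2)
d = -2.0  # i.e. ||x||^2 / 2 <= 2

objective = lambda x: 0.5 * x @ Q @ x + f @ x + c
constraint = NonlinearConstraint(
    lambda x: 0.5 * x @ H @ x + k @ x + d,   # must stay <= 0
    -np.inf, 0.0,
    jac=lambda x: H @ x + k)                 # gradient, as in quadconstr.m

res = minimize(objective, np.zeros(2), jac=lambda x: Q @ x + f,
               constraints=[constraint], method='SLSQP')
print(res.x)
```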
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/.gitignore:
--------------------------------------------------------------------------------
1 | ## Julia ignores ##
2 |
3 | # Files generated by invoking Julia with --code-coverage
4 | *.jl.cov
5 | *.jl.*.cov
6 |
7 | # Files generated by invoking Julia with --track-allocation
8 | *.jl.mem
9 |
10 | # System-specific files and directories generated by the BinaryProvider and BinDeps packages
11 | # They contain absolute paths specific to the host computer, and so should not be committed
12 | deps/deps.jl
13 | deps/build.log
14 | deps/downloads/
15 | deps/usr/
16 | deps/src/
17 |
18 | # Build artifacts for creating documentation generated by the Documenter package
19 | docs/build/
20 | docs/site/
21 |
22 | # File generated by Pkg, the package manager, based on a corresponding Project.toml
23 | # It records a fixed state of all packages used by the project. As such, it should not be
24 | # committed for packages, but should be committed for applications that require a static
25 | # environment.
26 |
27 | # Manifest.toml
28 |
29 |
30 | ## C ##
31 |
32 | *.o
33 | *.so
34 |
35 |
36 | ## Additional Ignores ##
37 |
38 | # Don't want to commit changes to these
39 | shield_dump.c
40 |
41 | # My sync program keeps pooping in the corners
42 | *.insyncdl
43 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/Project.toml:
--------------------------------------------------------------------------------
1 | [deps]
2 | ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
3 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
4 | Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
5 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
6 | Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
7 | Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
8 | GridShielding = "d6812381-bd27-4ab8-a35f-a1c7ba1f8c22"
9 | HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
10 | InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
11 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
12 | LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
13 | Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
14 | MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
15 | Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
16 | NaturalSort = "c020b1a1-e9b0-503a-9c33-f039bfc54a85"
17 | PProf = "e4faabce-9ead-11e9-39d9-4379958e3056"
18 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
19 | Pluto = "c3e4b0f8-55cb-11ea-2926-15256bba5781"
20 | PlutoLinks = "0ff47ea0-7a50-410d-8455-4348d5de0420"
21 | PlutoSerialization = "89dfed0f-77d6-439b-aaac-839db4b25fb8"
22 | PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8"
23 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
24 | ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
25 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
26 | ReachabilityAnalysis = "1e97bd63-91d1-579d-8e8d-501d2b57c93f"
27 | Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
28 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
29 | StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
30 | Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7"
31 | Unzip = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d"
32 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/Shared Code/PlotsDefaults.jl:
--------------------------------------------------------------------------------
1 | # This may be the shortest code-file I've ever authored.
2 | Plots.default(fontfamily="serif-roman")
3 |
4 | halfpage = 300
5 |
6 | squeezed = (halfpage, 150)
7 | regular = (halfpage, 220)
8 | tall = (halfpage, 400)
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/Blueprints/TrainSaveEvaluateSingle.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded
11 |
12 | /* formula 4 */
13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded
14 |
15 | /* formula 5 */
16 | E[<=120;%checks%] (max:interventions) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/ExtractQueryResults.jl:
--------------------------------------------------------------------------------
1 | struct UppaalQueryFailedException <: Exception
2 | message::AbstractString
3 | end
4 |
5 | function extract_query_results(query_results::AbstractString)
6 | results = []
7 | open(query_results) do file
8 | for line in eachline(file)
9 | m_mean = match(r"mean=([\d.e-]+)", line)
10 | aborted = occursin(r"EXCEPTION: |is time-locked.|-- Aborted.", line)
11 |
12 | if aborted
13 | throw(UppaalQueryFailedException(line))
14 | end
15 |
16 | if m_mean === nothing
17 | continue
18 | end
19 |
20 | push!(results, m_mean[1])
21 | end
22 | end
23 |
24 | results
25 | end
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldRobustness/StatisticalChecking.jl:
--------------------------------------------------------------------------------
1 | function evaluate_safety(mechanics, policy, number_of_runs;
2 | run_duration=120,
3 | min_v_on_impact=1,
4 | unlucky=false)
5 |
6 | safety_violations_observed = 0
7 | unsafe_trace = []
8 | rand_step = eps()
9 |
10 | for run in 1:number_of_runs
11 | v, p = 0, rand(7:rand_step:10)
12 | # Simulate the ball for run_duration seconds
13 | vs, ps, ts = simulate_sequence(mechanics, v, p, policy, run_duration,
14 | min_v_on_impact=min_v_on_impact,
15 | unlucky=unlucky)
16 | # See if it ends at v=0, p=0
17 | if last(vs) == 0 && last(ps) == 0
18 | safety_violations_observed += 1
19 | end
20 | end
21 | (; safety_violations_observed, number_of_runs)
22 | end
23 |
24 | # It does not choose a random policy. It returns a policy that acts randomly.
25 | function random_policy(hit_chance)
26 | return (v, p) ->
27 | if rand(0:eps():1) <= hit_chance
28 | "hit"
29 | else
30 | "nohit"
31 | end
32 | end
33 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PostShielded.q:
--------------------------------------------------------------------------------
1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy Deterrence1000 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence1000.strategy.json")
5 |
6 | /* formula 2 */
7 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence1000
8 |
9 | /* formula 3 */
10 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence1000
11 |
12 | /* formula 4 */
13 | E[<=120;%checks%] (max:interventions) under Deterrence1000
14 |
15 | /* formula 5 */
16 | strategy Deterrence100 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence100.strategy.json")
17 |
18 | /* formula 6 */
19 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence100
20 |
21 | /* formula 7 */
22 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence100
23 |
24 | /* formula 8 */
25 | E[<=120;%checks%] (max:interventions) under Deterrence100
26 |
27 | /* formula 9 */
28 | strategy Deterrence10 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence10.strategy.json")
29 |
30 | /* formula 10 */
31 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence10
32 |
33 | /* formula 11 */
34 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence10
35 |
36 | /* formula 12 */
37 | E[<=120;%checks%] (max:interventions) under Deterrence10
38 |
39 | /* formula 13 */
40 | strategy Deterrence0 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence0.strategy.json")
41 |
42 | /* formula 14 */
43 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence0
44 |
45 | /* formula 15 */
46 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence0
47 |
48 | /* formula 16 */
49 | E[<=120;%checks%] (max:interventions) under Deterrence0
50 |
51 |
52 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PreShielded.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded
11 |
12 | /* formula 4 */
13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded
14 |
15 | /* formula 5 */
16 | E[<=120;%checks%] (max:interventions) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/ShieldedLayabout.q:
--------------------------------------------------------------------------------
1 | //Evaluate the queries with no strategy applied
2 |
3 | /* formula 2 */
4 | E[<=120;%checks%] (max:LearnerPlayer.fired)
5 |
6 | /* formula 3 */
7 | E[<=120;%checks%] (max:(number_deaths > 0))
8 |
9 | /* formula 4 */
10 | E[<=120;%checks%] (max:interventions)
11 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeCostEvaluate.q:
--------------------------------------------------------------------------------
1 |
2 | /* formula 1 */
3 | strategy MinCost = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120
4 |
5 | /* formula 2 */
6 | saveStrategy("%resultsdir%/MinCost.strategy.json", MinCost)
7 |
8 | /* formula 3 */
9 | E[<=120;%checks%] (max: D/1000) under MinCost
10 |
11 | /* formula 4 */
12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinCost
13 |
14 | /* formula 5 */
15 | E[<=120;%checks%] (max: interventions) under MinCost
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeInterventionsEvaluate.q:
--------------------------------------------------------------------------------
1 |
2 | /* formula 1 */
3 | strategy MinInterventions = minE (interventions) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120
4 |
5 | /* formula 2 */
6 | saveStrategy("%resultsdir%/MinInterventions.strategy.json", MinInterventions)
7 |
8 | /* formula 3 */
9 | E[<=120;%checks%] (max: D/1000) under MinInterventions
10 |
11 | /* formula 4 */
12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinInterventions
13 |
14 | /* formula 5 */
15 | E[<=120;%checks%] (max: interventions) under MinInterventions
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/NoStrategyEvaluate.q:
--------------------------------------------------------------------------------
1 | //Evaluate the queries with no strategy applied
2 |
3 | /* formula 1 */
4 | E[<=120;%checks%] (max: D/1000)
5 |
6 | /* formula 2 */
7 | E[<=120;%checks%] (max:(rDistance <= 0))
8 |
9 | /* formula 3 */
10 | E[<=120;%checks%] (max:interventions)
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/TrainSaveEvaluateSingle.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 | // HACK: Since this query file is only used for the pre-shielded configuration, no way of counting interventions has been implemented. The value will always be zero, but a number has to be printed.
3 |
4 | /* formula 1 */
5 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120
6 |
7 | /* formula 2 */
8 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
9 |
10 | /* formula 3 */
11 | E[<=120;%checks%] (max: D/1000) under PreShielded
12 |
13 | /* formula 4 */
14 | E[<=120;%checks%] (max:(rDistance <= 0)) under PreShielded
15 |
16 | /* formula 5 */
17 | E[<=120;2] (max: 0)
18 |
19 |
20 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/PreShielded.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0) under PreShielded
11 |
12 | /* formula 4 */
13 | E[<=120;%checks%] (max:number_deaths > 0) under PreShielded
14 |
15 | /* formula 5 */
16 | E[<=120;%checks%] (max:interventions) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/ShieldedLayabout.q:
--------------------------------------------------------------------------------
1 | //Evaluate the queries with no strategy applied
2 |
3 | /* formula 2 */
4 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0)
5 |
6 | /* formula 3 */
7 | E[<=120;%checks%] (max:number_deaths > 0)
8 |
9 | /* formula 4 */
10 | E[<=120;%checks%] (max:interventions)
11 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PostShielded.q:
--------------------------------------------------------------------------------
1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy Deterrence1000 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence1000.strategy.json")
5 |
6 | /* formula 2 */
7 | E[#<=30;%checks%] (max:total_cost) under Deterrence1000
8 |
9 | /* formula 3 */
10 | E[#<=30;%checks%00] (max:t>1) under Deterrence1000
11 |
12 | /* formula 4 */
13 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence1000
14 |
15 | /* formula 5 */
16 | strategy Deterrence100 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence100.strategy.json")
17 |
18 | /* formula 6 */
19 | E[#<=30;%checks%] (max:total_cost) under Deterrence100
20 |
21 | /* formula 7 */
22 | E[#<=30;%checks%00] (max:t>1) under Deterrence100
23 |
24 | /* formula 8 */
25 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence100
26 |
27 | /* formula 9 */
28 | strategy Deterrence10 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence10.strategy.json")
29 |
30 | /* formula 10 */
31 | E[#<=30;%checks%] (max:total_cost) under Deterrence10
32 |
33 | /* formula 11 */
34 | E[#<=30;%checks%00] (max:t>1) under Deterrence10
35 |
36 | /* formula 12 */
37 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence10
38 |
39 |
40 | /* formula 13 */
41 | strategy Deterrence0 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence0.strategy.json")
42 |
43 | /* formula 14 */
44 | E[#<=30;%checks%] (max:total_cost) under Deterrence0
45 |
46 | /* formula 15 */
47 | E[#<=30;%checks%00] (max:t>1) under Deterrence0
48 |
49 | /* formula 16 */
50 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence0
51 |
52 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PreShielded.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | E[#<=30;%checks%] (max:total_cost) under PreShielded
11 |
12 | /* formula 4 */
13 | E[#<=30;%checks%00] (max:t>1) under PreShielded
14 |
15 | /* formula 5 */
16 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/ShieldedLayabout.q:
--------------------------------------------------------------------------------
1 | //Evaluate the queries with no strategy applied
2 |
3 | /* formula 1 */
4 | E[#<=30;%checks%] (max:total_cost)
5 |
6 | /* formula 2 */
7 | E[#<=30;%checks%00] (max:t>1)
8 |
9 | /* formula 3 */
10 | E[#<=30;%checks%] (max:100*interventions/(steps || 1))
11 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Blueprints/TrainSaveCheckSafety.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then check its safety.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | Pr[<=120] (<> number_deaths > 0) under PreShielded
11 |
12 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/ReadMe.md:
--------------------------------------------------------------------------------
1 | # Synthesize and Test Shields
2 |
3 | Synthesize shields and test their safety against different random agents.
4 | "Shield" is used as shorthand for a nondeterministic strategy that can be used to shield a learning agent or another strategy.
5 |
6 | A random agent is defined by its `hit_chance`, such that it chooses randomly between the actions `(hit, nohit)` with probabilities `(hit_chance, 1-hit_chance)`.
7 |
8 | Shields are synthesised using either a "barbaric" or "rigorous" reachability method.
9 | The rigorous method makes use of the library `ReachabilityAnalysis.jl` to over-approximate the possible outcomes of the system. This gives theoretical safety guarantees, at the cost of more compute time and a less optimistic shield.
10 | The barbaric method uses sampling to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem; what is tested here is whether it works in practice.
11 |
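To make the distinction concrete, here is a small Python sketch of one "barbaric" reachability step: sample points in a grid cell, simulate each of them for one time step, and record the cells they land in. The helpers `simulate_step` and `cell_of` are hypothetical stand-ins for the Julia code in `Shared Code`.

```python
import itertools
import numpy as np

def barbaric_reachable_cells(cell_bounds, action, simulate_step, cell_of,
                             samples_per_axis=4):
    """Sampling-based ("barbaric") reachability for one grid cell.

    Under-approximates the reachable set: only the sampled points are checked,
    so a transition may be missed. A rigorous method (ReachabilityAnalysis.jl)
    over-approximates it instead."""
    axes = [np.linspace(lo, hi, samples_per_axis) for lo, hi in cell_bounds]
    reached = set()
    for point in itertools.product(*axes):
        successor = simulate_step(np.array(point), action)  # hypothetical simulator
        reached.add(cell_of(successor))                     # hypothetical grid lookup
    return reached
```

Roughly, an action is then kept in the shield for a cell only if every cell it can reach is itself safe.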
12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "Run Experiment.jl"` from within this folder.
13 |
14 | Some of the files are Pluto notebooks, which by their nature are also valid standalone Julia scripts.
15 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Blueprints/TrainSaveCheckSafety.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | Pr[<=120] (<> rDistance <= 0) under PreShielded
11 |
12 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/ReadMe.md:
--------------------------------------------------------------------------------
1 | # Synthesize and Test Cruise Control Shields
2 |
3 | Synthesize shields and test their safety on a random agent.
4 | By a shield I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy.
5 |
6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used.
7 | It is the random agent with uniform chance of picking any action.
8 |
9 | Shields are synthesised using the "barbaric" reachability method only.
10 | The barbaric method uses sampling to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem; what is tested here is whether it works in practice.
11 |
12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-CCSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder.
13 |
14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`.
15 |
16 | The files are Pluto notebooks, which by their nature are also valid standalone Julia scripts.
17 |
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-DCSynthesis/Blueprints/TrainSaveCheckSafety.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | Pr[<=120] (<> number_deaths > 0) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-OPSynthesis/Blueprints/TrainSaveCheckSafety.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then check its safety.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (aov) [<=120] {p} -> {t, v}: <> elapsed >= 120
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | Pr[<=120] (<>(number_deaths > 0)) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Blueprints/TrainSaveCheckSafety.q:
--------------------------------------------------------------------------------
1 | // Train a single strategy, save it, then evaluate it.
2 |
3 | /* formula 1 */
4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1
5 |
6 | /* formula 2 */
7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded)
8 |
9 | /* formula 3 */
10 | Pr[#<=30] (<> t>1) under PreShielded
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png
--------------------------------------------------------------------------------
/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/ReadMe.md:
--------------------------------------------------------------------------------
1 | # Synthesize and Test Random Walk Shields
2 |
3 | Synthesize shields and test their safety on a random agent.
4 | By a shield I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy.
5 |
6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used.
7 | It is the random agent with uniform chance of picking any action.
8 |
9 | Shields are synthesised using the "barbaric" reachability method only.
10 | The barbaric method uses sampling to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem; what is tested here is whether it works in practice.
11 |
12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-CCSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder.
13 |
14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`.
15 |
16 | The files are Pluto notebooks, which by their nature are also valid standalone Julia scripts.
17 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/.gitignore:
--------------------------------------------------------------------------------
1 | *.*~
2 | __pycache__/
3 | *.pkl
4 | **/*.egg-info
5 | .python-version
6 | .idea/
7 | .vscode/
8 | .DS_Store
9 | _build/
10 | data/*ppo*
11 | *.pickle
12 | .ipynb_checkpoints/
13 | *.ckpt
14 | #*.png
15 | *.pt
16 | */simple_save/*
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/__init__.py:
--------------------------------------------------------------------------------
1 | from tensorflow.python.util import deprecation as deprecation
2 | deprecation._PRINT_DEPRECATION_WARNINGS = False
3 |
4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo
5 | from safe_rl.sac.sac import sac
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/pg/trust_region.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from safe_rl.pg.utils import EPS
4 |
5 |
6 | """
7 | Tensorflow utilities for trust region optimization
8 | """
9 |
10 | def flat_concat(xs):
11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0)
12 |
13 | def flat_grad(f, params):
14 | return flat_concat(tf.gradients(xs=params, ys=f))
15 |
16 | def hessian_vector_product(f, params):
17 | # for H = grad**2 f, compute Hx
18 | g = flat_grad(f, params)
19 | x = tf.placeholder(tf.float32, shape=g.shape)
20 | return x, flat_grad(tf.reduce_sum(g*x), params)
21 |
22 | def assign_params_from_flat(x, params):
23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars
24 | splits = tf.split(x, [flat_size(p) for p in params])
25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)]
26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)])
27 |
28 |
29 | """
30 | Conjugate gradient
31 | """
32 |
33 | def cg(Ax, b, cg_iters=10):
34 | x = np.zeros_like(b)
35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start.
36 | p = r.copy()
37 | r_dot_old = np.dot(r,r)
38 | for _ in range(cg_iters):
39 | z = Ax(p)
40 | alpha = r_dot_old / (np.dot(p, z) + EPS)
41 | x += alpha * p
42 | r -= alpha * z
43 | r_dot_new = np.dot(r,r)
44 | p = r + (r_dot_new / r_dot_old) * p
45 | r_dot_old = r_dot_new
46 | return x
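A small self-contained check of the conjugate-gradient helper above, using an explicit symmetric positive-definite matrix in place of the Hessian-vector product (illustrative only):

```python
import numpy as np
from safe_rl.pg.trust_region import cg

# cg() only needs a callable computing A @ p, so a dense SPD matrix suffices here.
A = np.array([[4.0, 1.0],
              [1.0, 3.0]])
b = np.array([1.0, 2.0])

x = cg(lambda p: A @ p, b, cg_iters=10)
print(np.allclose(A @ x, b))  # True: CG solves A x = b
```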
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/pg/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.signal
3 |
4 | EPS = 1e-8
5 |
6 | def combined_shape(length, shape=None):
7 | if shape is None:
8 | return (length,)
9 | return (length, shape) if np.isscalar(shape) else (length, *shape)
10 |
11 | def keys_as_sorted_list(dict):
12 | return sorted(list(dict.keys()))
13 |
14 | def values_as_sorted_list(dict):
15 | return [dict[k] for k in keys_as_sorted_list(dict)]
16 |
17 | def discount_cumsum(x, discount):
18 | """
19 | magic from rllab for computing discounted cumulative sums of vectors.
20 |
21 | input:
22 | vector x,
23 | [x0,
24 | x1,
25 | x2]
26 |
27 | output:
28 | [x0 + discount * x1 + discount^2 * x2,
29 | x1 + discount * x2,
30 | x2]
31 | """
32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]
33 |
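A quick numerical check of the docstring example above:

```python
import numpy as np
from safe_rl.pg.utils import discount_cumsum

x = np.array([1.0, 1.0, 1.0])
print(discount_cumsum(x, 0.5))
# -> [1.75, 1.5, 1.0], i.e. [x0 + 0.5*x1 + 0.25*x2, x1 + 0.5*x2, x2]
```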
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/utils/load_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import joblib
4 | import os
5 | import os.path as osp
6 | import tensorflow as tf
7 | from safe_rl.utils.logx import restore_tf_graph
8 |
9 | def load_policy(fpath, itr='last', deterministic=False):
10 |
11 | # handle which epoch to load from
12 | if itr=='last':
13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11]
14 | itr = '%d'%max(saves) if len(saves) > 0 else ''
15 | else:
16 | itr = '%d'%itr
17 |
18 | # load the things!
19 | sess = tf.Session(graph=tf.Graph())
20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr))
21 |
22 | # get the correct op for executing actions
23 | if deterministic and 'mu' in model.keys():
24 | # 'deterministic' is only a valid option for SAC policies
25 | print('Using deterministic action op.')
26 | action_op = model['mu']
27 | else:
28 | print('Using default action op.')
29 | action_op = model['pi']
30 |
31 | # make function for producing an action given a single state
32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0]
33 |
34 | # try to load environment from save
35 | # (sometimes this will fail because the environment could not be pickled)
36 | try:
37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl'))
38 | env = state['env']
39 | except:
40 | env = None
41 |
42 | return env, get_action, sess
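Typical usage of the helper above; the experiment directory is illustrative, and `env` may be `None` if the environment could not be restored from the snapshot (see the try/except above):

```python
from safe_rl.utils.load_utils import load_policy

env, get_action, sess = load_policy('data/my_experiment/my_experiment_s0', itr='last')
assert env is not None, "environment could not be unpickled; construct it manually"

obs = env.reset()
for _ in range(1000):
    obs, reward, done, info = env.step(get_action(obs))
    if done:
        break
```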
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/utils/readme.md:
--------------------------------------------------------------------------------
1 | # Utils
2 |
3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo).
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/utils/run_utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os.path as osp
3 |
4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data')
5 |
6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True):
7 |
8 | # Make base path
9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else ''
10 | relpath = ''.join([ymd_time, exp_name])
11 |
12 | if seed is not None:
13 | # Make a seed-specific subfolder in the experiment directory.
14 | if datestamp:
15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S")
16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)])
17 | else:
18 | subfolder = ''.join([exp_name, '_s', str(seed)])
19 | relpath = osp.join(relpath, subfolder)
20 |
21 | data_dir = data_dir or DEFAULT_DATA_DIR
22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath),
23 | exp_name=exp_name)
24 | return logger_kwargs
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/safe_rl/utils/serialization_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | def convert_json(obj):
4 | """ Convert obj to a version which can be serialized with JSON. """
5 | if is_json_serializable(obj):
6 | return obj
7 | else:
8 | if isinstance(obj, dict):
9 | return {convert_json(k): convert_json(v)
10 | for k,v in obj.items()}
11 |
12 | elif isinstance(obj, tuple):
13 |             return tuple(convert_json(x) for x in obj)
14 |
15 | elif isinstance(obj, list):
16 | return [convert_json(x) for x in obj]
17 |
18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__):
19 | return convert_json(obj.__name__)
20 |
21 | elif hasattr(obj,'__dict__') and obj.__dict__:
22 | obj_dict = {convert_json(k): convert_json(v)
23 | for k,v in obj.__dict__.items()}
24 | return {str(obj): obj_dict}
25 |
26 | return str(obj)
27 |
28 | def is_json_serializable(v):
29 | try:
30 | json.dumps(v)
31 | return True
32 | except:
33 | return False
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/baseline/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from setuptools import setup
4 | import sys
5 |
6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \
7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \
8 | + "Please install it before proceeding."
9 |
10 | setup(
11 | name='safe_rl',
12 | packages=['safe_rl'],
13 | install_requires=[
14 | 'gym~=0.15.3',
15 | 'joblib==0.14.0',
16 | 'matplotlib==3.1.1',
17 | 'mpi4py==3.0.2',
18 | 'mujoco_py==2.0.2.7',
19 | 'numpy~=1.17.4',
20 | 'seaborn==0.8.1',
21 | 'tensorflow==1.15.4',
22 | ],
23 | )
24 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/cg1/ensemble-cem/ensemble-cem_s10/config.yml:
--------------------------------------------------------------------------------
1 | arguments:
2 | config: ./data/config.yml
3 | correct: 0
4 | dir: data/cg1/
5 | ensemble: 5
6 | episode: 10
7 | epoch: 70
8 | level: 1
9 | load: null
10 | name: ensemble-cem
11 | obs_stack: false
12 | optimizer: cem
13 | render: false
14 | robot: car
15 | save: false
16 | seed: 10
17 | task: goal
18 | test: false
19 | cost_config:
20 | batch: 2000
21 | load: false
22 | load_folder: null
23 | max_ratio: 3
24 | model_param:
25 | boosting_type: gbdt
26 | learning_rate: 0.3
27 | max_depth: 8
28 | n_estimators: 400
29 | n_jobs: 1
30 | num_leaves: 12
31 | safe_buffer_size: 50000
32 | save: false
33 | save_folder: null
34 | unsafe_buffer_size: 10000
35 | dynamic_config:
36 | activation: relu
37 | batch_size: 256
38 | buffer_size: 500000
39 | data_split: 0.8
40 | hidden_sizes:
41 | - 1024
42 | - 1024
43 | - 1024
44 | learning_rate: 0.001
45 | load: false
46 | load_folder: null
47 | n_epochs: 70
48 | save: false
49 | save_folder: null
50 | test_freq: 5
51 | test_ratio: 0.15
52 | exp_name: ensemble-cem
53 | mpc_config:
54 | CCE:
55 | alpha: 0.1
56 | epsilon: 0.01
57 | init_mean: 0
58 | init_var: 1
59 | max_iters: 8
60 | minimal_elites: 5
61 | num_elites: 12
62 | popsize: 500
63 | CEM:
64 | alpha: 0.1
65 | epsilon: 0.01
66 | init_mean: 0
67 | init_var: 1
68 | max_iters: 8
69 | num_elites: 12
70 | popsize: 500
71 | RANDOM:
72 | popsize: 5000
73 | gamma: 0.98
74 | horizon: 8
75 | optimizer: CEM
76 |
--------------------------------------------------------------------------------
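As a rough sketch (not part of the repository, assuming PyYAML as listed in requirements.txt), the saved config above can be read back to recover the MPC settings used for the run; the keys (mpc_config, optimizer, CEM, ...) follow the file shown here:

    import yaml

    with open('data/cg1/ensemble-cem/ensemble-cem_s10/config.yml') as f:
        cfg = yaml.safe_load(f)

    mpc = cfg['mpc_config']
    opt_name = mpc['optimizer']        # 'CEM' in this run
    opt_params = mpc[opt_name]         # alpha, epsilon, popsize, num_elites, ...
    print(opt_name, opt_params['popsize'], 'horizon =', mpc['horizon'])
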
/Safe-RL/safe-mbrl/data/cg1/weights/config.yml:
--------------------------------------------------------------------------------
1 | arguments:
2 | config: ./config.yml
3 | dir: data/cg1
4 | ensemble: 0
5 | episode: 10
6 | epoch: 80
7 | level: 1
8 | load: null
9 | name: rce
10 | optimizer: rce
11 | render: false
12 | robot: car
13 | save: true
14 | seed: 1
15 | test: false
16 | cost_config:
17 | batch: 2000
18 | load: false
19 | load_folder: null
20 | max_ratio: 3
21 | model_param:
22 | boosting_type: gbdt
23 | learning_rate: 0.3
24 | max_depth: 8
25 | n_estimators: 400
26 | n_jobs: 1
27 | num_leaves: 12
28 | safe_buffer_size: 50000
29 | save: true
30 | save_folder: data/cg1/rce/rce_s1
31 | unsafe_buffer_size: 10000
32 | dynamic_config:
33 | activation: relu
34 | batch_size: 256
35 | buffer_size: 500000
36 | data_split: 0.8
37 | hidden_sizes:
38 | - 1024
39 | - 1024
40 | - 1024
41 | learning_rate: 0.001
42 | load: false
43 | load_folder: null
44 | n_ensembles: 4
45 | n_epochs: 70
46 | save: true
47 | save_folder: data/cg1/rce/rce_s1
48 | test_freq: 5
49 | test_ratio: 0.15
50 | exp_name: rce
51 | mpc_config:
52 | CEM:
53 | alpha: 0.1
54 | epsilon: 0.01
55 | init_mean: 0
56 | init_var: 1
57 | max_iters: 8
58 | num_elites: 12
59 | popsize: 500
60 | RANDOM:
61 | popsize: 5000
62 | RCE:
63 | alpha: 0.1
64 | epsilon: 0.01
65 | init_mean: 0
66 | init_var: 1
67 | max_iters: 8
68 | minimal_elites: 5
69 | num_elites: 12
70 | popsize: 500
71 | gamma: 0.98
72 | horizon: 8
73 | optimizer: RCE
74 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/cg2/weights/config.yml:
--------------------------------------------------------------------------------
1 | arguments:
2 | config: ./config.yml
3 | dir: data/cg2
4 | ensemble: 0
5 | episode: 10
6 | epoch: 80
7 | level: 2
8 | load: null
9 | name: rce
10 | optimizer: rce
11 | render: false
12 | robot: car
13 | save: true
14 | seed: 1
15 | test: false
16 | cost_config:
17 | batch: 2000
18 | load: false
19 | load_folder: null
20 | max_ratio: 3
21 | model_param:
22 | boosting_type: gbdt
23 | learning_rate: 0.3
24 | max_depth: 8
25 | n_estimators: 400
26 | n_jobs: 1
27 | num_leaves: 12
28 | safe_buffer_size: 50000
29 | save: true
30 | save_folder: data/cg2/rce/rce_s1
31 | unsafe_buffer_size: 10000
32 | dynamic_config:
33 | activation: relu
34 | batch_size: 256
35 | buffer_size: 500000
36 | data_split: 0.8
37 | hidden_sizes:
38 | - 1024
39 | - 1024
40 | - 1024
41 | learning_rate: 0.001
42 | load: false
43 | load_folder: null
44 | n_ensembles: 4
45 | n_epochs: 70
46 | save: true
47 | save_folder: data/cg2/rce/rce_s1
48 | test_freq: 5
49 | test_ratio: 0.15
50 | exp_name: rce
51 | mpc_config:
52 | CEM:
53 | alpha: 0.1
54 | epsilon: 0.01
55 | init_mean: 0
56 | init_var: 1
57 | max_iters: 8
58 | num_elites: 12
59 | popsize: 500
60 | RANDOM:
61 | popsize: 5000
62 | RCE:
63 | alpha: 0.1
64 | epsilon: 0.01
65 | init_mean: 0
66 | init_var: 1
67 | max_iters: 8
68 | minimal_elites: 5
69 | num_elites: 12
70 | popsize: 500
71 | gamma: 0.98
72 | horizon: 8
73 | optimizer: RCE
74 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/figures/TestFigure3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/TestFigure3.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/figures/pg1-Cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Cost.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/figures/pg1-Reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Reward.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/figures/pg2-Cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Cost.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/figures/pg2-Reward.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Reward.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/pg1/weights/config.yml:
--------------------------------------------------------------------------------
1 | arguments:
2 | config: ./config.yml
3 | dir: data/pg1
4 | ensemble: 0
5 | episode: 10
6 | epoch: 80
7 | level: 1
8 | load: null
9 | name: rce
10 | optimizer: rce
11 | render: false
12 | robot: point
13 | save: true
14 | seed: 1
15 | test: false
16 | cost_config:
17 | batch: 2000
18 | load: false
19 | load_folder: null
20 | max_ratio: 3
21 | model_param:
22 | boosting_type: gbdt
23 | learning_rate: 0.3
24 | max_depth: 8
25 | n_estimators: 400
26 | n_jobs: 1
27 | num_leaves: 12
28 | safe_buffer_size: 50000
29 | save: true
30 | save_folder: data/pg1/rce/rce_s1
31 | unsafe_buffer_size: 10000
32 | dynamic_config:
33 | activation: relu
34 | batch_size: 256
35 | buffer_size: 500000
36 | data_split: 0.8
37 | hidden_sizes:
38 | - 1024
39 | - 1024
40 | - 1024
41 | learning_rate: 0.001
42 | load: false
43 | load_folder: null
44 | n_ensembles: 4
45 | n_epochs: 70
46 | save: true
47 | save_folder: data/pg1/rce/rce_s1
48 | test_freq: 5
49 | test_ratio: 0.15
50 | exp_name: rce
51 | mpc_config:
52 | CEM:
53 | alpha: 0.1
54 | epsilon: 0.01
55 | init_mean: 0
56 | init_var: 1
57 | max_iters: 8
58 | num_elites: 12
59 | popsize: 500
60 | RANDOM:
61 | popsize: 5000
62 | RCE:
63 | alpha: 0.1
64 | epsilon: 0.01
65 | init_mean: 0
66 | init_var: 1
67 | max_iters: 8
68 | minimal_elites: 5
69 | num_elites: 12
70 | popsize: 500
71 | gamma: 0.98
72 | horizon: 8
73 | optimizer: RCE
74 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/data/pg2/weights/config.yml:
--------------------------------------------------------------------------------
1 | arguments:
2 | config: ./config.yml
3 | dir: data/pg2
4 | ensemble: 0
5 | episode: 10
6 | epoch: 80
7 | level: 2
8 | load: null
9 | name: rce
10 | optimizer: rce
11 | render: false
12 | robot: point
13 | save: true
14 | seed: 1
15 | test: false
16 | cost_config:
17 | batch: 2000
18 | load: false
19 | load_folder: null
20 | max_ratio: 3
21 | model_param:
22 | boosting_type: gbdt
23 | learning_rate: 0.3
24 | max_depth: 8
25 | n_estimators: 400
26 | n_jobs: 1
27 | num_leaves: 12
28 | safe_buffer_size: 50000
29 | save: true
30 | save_folder: data/pg2/rce/rce_s1
31 | unsafe_buffer_size: 10000
32 | dynamic_config:
33 | activation: relu
34 | batch_size: 256
35 | buffer_size: 500000
36 | data_split: 0.8
37 | hidden_sizes:
38 | - 1024
39 | - 1024
40 | - 1024
41 | learning_rate: 0.001
42 | load: false
43 | load_folder: null
44 | n_ensembles: 4
45 | n_epochs: 70
46 | save: true
47 | save_folder: data/pg2/rce/rce_s1
48 | test_freq: 5
49 | test_ratio: 0.15
50 | exp_name: rce
51 | mpc_config:
52 | CEM:
53 | alpha: 0.1
54 | epsilon: 0.01
55 | init_mean: 0
56 | init_var: 1
57 | max_iters: 8
58 | num_elites: 12
59 | popsize: 500
60 | RANDOM:
61 | popsize: 5000
62 | RCE:
63 | alpha: 0.1
64 | epsilon: 0.01
65 | init_mean: 0
66 | init_var: 1
67 | max_iters: 8
68 | minimal_elites: 5
69 | num_elites: 12
70 | popsize: 500
71 | gamma: 0.98
72 | horizon: 8
73 | optimizer: RCE
74 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/build/lib/safety_gym/__init__.py:
--------------------------------------------------------------------------------
1 | import safety_gym.envs
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/build/lib/safety_gym/random_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | import gym
5 | import safety_gym # noqa
6 | import numpy as np # noqa
7 |
8 | def run_random(env_name):
9 | env = gym.make(env_name)
10 | obs = env.reset()
11 | done = False
12 | ep_ret = 0
13 | ep_cost = 0
14 | while True:
15 | if done:
16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost))
17 | ep_ret, ep_cost = 0, 0
18 | obs = env.reset()
19 | assert env.observation_space.contains(obs)
20 | act = env.action_space.sample()
21 | assert env.action_space.contains(act)
22 | obs, reward, done, info = env.step(act)
23 | print(obs['magnetometer'], obs['gyro'])
24 | # print('reward', reward)
25 | ep_ret += reward
26 | ep_cost += info.get('cost', 0)
27 | env.render()
28 |
29 |
30 | if __name__ == '__main__':
31 |
32 | parser = argparse.ArgumentParser()
33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0')
34 | args = parser.parse_args()
35 | run_random(args.env)
36 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/safety_gym.png
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/__init__.py:
--------------------------------------------------------------------------------
1 | import safety_gym.envs
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/envs/__init__.py:
--------------------------------------------------------------------------------
1 | import safety_gym.envs.suite
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/envs/mujoco.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 |
4 | # This file is just to get around a baselines import hack.
5 | # env_type is set based on the final part of the entry_point module name.
6 | # In the regular gym mujoco envs this is 'mujoco'.
7 | # We want baselines to treat these as mujoco envs, so we redirect from here,
8 | # and ensure the registry entries are pointing at this file as well.
9 | from safety_gym.envs.engine import * # noqa
10 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/random_agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | import gym
5 | import safety_gym # noqa
6 | import numpy as np # noqa
7 |
8 | def run_random(env_name):
9 | env = gym.make(env_name)
10 | obs = env.reset()
11 | done = False
12 | ep_ret = 0
13 | ep_cost = 0
14 | while True:
15 | if done:
16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost))
17 | ep_ret, ep_cost = 0, 0
18 | obs = env.reset()
19 | assert env.observation_space.contains(obs)
20 | act = env.action_space.sample()
21 | assert env.action_space.contains(act)
22 | obs, reward, done, info = env.step(act)
23 | print(obs['magnetometer'], obs['gyro'])
24 | # print('reward', reward)
25 | ep_ret += reward
26 | ep_cost += info.get('cost', 0)
27 | env.render()
28 |
29 |
30 | if __name__ == '__main__':
31 |
32 | parser = argparse.ArgumentParser()
33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0')
34 | args = parser.parse_args()
35 | run_random(args.env)
36 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/test/test_envs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import gym
5 | import safety_gym.envs # noqa
6 |
7 |
8 | class TestEnvs(unittest.TestCase):
9 | def check_env(self, env_name):
10 | ''' Run a single environment for a single episode '''
11 | print('running', env_name)
12 | env = gym.make(env_name)
13 | env.reset()
14 | done = False
15 | while not done:
16 | _, _, done, _ = env.step(env.action_space.sample())
17 |
18 | def test_envs(self):
19 | ''' Run all the bench envs '''
20 | for env_spec in gym.envs.registry.all():
21 | if 'Safexp' in env_spec.id:
22 | self.check_env(env_spec.id)
23 |
24 |
25 |
26 | if __name__ == '__main__':
27 | unittest.main()
28 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/test/test_goal.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import unittest
4 | import numpy as np
5 |
6 | from safety_gym.envs.engine import Engine, ResamplingError
7 |
8 |
9 | class TestGoal(unittest.TestCase):
10 | def rollout_env(self, env):
11 | ''' roll an environment until it is done '''
12 | done = False
13 | while not done:
14 | _, _, done, _ = env.step([1,0])
15 |
16 | def test_resample(self):
17 | ''' Episode should end with resampling failure '''
18 | config = {
19 | 'robot_base': 'xmls/point.xml',
20 | 'num_steps': 1001,
21 | 'placements_extents': [-1, -1, 1, 1],
22 | 'goal_size': 1.414,
23 | 'goal_keepout': 1.414,
24 | 'goal_locations': [(1, 1)],
25 | 'robot_keepout': 1.414,
26 | 'robot_locations': [(-1, -1)],
27 | 'robot_rot': np.sin(np.pi / 4),
28 | 'terminate_resample_failure': True,
29 | '_seed': 0,
30 | }
31 | env = Engine(config)
32 | env.reset()
33 | self.assertEqual(env.steps, 0)
34 | # Move the robot towards the goal
35 | self.rollout_env(env)
36 | # Check that the environment terminated early
37 | self.assertLess(env.steps, 1000)
38 |
39 | # Try again with the raise
40 | config['terminate_resample_failure'] = False
41 | env = Engine(config)
42 | env.reset()
43 | # Move the robot towards the goal, which should cause resampling failure
44 | with self.assertRaises(ResamplingError):
45 | self.rollout_env(env)
46 |
47 |
48 | if __name__ == '__main__':
49 | unittest.main()
50 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/env/safety_gym/xmls/README.md:
--------------------------------------------------------------------------------
1 | # xmls
2 |
3 | These are mujoco XML files which are used as bases for the simulations.
4 |
5 | Some design goals for them:
6 |
7 | - XML should be complete and simulate-able as-is
8 | - Include a floor geom which is a plane
9 | - Include joint sensors for the robot which provide observations
10 | - Include actuators which provide control
11 | - Default positions should all be neutral
12 | - position 0,0,0 should be resting on the floor, not intersecting it
13 | - robot should start at the origin
14 | - Scene should be clear of other objects
15 | - no obstacles or things to manipulate
16 | - only the robot in the scene
17 |
18 | Requirements for the robot
19 | - Position joints should be separate and named `x`, `y`, and `z`
20 | - 0, 0, 0 position should be resting on the floor above the origin at a neutral position
21 | - First 6 sensors should be (in order):
22 | - joint positions for x, y, z (absolute position in the scene)
23 | - joint velocities for x, y, z (absolute velocity in the scene)
24 |
--------------------------------------------------------------------------------
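A purely illustrative check of the robot-XML conventions listed above (not part of the repository; assumes xmltodict, which Safety Gym's setup.py pulls in, and a hypothetical xmls/point.xml path). It walks the parsed document and verifies that position joints named x, y, and z are defined:

    import xmltodict

    def collect_names(node, names):
        """Recursively gather every '@name' attribute in the parsed XML tree."""
        if isinstance(node, dict):
            if '@name' in node:
                names.add(node['@name'])
            for value in node.values():
                collect_names(value, names)
        elif isinstance(node, list):
            for item in node:
                collect_names(item, names)

    with open('xmls/point.xml') as f:
        doc = xmltodict.parse(f.read())

    names = set()
    collect_names(doc, names)
    assert {'x', 'y', 'z'} <= names, "robot XML should define position joints x, y, z"
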
/Safe-RL/safe-mbrl/env/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from setuptools import setup
4 | import sys
5 |
6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \
7 | "Safety Gym is designed to work with Python 3.6 and greater. " \
8 | + "Please install it before proceeding."
9 |
10 | setup(
11 | name='safety_gym',
12 | packages=['safety_gym'],
13 | install_requires=[
14 | 'gym~=0.15.3',
15 | 'joblib~=0.14.0',
16 | 'mujoco_py==2.0.2.7',
17 | 'numpy~=1.17.4',
18 | 'xmltodict~=0.12.0',
19 | ],
20 | )
21 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/.gitignore:
--------------------------------------------------------------------------------
1 | *.*~
2 | __pycache__/
3 | *.pkl
4 | data/
5 | **/*.egg-info
6 | .python-version
7 | .idea/
8 | .vscode/
9 | .DS_Store
10 | _build/
11 | data/
12 | .ipynb_checkpoints/
13 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Zuxin Liu
3 | @Email: zuxinl@andrew.cmu.edu
4 | @Date: 2020-03-24 10:59:16
5 | @LastEditTime: 2020-05-26 00:19:29
6 | @Description:
7 | '''
8 |
9 | from mbrl.controllers import MPC as MPC
10 | from mbrl.controllers import SafeMPC
11 | from mbrl.models.model import RegressionModel
12 | from mbrl.models.ensemble import RegressionModelEnsemble
13 | from mbrl.models.constraint_model import CostModel
14 |
15 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/controllers/__init__.py:
--------------------------------------------------------------------------------
1 | from .mpc_controller import MPC
2 | from .safe_mpc_controller import SafeMPC
3 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/mbrl/models/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .cem import CEMOptimizer
2 | from .random import RandomOptimizer
3 | from .rce import RCEOptimizer
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/mbrl/optimizers/optimizer.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Zuxin Liu
3 | @Email: zuxinl@andrew.cmu.edu
4 | @Date: 2020-03-24 01:02:01
5 | @LastEditTime: 2020-03-24 10:49:27
6 | @Description:
7 | '''
8 |
9 | from __future__ import absolute_import
10 | from __future__ import print_function
11 | from __future__ import division
12 |
13 |
14 | class Optimizer:
15 | def __init__(self, *args, **kwargs):
16 | pass
17 |
18 | def setup(self, cost_function):
19 | raise NotImplementedError("Must be implemented in subclass.")
20 |
21 | def reset(self):
22 | raise NotImplementedError("Must be implemented in subclass.")
23 |
24 | def obtain_solution(self, *args, **kwargs):
25 | raise NotImplementedError("Must be implemented in subclass.")
26 |
--------------------------------------------------------------------------------
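To make the interface above concrete, here is a minimal hypothetical subclass (not part of the repository; the actual optimizers live in cem.py, random.py and rce.py, imported in the package __init__ above). It fills in setup, reset, and obtain_solution by sampling candidates uniformly and keeping the cheapest one:

    import numpy as np
    from mbrl.optimizers.optimizer import Optimizer  # base class shown above

    class UniformRandomOptimizer(Optimizer):
        """Toy optimizer: sample popsize candidates, return the lowest-cost one."""

        def __init__(self, sol_dim, popsize=100, lower=-1.0, upper=1.0):
            super().__init__()
            self.sol_dim = sol_dim
            self.popsize = popsize
            self.lower, self.upper = lower, upper
            self.cost_function = None

        def setup(self, cost_function):
            # cost_function maps an array of candidate solutions to per-candidate costs
            self.cost_function = cost_function

        def reset(self):
            pass  # this toy optimizer keeps no state between solves

        def obtain_solution(self, *args, **kwargs):
            candidates = np.random.uniform(self.lower, self.upper,
                                           size=(self.popsize, self.sol_dim))
            costs = self.cost_function(candidates)
            return candidates[np.argmin(costs)]
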
/Safe-RL/safe-mbrl/media/cg1_random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_random.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/cg1_rce.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_rce.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/cg2_random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_random.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/cg2_rce.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_rce.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg1_random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_random.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg1_rce.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_rce.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg1_trpo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpo.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg1_trpol.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpol.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg2_random.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_random.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg2_rce.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_rce.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg2_trpo_10.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpo_10.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/media/pg2_trpol_10.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpol_10.gif
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/requirements.txt:
--------------------------------------------------------------------------------
1 | joblib==0.14.1
2 | matplotlib==3.1.3
3 | mpi4py==3.0.3
4 | psutil==5.7.2
5 | PyYAML==5.4
6 | tqdm==4.48.0
7 | seaborn==0.8.1
8 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | @Author: Zuxin Liu
3 | @Email: zuxinl@andrew.cmu.edu
4 | @Date: 2020-05-23 16:02:07
5 | @LastEditTime: 2020-05-23 16:02:44
6 | @Description:
7 | '''
8 |
9 |
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/utils/mpi_pytorch.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import numpy as np
3 | import os
4 | import torch
5 | from mpi4py import MPI
6 | from utils.mpi_tools import broadcast, mpi_avg, num_procs, proc_id
7 |
8 | def setup_pytorch_for_mpi():
9 | """
10 | Avoid slowdowns caused by each separate process's PyTorch using
11 | more than its fair share of CPU resources.
12 | """
13 | #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
14 | if torch.get_num_threads()==1:
15 | return
16 | fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1)
17 | torch.set_num_threads(fair_num_threads)
18 | #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
19 |
20 | def mpi_avg_grads(module):
21 | """ Average contents of gradient buffers across MPI processes. """
22 | if num_procs()==1:
23 | return
24 | for p in module.parameters():
25 | p_grad_numpy = p.grad.numpy() # numpy view of tensor data
26 | avg_p_grad = mpi_avg(p.grad)
27 | p_grad_numpy[:] = avg_p_grad[:]
28 |
29 | def sync_params(module):
30 | """ Sync all parameters of module across all MPI processes. """
31 | if num_procs()==1:
32 | return
33 | for p in module.parameters():
34 | p_numpy = p.data.numpy()
35 | broadcast(p_numpy)
--------------------------------------------------------------------------------
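A sketch of how these helpers are typically combined in a data-parallel training loop (not part of the repository; the model, data, and optimizer are placeholders, and the script would be launched under mpirun so that num_procs() > 1). Parameters are broadcast once at the start, and gradient buffers are averaged across processes before every optimizer step:

    import torch
    import torch.nn as nn
    from utils.mpi_pytorch import setup_pytorch_for_mpi, sync_params, mpi_avg_grads

    setup_pytorch_for_mpi()                  # give each MPI process a fair share of threads

    model = nn.Linear(4, 1)                  # placeholder model
    sync_params(model)                       # every rank starts from identical weights
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)

    for _ in range(10):                      # placeholder training loop
        x, y = torch.randn(32, 4), torch.randn(32, 1)
        loss = ((model(x) - y) ** 2).mean()
        optim.zero_grad()
        loss.backward()
        mpi_avg_grads(model)                 # average gradients across MPI processes
        optim.step()
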
/Safe-RL/safe-mbrl/utils/run_entrypoint.py:
--------------------------------------------------------------------------------
1 | import zlib
2 | import pickle
3 | import base64
4 |
5 | if __name__ == '__main__':
6 | import argparse
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument('encoded_thunk')
9 | args = parser.parse_args()
10 | thunk = pickle.loads(zlib.decompress(base64.b64decode(args.encoded_thunk)))
11 | thunk()
--------------------------------------------------------------------------------
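The entry point above expects its single argument to be a base64-encoded, zlib-compressed pickle of a callable, which it then invokes. A sketch of producing and launching such an argument (not part of the repository; my_experiments.train is a hypothetical importable function). With the plain pickle module the callable must be importable by the child process, which is why launcher tooling of this kind often serializes the thunk with cloudpickle instead:

    import base64, pickle, subprocess, sys, zlib
    from my_experiments import train  # hypothetical module providing a no-argument train()

    encoded_thunk = base64.b64encode(zlib.compress(pickle.dumps(train))).decode('utf-8')
    # The child decodes and calls train(), mirroring run_entrypoint.py above.
    subprocess.check_call([sys.executable, 'utils/run_entrypoint.py', encoded_thunk])
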
/Safe-RL/safe-mbrl/utils/serialization_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | def convert_json(obj):
4 | """ Convert obj to a version which can be serialized with JSON. """
5 | if is_json_serializable(obj):
6 | return obj
7 | else:
8 | if isinstance(obj, dict):
9 | return {convert_json(k): convert_json(v)
10 | for k,v in obj.items()}
11 |
12 | elif isinstance(obj, tuple):
13 | return (convert_json(x) for x in obj)
14 |
15 | elif isinstance(obj, list):
16 | return [convert_json(x) for x in obj]
17 |
18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__):
19 | return convert_json(obj.__name__)
20 |
21 | elif hasattr(obj,'__dict__') and obj.__dict__:
22 | obj_dict = {convert_json(k): convert_json(v)
23 | for k,v in obj.__dict__.items()}
24 | return {str(obj): obj_dict}
25 |
26 | return str(obj)
27 |
28 | def is_json_serializable(v):
29 | try:
30 | json.dumps(v)
31 | return True
32 | except:
33 | return False
--------------------------------------------------------------------------------
/Safe-RL/safe-mbrl/utils/user_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 |
4 | # Default neural network backend for each algo
5 | # (Must be either 'tf1' or 'pytorch')
6 | DEFAULT_BACKEND = {
7 | 'vpg': 'pytorch',
8 | 'trpo': 'tf1',
9 | 'ppo': 'pytorch',
10 | 'ddpg': 'pytorch',
11 | 'td3': 'pytorch',
12 | 'sac': 'pytorch'
13 | }
14 |
15 | # Where experiment outputs are saved by default:
16 | #DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data')
17 | DEFAULT_DATA_DIR = osp.join(osp.abspath('./'),'data')
18 |
19 | # Whether to automatically insert a date and time stamp into the names of
20 | # save directories:
21 | FORCE_DATESTAMP = False
22 |
23 | # Whether GridSearch provides automatically-generated default shorthands:
24 | DEFAULT_SHORTHAND = True
25 |
26 | # Tells the GridSearch how many seconds to pause for before launching
27 | # experiments.
28 | WAIT_BEFORE_LAUNCH = 5
--------------------------------------------------------------------------------
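These constants are consumed when experiment output directories are assembled. A small sketch (not part of the repository; the data_dir/exp_name/subfolder layout is an assumption) mirroring the subfolder construction in the setup_logger_kwargs fragment at the top of this listing:

    import os.path as osp
    import time
    from utils.user_config import DEFAULT_DATA_DIR, FORCE_DATESTAMP

    exp_name, seed = 'rce', 1
    if FORCE_DATESTAMP:
        stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
        subfolder = ''.join([stamp, '-', exp_name, '_s', str(seed)])
    else:
        subfolder = ''.join([exp_name, '_s', str(seed)])

    output_dir = osp.join(DEFAULT_DATA_DIR, exp_name, subfolder)
    print(output_dir)   # e.g. ./data/rce/rce_s1
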
/Safe-RL/safeRL/.gitignore:
--------------------------------------------------------------------------------
1 | safe_recovery/output
2 | safe_recovery/logging
3 | safe_recovery/saved_models
4 | safe_recovery/old_policies
5 |
6 |
7 |
8 |
9 | *.o
10 | *.pyc
11 |
12 | # Compiled source #
13 | ###################
14 | *.com
15 | *.class
16 | *.dll
17 | *.exe
18 | *.o
19 | *.so
20 |
21 | # Packages #
22 | ############
23 | # it's better to unpack these files and commit the raw source
24 | # git has its own built in compression methods
25 | *.7z
26 | *.dmg
27 | *.gz
28 | *.iso
29 | *.jar
30 | *.rar
31 | *.tar
32 | *.zip
--------------------------------------------------------------------------------
/Safe-RL/safeRL/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "MADRaS"]
2 | path = MADRaS
3 | url = https://github.com/madras-simulator/MADRaS
4 | [submodule "safe-grid-gym"]
5 | path = safe-grid-gym
6 | url = https://github.com/david-lindner/safe-grid-gym
7 | branch = safe_recovery
8 |
--------------------------------------------------------------------------------
/Safe-RL/safeRL/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [2019] [Harshit Sikchi]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safeRL/README.md~:
--------------------------------------------------------------------------------
1 | # HCOPE
2 | High-Confidence Off-Policy Evaluation.
3 |
4 |
5 | Python Implementation of HCOPE lower bound evaluation as given in the paper:
6 | Thomas, Philip S., Georgios Theocharous, and Mohammad Ghavamzadeh. "High-Confidence Off-Policy Evaluation." AAAI. 2015.
7 |
8 |
9 | ## Importance Sampling
10 |
11 | Implementation of:
12 | * Simple Importance Sampling
13 | * Per-Decision Importance Sampling
14 | * Normalized Per-Decision Importance Sampling (NPDIS) Estimator
15 | * Weighted Importance Sampling (WIS) Estimator
16 | * Weighted Per-Decision Importance Sampling (WPDIS) Estimator
17 | * Consistent Weighted Per-Decision Importance Sampling (CWPDIS) Estimator
18 |
19 | Comparison of different importance sampling estimators:
20 | 
21 |
22 |
23 |
--------------------------------------------------------------------------------
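To make the first two estimators in the list concrete, here is a minimal sketch (not part of the repository) under the usual assumption that the behaviour policy assigns non-zero probability wherever the evaluation policy does. Each trajectory return is reweighted by the product of per-step probability ratios; weighted IS normalizes by the sum of weights instead of the trajectory count:

    import numpy as np

    def importance_sampling(returns, pi_e_probs, pi_b_probs, weighted=False):
        """Estimate the evaluation policy's return from behaviour-policy trajectories.

        returns[i]     -- return of trajectory i
        pi_e_probs[i]  -- per-step action probabilities under the evaluation policy
        pi_b_probs[i]  -- per-step action probabilities under the behaviour policy
        """
        weights = np.array([np.prod(np.asarray(pe) / np.asarray(pb))
                            for pe, pb in zip(pi_e_probs, pi_b_probs)])
        if weighted:                                            # weighted IS (WIS)
            return float(np.sum(weights * returns) / np.sum(weights))
        return float(np.mean(weights * np.asarray(returns)))   # simple IS

    # Toy usage: two trajectories of length 2
    print(importance_sampling([1.0, 0.0],
                              [[0.9, 0.8], [0.5, 0.5]],
                              [[0.5, 0.5], [0.5, 0.5]]))
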
/Safe-RL/safeRL/citation.cff:
--------------------------------------------------------------------------------
1 | # YAML 1.2
2 | ---
3 | authors:
4 | -
5 | affiliation: "University of Texas at Austin"
6 | family-names: Sikchi
7 | given-names: Harshit
8 | cff-version: "1.1.0"
9 | license: MIT
10 | message: "If you use this software, please cite it using these metadata."
11 | repository-code: "https://github.com/hari-sikchi/safeRL"
12 | title: safeRL
13 | ...
14 |
--------------------------------------------------------------------------------
/Safe-RL/safeRL/importance_sampling/importance_sampling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/importance_sampling/importance_sampling.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/IS_dist_+_0.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_+_0.1.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/IS_dist_minus_0.1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_minus_0.1.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/IS_dist_random.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_random.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/IS_variance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_variance.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/Result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Result.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/Theorem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Theorem.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/safe_actions.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions.gif
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/safe_actions_instability.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions_instability.gif
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/safety_layer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_layer.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/safety_optimization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_optimization.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/results/safety_signal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_signal.png
--------------------------------------------------------------------------------
/Safe-RL/safeRL/safe_exploration/optimizers.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/openai/evolution-strategies-starter.
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import numpy as np
9 |
10 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES
11 | class Optimizer(object):
12 | def __init__(self, w_policy):
13 | self.w_policy = w_policy.flatten()
14 | self.dim = w_policy.size
15 | self.t = 0
16 |
17 | def update(self, globalg):
18 | self.t += 1
19 | step = self._compute_step(globalg)
20 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5)
21 | return self.w_policy + step, ratio
22 |
23 | def _compute_step(self, globalg):
24 | raise NotImplementedError
25 |
26 |
27 | class SGD(Optimizer):
28 | def __init__(self, pi, stepsize):
29 | Optimizer.__init__(self, pi)
30 | self.stepsize = stepsize
31 |
32 | def _compute_step(self, globalg):
33 | step = -self.stepsize * globalg
34 | return step
35 |
36 |
--------------------------------------------------------------------------------
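A brief usage sketch for the classes above (not part of the repository; the gradient is a random placeholder and the file is assumed importable as `optimizers`). update returns the proposed flat parameter vector and the step-to-weight norm ratio, which callers can use to monitor step sizes:

    import numpy as np
    from optimizers import SGD   # the file shown above

    w_policy = np.zeros((4, 3))              # placeholder policy weight matrix
    opt = SGD(w_policy, stepsize=0.02)

    grad = np.random.randn(w_policy.size)    # placeholder gradient of the objective
    new_w, ratio = opt.update(grad)
    print(new_w.shape, ratio)                # (12,), norm(step) / (norm(w) + 1e-5)
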
/Safe-RL/safeRL/safe_exploration/shared_noise.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/ray-project/ray/tree/master/python/ray/rllib/es
3 |
4 | import ray
5 | import numpy as np
6 |
7 | @ray.remote
8 | def create_shared_noise():
9 | """
10 | Create a large array of noise to be shared by all workers. Used
11 | for avoiding the communication of the random perturbations delta.
12 | """
13 |
14 | seed = 12345
15 | count = 250000000
16 | noise = np.random.RandomState(seed).randn(count).astype(np.float64)
17 | return noise
18 |
19 |
20 | class SharedNoiseTable(object):
21 | def __init__(self, noise, seed = 11):
22 |
23 | self.rg = np.random.RandomState(seed)
24 | self.noise = noise
25 | assert self.noise.dtype == np.float64
26 |
27 | def get(self, i, dim):
28 | return self.noise[i:i + dim]
29 |
30 | def get_mod(self, i, dim,ratio):
31 | return ratio*self.noise[i:i + dim]
32 |
33 |
34 | def sample_index(self, dim):
35 | return self.rg.randint(0, len(self.noise) - dim + 1)
36 |
37 | def get_delta(self, dim):
38 | idx = self.sample_index(dim)
39 | return idx, self.get(idx, dim)
40 |
41 |
42 | def get_delta_mod(self, dim,ratio):
43 | idx = self.sample_index(dim)
44 | return idx, ratio*self.get(idx, dim)
45 |
46 |
--------------------------------------------------------------------------------
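A sketch of how the shared noise table is typically wired up (not part of the repository; assumes the file above is importable as `shared_noise` and that Ray is available). The large noise array lives once in Ray's object store, and each worker wraps it to draw perturbation slices by index:

    import ray
    from shared_noise import create_shared_noise, SharedNoiseTable  # file shown above

    ray.init()
    noise_id = create_shared_noise.remote()       # ~2 GB of float64 noise, created once
    table = SharedNoiseTable(ray.get(noise_id), seed=7)

    policy_dim = 128                              # placeholder flat policy size
    idx, delta = table.get_delta(policy_dim)      # random index plus a noise slice
    print(idx, delta.shape)                       # the same idx lets other workers re-fetch it
    ray.shutdown()
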
/Safe-RL/safeRL/safe_exploration/utils.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/openai/evolution-strategies-starter.
3 |
4 | import numpy as np
5 | import matplotlib.pyplot as plt
6 |
7 | def itergroups(items, group_size):
8 | assert group_size >= 1
9 | group = []
10 | for x in items:
11 | group.append(x)
12 | if len(group) == group_size:
13 | yield tuple(group)
14 | del group[:]
15 | if group:
16 | yield tuple(group)
17 |
18 |
19 |
20 | def batched_weighted_sum(weights, vecs, batch_size):
21 | total = 0
22 | num_items_summed = 0
23 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size),
24 | itergroups(vecs, batch_size)):
25 | assert len(batch_weights) == len(batch_vecs) <= batch_size
26 | total += np.dot(np.asarray(batch_weights, dtype=np.float64),
27 | np.asarray(batch_vecs, dtype=np.float64))
28 | num_items_summed += len(batch_weights)
29 | return total, num_items_summed
30 |
31 | def plot_info(param_dict, logdir):
32 | for key, value in param_dict.items():
33 | x = value[0]
34 | y = value[1]
35 | x_name = value[2]
36 | y_name = value[3]
37 | print(x,y)
38 | plt.plot(x, y)
39 | plt.title(key)
40 | plt.xlabel(x_name)
41 | plt.ylabel(y_name)
42 | plt.savefig((logdir + "/plot_" +key + ".png"))
43 | plt.clf()
44 |
45 |
46 |
--------------------------------------------------------------------------------
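A small usage sketch for batched_weighted_sum above (not part of the repository; assumes the file is importable as `utils`). It accumulates sum_i w_i * v_i over mini-batches, which keeps memory bounded when there are many long vectors; the second return value counts how many items were combined:

    import numpy as np
    from utils import batched_weighted_sum   # the file shown above

    weights = [0.5, -1.0, 2.0]
    vecs = [np.ones(4), np.arange(4.0), np.full(4, 3.0)]

    total, n = batched_weighted_sum(weights, vecs, batch_size=2)
    print(total, n)   # [6.5  5.5  4.5  3.5] 3
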
/Safe-RL/safe_learning/.dockerignore:
--------------------------------------------------------------------------------
1 | examples
2 | htmlcov
3 | .travis.yml
4 | .gitignore
5 | .git
6 | *.pyc
7 | .ipynb_checkpoints
8 | **/__pycache__
9 | safe_learning.egg-info
10 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .idea
3 | .ipynb_checkpoints
4 | htmlcov
5 | .coverage
6 | .cache
7 | safe_learning.egg-info
8 | __pycache__
9 | docs/safe_learning.*
10 | docs/_build
11 | *.swp
12 | *.DS_Store
13 | .pytest_cache
14 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | sudo: required
4 | services:
5 | - docker
6 |
7 | env:
8 | - PYTHON=python2
9 | - PYTHON=python3
10 |
11 | # Setup anaconda
12 | install:
13 | # Disabled since docker pull does not affect cache
14 | # Fixed in Docker 1.13 with --cache-from
15 | # - docker pull befelix/lyapunov-learning-private:${PYTHON} || true
16 | - docker build -f Dockerfile.${PYTHON} -t test-image .
17 | - docker ps -a
18 |
19 | # Run tests
20 | script:
21 | - docker run test-image scripts/test_code.sh
22 |
23 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/Dockerfile.dev:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 |
3 | # Install build essentials and clean up
4 | RUN apt-get update --quiet \
5 | && apt-get install -y --no-install-recommends --quiet build-essential \
6 | && apt-get clean \
7 | && rm -rf /var/lib/apt/lists/*
8 |
9 | # Update conda, install packages, and clean up
10 | RUN conda update conda --yes --quiet \
11 | && conda install python=3.5 pip numpy scipy pandas --yes --quiet \
12 | && conda clean --yes --all \
13 | && hash -r
14 |
15 | # Get the requirements files (separate from the main body)
16 | COPY requirements.txt requirements_dev.txt /reqs/
17 |
18 | # Install requirements and clean up
19 | RUN pip --no-cache-dir install -r /reqs/requirements.txt \
20 | && pip --no-cache-dir install -r /reqs/requirements_dev.txt \
21 | && pip install jupyter jupyterlab dumb-init \
22 | && rm -rf /root/.cache \
23 | && rm -rf /reqs
24 |
25 | # Manually install GPflow and clean up
26 | RUN git clone --depth=1 --branch=0.4.0 https://github.com/GPflow/GPflow.git \
27 | && cd GPflow \
28 | && python setup.py install \
29 | && rm -rf /GPflow
30 |
31 | # Output scrubber for jupyter
32 | ADD scripts/jupyter_output.py /
33 |
34 | RUN jupyter notebook --generate-config \
35 | && cat /jupyter_output.py >> /root/.jupyter/jupyter_notebook_config.py \
36 | && rm /jupyter_output.py
37 |
38 | WORKDIR /code
39 |
40 | # Make sure Ctrl+C commands can be forwarded
41 | ENTRYPOINT ["dumb-init", "--"]
42 |
43 | CMD python setup.py develop \
44 | && jupyter lab --ip="0.0.0.0" --no-browser --allow-root
45 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/Dockerfile.python2:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda:4.5.11
2 |
3 | # Install build essentials and clean up
4 | RUN apt-get update --quiet \
5 | && apt-get install -y --no-install-recommends --quiet build-essential \
6 | && apt-get clean \
7 | && rm -rf /var/lib/apt/lists/*
8 |
9 | # Update conda, install packages, and clean up
10 | RUN conda install python=2.7 --yes --quiet \
11 | && conda clean --yes --all \
12 | && hash -r
13 |
14 | # Copy the main code
15 | COPY . /code
16 | RUN cd /code \
17 | && pip install pip==18.1 \
18 | && pip install numpy==1.14.5 \
19 | && pip install -e .[test] --process-dependency-links \
20 | && rm -rf /root/.cache
21 |
22 | WORKDIR /code
23 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/Dockerfile.python3:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3:4.5.11
2 |
3 | # Install build essentials and clean up
4 | RUN apt-get update --quiet \
5 | && apt-get install -y --no-install-recommends --quiet build-essential \
6 | && apt-get clean \
7 | && rm -rf /var/lib/apt/lists/*
8 |
9 | # Update conda, install packages, and clean up
10 | RUN conda install python=3.5 --yes --quiet \
11 | # && conda clean --yes --all \
12 | && hash -r
13 |
14 | # Copy the main code
15 | COPY . /code
16 | RUN cd /code \
17 | && pip install pip==18.1 \
18 | && pip install numpy==1.14.5 \
19 | && pip install -e .[test] --process-dependency-links \
20 | && rm -rf /root/.cache
21 |
22 | WORKDIR /code
23 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Felix Berkenkamp
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: help
2 |
3 | help:
4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
5 |
6 | doc: ## Build documentation (docs/_build/html/index.html)
7 | cd docs && $(MAKE) html
8 |
9 | coverage: ## Construct coverage (htmlcov/index.html)
10 | coverage html
11 |
12 | test-local: ## Test the local installation of the code
13 | ./scripts/test_code.sh
14 |
15 | test: docker ## Test the docker images
16 | docker run safe_learning_py2 make test-local
17 | docker run safe_learning_py3 make test-local
18 |
19 | dev: ## Mount current code as volume and run jupyterlab for development
20 | docker build -f Dockerfile.dev -t safe_learning_dev .
21 | docker run -p 8888:8888 -v $(shell pwd):/code safe_learning_dev
22 |
23 | docker: ## Build the docker images
24 | docker build -f Dockerfile.python2 -t safe_learning_py2 .
25 | docker build -f Dockerfile.python3 -t safe_learning_py3 .
26 |
27 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = SafeLearning
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/_templates/template.rst:
--------------------------------------------------------------------------------
1 | {{ name }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 | .. auto{{ objtype }}:: {{ objname }} {% if objtype == "class" %}
6 | :members:
7 | :inherited-members:
8 | {% endif %}
9 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/api.rst:
--------------------------------------------------------------------------------
1 | API Documentation
2 | *****************
3 |
4 | .. automodule:: safe_learning
5 |
6 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to the Safe Learning documentation!
2 | ===========================================
3 |
4 | .. include:: introduction.rst
5 |
6 | .. toctree::
7 | :caption: Contents
8 | :maxdepth: 3
9 |
10 | api
11 |
12 | Indices and tables
13 | ==================
14 |
15 | * :ref:`genindex`
16 | * :ref:`modindex`
17 | * :ref:`search`
18 |
19 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/introduction.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 |
4 | TODO
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=SafeLearning
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | numpydoc >= 0.6
3 | sphinx_rtd_theme >= 0.1.8
4 | mock
5 |
6 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/examples/README.rst:
--------------------------------------------------------------------------------
1 | Example notebooks for the library
2 | =================================
3 |
4 | Introductions
5 | -------------
6 | - `1d_region_of_attraction_estimate.ipynb <./1d_region_of_attraction_estimate.ipynb>`_ shows how to estimate and learn the region of attraction for a fixed policy.
7 | - `basic_dynamic_programming.ipynb <./basic_dynamic_programming.ipynb>`_ does basic dynamic programming with piecewise linear function approximators for the mountain car example.
8 | - `reinforcement_learning_pendulum.ipynb <./reinforcement_learning_pendulum.ipynb>`_ does approximate policy iteration in an actor-critic framework with neural networks for the inverted pendulum.
9 | - `reinforcement_learning_cartpole.ipynb <./reinforcement_learning_cartpole.ipynb>`_ does the same as above for the cart-pole (i.e., the inverted pendulum on a cart).
10 |
11 | Experiments
12 | -----------
13 | - `1d_example.ipynb <./1d_example.ipynb>`_ contains a 1D example including plots of the sets.
14 | - `inverted_pendulum.ipynb <./inverted_pendulum.ipynb>`_ contains a full neural network example with an inverted pendulum.
15 | - `adaptive_safety_verification.ipynb <./adaptive_safety_verification.ipynb>`_ investigates the benefits of an adaptive discretization in identifying safe sets for the inverted pendulum.
16 | - `lyapunov_function_learning.ipynb <./lyapunov_function_learning.ipynb>`_ demonstrates how a parameterized Lyapunov candidate for the inverted pendulum can be trained with the machine learning approach in [1]_.
17 |
18 | .. [1] S. M. Richards, F. Berkenkamp, A. Krause,
19 | `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems `_. Conference on Robot Learning (CoRL), 2018.
20 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.0,<1.15
2 | scipy>=1.0.0,<=1.2.1
3 | gpflow==0.4.0
4 | matplotlib<=4.0.0
5 | scs==2.0.2
6 | cvxpy>=1,<=1.0.15
7 | tensorflow>=1.6.0,<=1.12.0
8 | future<=0.18.0
9 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | mock
2 | flake8>=3.0,<=3.5.0
3 | pytest==4.6.9
4 | pytest-cov==2.8.1
5 | pydocstyle>=2.0,<2.1
6 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/safe_learning/configuration.py:
--------------------------------------------------------------------------------
1 | """General configuration class for dtypes."""
2 |
3 | from __future__ import absolute_import, print_function, division
4 |
5 | import tensorflow as tf
6 |
7 |
8 | class Configuration(object):
9 | """Configuration class."""
10 |
11 | def __init__(self):
12 | """Initialization."""
13 | super(Configuration, self).__init__()
14 |
15 | # Dtype for computations
16 | self.dtype = tf.float64
17 |
18 | # Batch size for stability verification
19 | self.gp_batch_size = 10000
20 |
21 | @property
22 | def np_dtype(self):
23 | """Return the numpy dtype."""
24 | return self.dtype.as_numpy_dtype
25 |
26 | def __repr__(self):
27 | """Print the parameters."""
28 | params = ['Configuration parameters:', '']
29 | for param, value in self.__dict__.items():
30 | params.append('{}: {}'.format(param, value.__repr__()))
31 |
32 | return '\n'.join(params)
33 |
--------------------------------------------------------------------------------
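Editorial note: the Configuration class above centralizes the dtype shared by the NumPy and TensorFlow sides of the library. A minimal usage sketch (the import path and TensorFlow 1.x API are assumptions based on the package layout and pinned requirements):

import numpy as np
import tensorflow as tf
from safe_learning.configuration import Configuration

config = Configuration()
# NumPy arrays and TensorFlow tensors share the same precision.
x_np = np.zeros((3, 2), dtype=config.np_dtype)   # float64 by default
x_tf = tf.constant(x_np, dtype=config.dtype)
print(config)  # __repr__ lists dtype and gp_batch_size
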
/Safe-RL/safe_learning/scripts/jupyter_output.py:
--------------------------------------------------------------------------------
1 | def scrub_output_pre_save(model, **kwargs):
2 | """scrub output before saving notebooks"""
3 | # only run on notebooks
4 | if model['type'] != 'notebook':
5 | return
6 | # only run on nbformat v4
7 | if model['content']['nbformat'] != 4:
8 | return
9 |
10 | for cell in model['content']['cells']:
11 | if cell['cell_type'] != 'code':
12 | continue
13 | cell['outputs'] = []
14 | cell['execution_count'] = None
15 |
16 | # `c` is the Jupyter application's configuration object; this hook takes effect
17 | # when the module is loaded as (part of) a Jupyter notebook config file.
18 | c.FileContentsManager.pre_save_hook = scrub_output_pre_save
19 |
--------------------------------------------------------------------------------
/Safe-RL/safe_learning/scripts/test_code.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | module="safe_learning"
4 |
5 | get_script_dir () {
6 | SOURCE="${BASH_SOURCE[0]}"
7 | # While $SOURCE is a symlink, resolve it
8 | while [ -h "$SOURCE" ]; do
9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
10 | SOURCE="$( readlink "$SOURCE" )"
11 | # If $SOURCE was a relative symlink (no "/" prefix), resolve it relative to the directory containing the symlink
12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
13 | done
14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
15 | echo "$DIR"
16 | }
17 |
18 | # Change to script root
19 | cd $(get_script_dir)/..
20 | GREEN='\033[0;32m'
21 | NC='\033[0m'
22 |
23 | # Run style tests
24 | echo -e "${GREEN}Running style tests.${NC}"
25 | flake8 $module --exclude test*.py,__init__.py --ignore=E402,E731,W503 --show-source || { exit 1; }
26 |
27 | # Ignore import errors for __init__ and tests
28 | flake8 $module --filename=__init__.py,test*.py --ignore=F,E402,W503 --show-source || { exit 1; }
29 |
30 | echo -e "${GREEN}Testing docstring conventions.${NC}"
31 | # Test docstring conventions
32 | pydocstyle $module --convention=numpy || { exit 1; }
33 |
34 | # Run unit tests
35 | echo -e "${GREEN}Running unit tests.${NC}"
36 | pytest --doctest-modules --cov --cov-fail-under=80 $module || { exit 1; }
37 |
38 |
--------------------------------------------------------------------------------
/Safe-RL/safe_near_optimal_mdp/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
3 | __pycache__/
4 | .vscode/
5 | result/
6 | old/
7 |
--------------------------------------------------------------------------------
/Safe-RL/safe_near_optimal_mdp/GPSG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/GPSG.png
--------------------------------------------------------------------------------
/Safe-RL/safe_near_optimal_mdp/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Akifumi Wachi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz
--------------------------------------------------------------------------------
/Safe-RL/safe_near_optimal_mdp/simple_make_rand_settings.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function, absolute_import
2 |
3 | import GPy
4 | import numpy as np
5 | import arguments
6 |
7 | from safemdp.grid_world import (draw_gp_sample, compute_S_hat0)
8 |
9 |
10 | args = arguments.safemdp_argparse()
11 |
12 | # Define world
13 | world_shape = args.world_shape
14 | step_size = args.step_size
15 |
16 | # Define GP for safety
17 | noise_safety = args.noise_safety
18 | safety_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.),
19 | variance=1., ARD=True)
20 | safety_lik = GPy.likelihoods.Gaussian(variance=noise_safety ** 2)
21 | safety_lik.constrain_bounded(1e-6, 10000.)
22 |
23 | # Define GP for reward
24 | noise_reward = args.noise_reward
25 | reward_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.),
26 | variance=1., ARD=True)
27 | reward_lik = GPy.likelihoods.Gaussian(variance=noise_reward ** 2)
28 | reward_lik.constrain_bounded(1e-6, 10000.)
29 |
30 | # Safety and Reward functions
31 | safety, _ = draw_gp_sample(safety_kernel, world_shape, step_size)
32 | reward, _ = draw_gp_sample(reward_kernel, world_shape, step_size)
33 |
34 | # Set the minimum value for reward as zero
35 | reward -= min(reward)
36 |
37 | # Safety threshold, Lipschitz constant, scaling factors for confidence interval
38 | h = args.h
39 |
40 | # Initialize safe sets
41 | S0 = np.zeros((np.prod(world_shape), 5), dtype=bool)
42 | S0[:, 0] = True
43 | S_hat0 = compute_S_hat0(np.nan, world_shape, 4, safety, step_size, h)
44 | start_pos = np.random.choice(np.where(S_hat0)[0])
45 |
46 | # Save the problem settings as a npz file
47 | np.savez('data/simple/random_settings_new', safety=safety, reward=reward,
48 | start_pos=start_pos)
49 |
--------------------------------------------------------------------------------
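Editorial note: the script saves the sampled safety and reward functions plus the start position in an .npz archive. A short sketch of reading it back (np.savez appends the .npz extension; the keys match the savez call above):

import numpy as np

data = np.load('data/simple/random_settings_new.npz')
safety = data['safety']            # sampled safety function values
reward = data['reward']            # sampled, shifted-to-nonnegative reward values
start_pos = int(data['start_pos']) # index of the initial (safe) state
print(safety.shape, reward.shape, start_pos)
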
/Safe-RL/safe_near_optimal_mdp/utils/reward_utilities.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | __all__ = ['RewardObj']
5 |
6 |
7 | class RewardObj(object):
8 | """Reward Object in MDPs.
9 |
10 | Parameters
11 | ----------
12 | gp_r: GPy.core.GPRegression
13 | A Gaussian process model that can be used to determine the reward.
14 | beta_r: float
15 | The confidence interval used by the GP model.
16 | """
17 | def __init__(self, gp_r, beta_r):
18 | super(RewardObj, self).__init__()
19 |
20 | # Scalar for gp confidence intervals
21 | self.beta = beta_r
22 | # GP model
23 | self.gp = gp_r
24 |
25 | def add_gp_observations(self, x_new, y_new):
26 | """Add observations to the gp."""
27 | # Update GP with observations
28 | self.gp.set_XY(np.vstack((self.gp.X, x_new)),
29 | np.vstack((self.gp.Y, y_new)))
30 |
--------------------------------------------------------------------------------
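Editorial note: RewardObj is a thin wrapper around a GPy regression model plus a confidence scaling. A hedged usage sketch with synthetic data (the import path assumes the repository root is on PYTHONPATH):

import GPy
import numpy as np
from utils.reward_utilities import RewardObj

# Synthetic 2D inputs and noisy scalar rewards.
X = np.random.rand(10, 2)
Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(10, 1)
gp_r = GPy.models.GPRegression(X, Y, GPy.kern.RBF(input_dim=2))

reward_obj = RewardObj(gp_r, beta_r=2.0)
reward_obj.add_gp_observations(np.array([[0.5, 0.5]]), np.array([[0.3]]))

mean, var = reward_obj.gp.predict(np.array([[0.5, 0.5]]))
upper = mean + reward_obj.beta * np.sqrt(var)   # optimistic reward estimate
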
/Safe-RL/safe_rl_papers/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Chi Zhang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safety-starter-agents/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Safe-RL/safety-starter-agents/safe_rl/__init__.py:
--------------------------------------------------------------------------------
1 | from tensorflow.python.util import deprecation as deprecation
2 | deprecation._PRINT_DEPRECATION_WARNINGS = False
3 |
4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo
5 | from safe_rl.sac.sac import sac
--------------------------------------------------------------------------------
/Safe-RL/safety-starter-agents/safe_rl/pg/trust_region.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from safe_rl.pg.utils import EPS
4 |
5 |
6 | """
7 | Tensorflow utilities for trust region optimization
8 | """
9 |
10 | def flat_concat(xs):
11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0)
12 |
13 | def flat_grad(f, params):
14 | return flat_concat(tf.gradients(xs=params, ys=f))
15 |
16 | def hessian_vector_product(f, params):
17 | # for H = grad**2 f, compute Hx
18 | g = flat_grad(f, params)
19 | x = tf.placeholder(tf.float32, shape=g.shape)
20 | return x, flat_grad(tf.reduce_sum(g*x), params)
21 |
22 | def assign_params_from_flat(x, params):
23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars
24 | splits = tf.split(x, [flat_size(p) for p in params])
25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)]
26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)])
27 |
28 |
29 | """
30 | Conjugate gradient
31 | """
32 |
33 | def cg(Ax, b, cg_iters=10):
34 | x = np.zeros_like(b)
35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start.
36 | p = r.copy()
37 | r_dot_old = np.dot(r,r)
38 | for _ in range(cg_iters):
39 | z = Ax(p)
40 | alpha = r_dot_old / (np.dot(p, z) + EPS)
41 | x += alpha * p
42 | r -= alpha * z
43 | r_dot_new = np.dot(r,r)
44 | p = r + (r_dot_new / r_dot_old) * p
45 | r_dot_old = r_dot_new
46 | return x
--------------------------------------------------------------------------------
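Editorial note: cg solves Ax = b for a symmetric positive-definite operator given only matrix-vector products, which is how the trust-region algorithms compute the natural-gradient step without forming the Hessian. A standalone NumPy check of the same logic on a small system (restated here so it runs without TensorFlow):

import numpy as np

EPS = 1e-8  # same constant as safe_rl.pg.utils.EPS

def cg(Ax, b, cg_iters=10):
    # Identical logic to the routine above.
    x = np.zeros_like(b)
    r = b.copy()
    p = r.copy()
    r_dot_old = np.dot(r, r)
    for _ in range(cg_iters):
        z = Ax(p)
        alpha = r_dot_old / (np.dot(p, z) + EPS)
        x += alpha * p
        r -= alpha * z
        r_dot_new = np.dot(r, r)
        p = r + (r_dot_new / r_dot_old) * p
        r_dot_old = r_dot_new
    return x

A = np.array([[4.0, 1.0], [1.0, 3.0]])   # symmetric positive definite
b = np.array([1.0, 2.0])
x = cg(lambda v: A @ v, b)
print(np.allclose(x, np.linalg.solve(A, b)))  # True
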
/Safe-RL/safety-starter-agents/safe_rl/pg/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.signal
3 |
4 | EPS = 1e-8
5 |
6 | def combined_shape(length, shape=None):
7 | if shape is None:
8 | return (length,)
9 | return (length, shape) if np.isscalar(shape) else (length, *shape)
10 |
11 | def keys_as_sorted_list(dict):
12 | return sorted(list(dict.keys()))
13 |
14 | def values_as_sorted_list(dict):
15 | return [dict[k] for k in keys_as_sorted_list(dict)]
16 |
17 | def discount_cumsum(x, discount):
18 | """
19 | magic from rllab for computing discounted cumulative sums of vectors.
20 |
21 | input:
22 | vector x,
23 | [x0,
24 | x1,
25 | x2]
26 |
27 | output:
28 | [x0 + discount * x1 + discount^2 * x2,
29 | x1 + discount * x2,
30 | x2]
31 | """
32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]
33 |
--------------------------------------------------------------------------------
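Editorial note: a quick check of discount_cumsum against the expansion given in its docstring (self-contained restatement, so it runs without importing the package):

import numpy as np
import scipy.signal

def discount_cumsum(x, discount):
    # Same lfilter trick as above.
    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]

x = np.array([1.0, 2.0, 3.0])
gamma = 0.9
expected = np.array([1.0 + gamma * 2.0 + gamma ** 2 * 3.0,
                     2.0 + gamma * 3.0,
                     3.0])
print(np.allclose(discount_cumsum(x, gamma), expected))  # True
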
/Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/safety-starter-agents/safe_rl/utils/load_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import joblib
4 | import os
5 | import os.path as osp
6 | import tensorflow as tf
7 | from safe_rl.utils.logx import restore_tf_graph
8 |
9 | def load_policy(fpath, itr='last', deterministic=False):
10 |
11 | # handle which epoch to load from
12 | if itr=='last':
13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11]
14 | itr = '%d'%max(saves) if len(saves) > 0 else ''
15 | else:
16 | itr = '%d'%itr
17 |
18 | # load the things!
19 | sess = tf.Session(graph=tf.Graph())
20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr))
21 |
22 | # get the correct op for executing actions
23 | if deterministic and 'mu' in model.keys():
24 | # 'deterministic' is only a valid option for SAC policies
25 | print('Using deterministic action op.')
26 | action_op = model['mu']
27 | else:
28 | print('Using default action op.')
29 | action_op = model['pi']
30 |
31 | # make function for producing an action given a single state
32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0]
33 |
34 | # try to load environment from save
35 | # (sometimes this will fail because the environment could not be pickled)
36 | try:
37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl'))
38 | env = state['env']
39 | except:
40 | env = None
41 |
42 | return env, get_action, sess
--------------------------------------------------------------------------------
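Editorial note: a hedged rollout sketch using load_policy. The checkpoint directory is a placeholder, and env can be None when the environment could not be pickled at save time:

from safe_rl.utils.load_utils import load_policy

env, get_action, sess = load_policy('data/my_experiment', itr='last')
assert env is not None, 'environment was not saved with the checkpoint'

o, done, ep_ret = env.reset(), False, 0.0
while not done:
    o, r, done, _ = env.step(get_action(o))
    ep_ret += r
print('episode return:', ep_ret)
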
/Safe-RL/safety-starter-agents/safe_rl/utils/readme.md:
--------------------------------------------------------------------------------
1 | # Utils
2 |
3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo).
--------------------------------------------------------------------------------
/Safe-RL/safety-starter-agents/safe_rl/utils/run_utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os.path as osp
3 |
4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data')
5 |
6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True):
7 |
8 | # Make base path
9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else ''
10 | relpath = ''.join([ymd_time, exp_name])
11 |
12 | if seed is not None:
13 | # Make a seed-specific subfolder in the experiment directory.
14 | if datestamp:
15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S")
16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)])
17 | else:
18 | subfolder = ''.join([exp_name, '_s', str(seed)])
19 | relpath = osp.join(relpath, subfolder)
20 |
21 | data_dir = data_dir or DEFAULT_DATA_DIR
22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath),
23 | exp_name=exp_name)
24 | return logger_kwargs
--------------------------------------------------------------------------------
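Editorial note: setup_logger_kwargs only builds the output directory and experiment name for the logger. A small example (the exact path depends on the date and time of the call):

from safe_rl.utils.run_utils import setup_logger_kwargs

kwargs = setup_logger_kwargs('cpo_test', seed=0, datestamp=True)
print(kwargs)
# e.g. {'output_dir': '<repo>/data/2020-01-01_cpo_test/2020-01-01_12-00-00-cpo_test_s0',
#       'exp_name': 'cpo_test'}
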
/Safe-RL/safety-starter-agents/safe_rl/utils/serialization_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | def convert_json(obj):
4 | """ Convert obj to a version which can be serialized with JSON. """
5 | if is_json_serializable(obj):
6 | return obj
7 | else:
8 | if isinstance(obj, dict):
9 | return {convert_json(k): convert_json(v)
10 | for k,v in obj.items()}
11 |
12 | elif isinstance(obj, tuple):
13 | return tuple(convert_json(x) for x in obj)
14 |
15 | elif isinstance(obj, list):
16 | return [convert_json(x) for x in obj]
17 |
18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__):
19 | return convert_json(obj.__name__)
20 |
21 | elif hasattr(obj,'__dict__') and obj.__dict__:
22 | obj_dict = {convert_json(k): convert_json(v)
23 | for k,v in obj.__dict__.items()}
24 | return {str(obj): obj_dict}
25 |
26 | return str(obj)
27 |
28 | def is_json_serializable(v):
29 | try:
30 | json.dumps(v)
31 | return True
32 | except:
33 | return False
--------------------------------------------------------------------------------
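Editorial note: convert_json recursively reduces arbitrary Python objects to JSON-friendly structures, falling back to str() when nothing else applies. A small hedged demo (the Experiment class here is made up for illustration):

import json
from safe_rl.utils.serialization_utils import convert_json

class Experiment:
    def __init__(self):
        self.name = 'demo'
        self.gamma = 0.99
        self.activation = lambda x: x   # not JSON-serializable

print(json.dumps(convert_json(Experiment()), indent=2))
# The object is rendered as {"<Experiment object ...>": {"name": "demo",
# "gamma": 0.99, "activation": "<function ...>"}} -- every leaf is a JSON type.
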
/Safe-RL/safety-starter-agents/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from setuptools import setup
4 | import sys
5 |
6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \
7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \
8 | + "Please install it before proceeding."
9 |
10 | setup(
11 | name='safe_rl',
12 | packages=['safe_rl'],
13 | install_requires=[
14 | 'gym~=0.15.3',
15 | 'joblib==0.14.0',
16 | 'matplotlib==3.1.1',
17 | 'mpi4py==3.0.2',
18 | 'mujoco_py==2.0.2.7',
19 | 'numpy~=1.17.4',
20 | 'seaborn==0.8.1',
21 | 'tensorflow==1.13.1',
22 | ],
23 | )
24 |
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 |
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/README.md:
--------------------------------------------------------------------------------
1 | # vertex-net
2 | This repository contains source code of the paper:
3 |
4 | Liyuan Zheng, Yuanyuan Shi, Lillian J. Ratliff, and Baosen Zhang, "Safe Reinforcement Learning of Control-Affine Systems with Vertex Networks",
5 | [[ArXiv]](https://arxiv.org/abs/2003.09488)
6 |
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/algos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/algos/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/envs/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/nets/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/nets/policy_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | use_cuda = torch.cuda.is_available()
6 | device = torch.device("cuda" if use_cuda else "cpu")
7 |
8 |
9 | class PolicyNetwork(nn.Module):
10 | def __init__(self, env, obs_dim, action_dim, hidden_dim, init_w=3e-3):
11 | super(PolicyNetwork, self).__init__()
12 |
13 | self.env = env
14 |
15 | self.linear1 = nn.Linear(obs_dim, hidden_dim)
16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim)
17 | self.linear3 = nn.Linear(hidden_dim, action_dim)
18 |
19 | self.linear3.weight.data.uniform_(-init_w, init_w)
20 | self.linear3.bias.data.uniform_(-init_w, init_w)
21 |
22 | def forward(self, state):
23 | x = F.relu(self.linear1(state))
24 | x = F.relu(self.linear2(x))
25 | x = self.env.max_action * torch.tanh(self.linear3(x))
26 | return x
27 |
28 | def get_action(self, state):
29 | state = torch.FloatTensor(state).unsqueeze(0).to(device)
30 | action = self.forward(state)
31 | return action.detach().cpu().numpy()[0]
--------------------------------------------------------------------------------
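Editorial note: a minimal sketch of driving PolicyNetwork with a stub environment. The stub, the dimensions, and the import path (repository root on sys.path) are assumptions, and the sketch assumes CPU execution:

import torch
from nets.policy_net import PolicyNetwork

class EnvStub:
    max_action = 2.0   # actions are squashed into [-2, 2] by the tanh output layer

net = PolicyNetwork(EnvStub(), obs_dim=3, action_dim=1, hidden_dim=64)
# On a CUDA machine, move the network to the GPU first (net.cuda()).
action = net.get_action([0.1, -0.2, 0.05])
print(action)   # numpy array of shape (1,), within [-max_action, max_action]
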
/Safe-RL/vertex-net/nets/value_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class ValueNetwork(nn.Module):
7 | def __init__(self, obs_dim, action_dim, hidden_dim, init_w=3e-3):
8 | super(ValueNetwork, self).__init__()
9 |
10 | self.linear1 = nn.Linear(obs_dim + action_dim, hidden_dim)
11 | self.linear2 = nn.Linear(hidden_dim, hidden_dim)
12 | self.linear3 = nn.Linear(hidden_dim, 1)
13 |
14 | self.linear3.weight.data.uniform_(-init_w, init_w)
15 | self.linear3.bias.data.uniform_(-init_w, init_w)
16 |
17 | def forward(self, state, action):
18 | x = torch.cat((state, action), dim=1)
19 | x = F.relu(self.linear1(x))
20 | x = F.relu(self.linear2(x))
21 | x = self.linear3(x)
22 | return x
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/nets/vertex_policy_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | use_cuda = torch.cuda.is_available()
6 | device = torch.device("cuda" if use_cuda else "cpu")
7 |
8 |
9 | class VertexPolicyNetwork(nn.Module):
10 | def __init__(self, env, obs_dim, num_vertex, hidden_dim, init_w=3e-3):
11 | super(VertexPolicyNetwork, self).__init__()
12 |
13 | self.env = env
14 |
15 | self.linear1 = nn.Linear(obs_dim, hidden_dim)
16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim)
17 | self.linear3 = nn.Linear(hidden_dim, num_vertex)
18 |
19 | self.linear3.weight.data.uniform_(-init_w, init_w)
20 | self.linear3.bias.data.uniform_(-init_w, init_w)
21 |
22 | def forward(self, state):
23 | x = F.relu(self.linear1(state))
24 | x = F.relu(self.linear2(x))
25 | x = F.softmax(self.linear3(x), dim=1)
26 | action_vertex = self.env.get_action_vertex(state.numpy())
27 | action_vertex = torch.FloatTensor(action_vertex).to(device)
28 | x = torch.bmm(x.unsqueeze(1), action_vertex).squeeze(1)
29 | # x = torch.sum(x * action_vertex, dim=1).unsqueeze(1)
30 | return x
31 |
32 | def get_action(self, state):
33 | state = torch.FloatTensor(state).unsqueeze(0).to(device)
34 | action = self.forward(state)
35 | return action.detach().cpu().numpy()[0]
--------------------------------------------------------------------------------
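Editorial note: the vertex network outputs softmax weights over the vertices of the state-dependent safe action polytope and returns their convex combination, so the action stays inside the safe set by construction. A hedged sketch with a stub environment whose safe action set is the fixed interval [-1, 1] (num_vertex=2, action_dim=1); the stub and import path are illustrative only, and the sketch assumes CPU execution:

import numpy as np
import torch
from nets.vertex_policy_net import VertexPolicyNetwork

class EnvStub:
    def get_action_vertex(self, states):
        # One set of vertices per state: shape (batch, num_vertex, action_dim).
        batch = states.shape[0]
        return np.tile(np.array([[[-1.0], [1.0]]]), (batch, 1, 1))

net = VertexPolicyNetwork(EnvStub(), obs_dim=3, num_vertex=2, hidden_dim=32)
action = net.get_action([0.1, -0.2, 0.05])
print(action)   # convex combination of the vertices, guaranteed inside [-1, 1]
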
/Safe-RL/vertex-net/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/utils/__init__.py
--------------------------------------------------------------------------------
/Safe-RL/vertex-net/utils/replay_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 |
5 | class ReplayBuffer:
6 | def __init__(self, capacity):
7 | self.capacity = capacity
8 | self.buffer = []
9 | self.position = 0
10 |
11 | def push(self, state, action, reward, next_state, done):
12 | if len(self.buffer) < self.capacity:
13 | self.buffer.append(None)
14 | self.buffer[self.position] = (state, action, reward, next_state, done)
15 | self.position = (self.position + 1) % self.capacity
16 |
17 | def sample(self, batch_size):
18 | batch = random.sample(self.buffer, batch_size)
19 | state, action, reward, next_state, done = map(np.stack, zip(*batch))
20 | return state, action, reward, next_state, done
21 |
22 | def __len__(self):
23 | return len(self.buffer)
--------------------------------------------------------------------------------
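Editorial note: a short usage sketch of the ring-buffer replay memory above (the transition contents are placeholders; the import path assumes the repository root is on sys.path):

import numpy as np
from utils.replay_buffer import ReplayBuffer

buffer = ReplayBuffer(capacity=1000)
for t in range(5):
    state = np.random.randn(3)
    action = np.random.randn(1)
    next_state = np.random.randn(3)
    buffer.push(state, action, reward=float(t), next_state=next_state, done=(t == 4))

states, actions, rewards, next_states, dones = buffer.sample(batch_size=4)
print(states.shape, actions.shape, rewards.shape)   # (4, 3) (4, 1) (4,)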