├── README.md └── Safe-RL ├── AlwaysSafe ├── .gitignore ├── LICENSE ├── Pipfile ├── README.md ├── agents │ ├── __init__.py │ ├── abs_opt_cmdp.py │ └── opt_cmdp.py ├── planners │ ├── __init__.py │ ├── abs_lp_optimistic.py │ ├── lp.py │ └── lp_optimistic.py ├── scripts │ ├── .gitignore │ ├── __init__.py │ ├── cliff_walking.py │ ├── factored.py │ └── simple.py ├── tests │ ├── __init__.py │ ├── test_abs_opt_cmdp.py │ ├── test_lp_agent.py │ ├── test_lp_optimistic.py │ ├── test_lp_optimistic_abs.py │ ├── test_opt_cmdp.py │ └── test_training.py └── util │ ├── __init__.py │ ├── grb.py │ ├── mdp.py │ └── training.py ├── AutomotiveSafeRL ├── .gitignore ├── Project.toml ├── README.md ├── RNNFiltering │ ├── RNNFiltering.jl │ ├── bagging_training.jl │ ├── data_generation.jl │ ├── datagen.sh │ ├── generate_data.sh │ ├── generate_dataset.jl │ ├── load_model_weights.jl │ ├── model_loading.jl │ ├── scp_model.sh │ ├── train.sh │ ├── train_single.sh │ ├── train_tracking.jl │ └── visualize_prediction.ipynb ├── evaluation │ ├── evaluation.jl │ ├── evaluation_functions.jl │ ├── evaluation_script.sh │ ├── helpers.jl │ └── parallel_evaluation.jl ├── notebooks │ ├── baseline.ipynb │ ├── baseline_policy.ipynb │ ├── car_mdp.ipynb │ ├── crosswalk.ipynb │ ├── decomposition.ipynb │ ├── decomposition2.ipynb │ ├── evaluation_scenarios.ipynb │ ├── graphs.ipynb │ ├── interactive_evaluation.ipynb │ ├── joint_mask.ipynb │ ├── joint_problem.ipynb │ ├── ped_mdp.ipynb │ ├── pedcar_mdp.ipynb │ ├── plot_results.ipynb │ ├── plots.ipynb │ ├── profiling.ipynb │ ├── qmdp_approximation.ipynb │ ├── test.ipynb │ └── tracking.ipynb ├── old_scripts │ ├── accepting_states.jl │ ├── baseline_script.jl │ ├── carmdp_product.jl │ ├── carmdp_script.jl │ ├── carmdp_vi_until.jl │ ├── evaluation_script.jl │ ├── fast_pedcar_vi.jl │ ├── joint_eval.jl │ ├── jointmdp_script.jl │ ├── pedcar_local_vi.jl │ ├── pedcar_script.jl │ ├── pedcar_sync.jl │ ├── pedcar_vi.jl │ ├── pedcar_vi_benchmark.jl │ ├── pedcar_vi_eval.jl │ ├── pedmdp_local_vi.jl │ ├── pedmdp_script.jl │ └── pedmdp_vi_until.jl ├── src │ ├── baseline_policy.jl │ ├── decomposed_tracking.jl │ ├── decomposition.jl │ ├── masked_dqn.jl │ ├── masking.jl │ ├── qmdp_approximation.jl │ ├── render_helpers.jl │ └── util.jl ├── test │ ├── runtests.jl │ ├── test_car_mdp.jl │ ├── test_discretization.jl │ ├── test_interpolation.jl │ └── test_pedestrian_mdp.jl └── training_scripts │ ├── carmdp_training.jl │ ├── dqn_jointeval.jl │ ├── jointmdp_training.jl │ ├── pedcar_dqn.jl │ ├── pedcar_eval.jl │ ├── pedcar_training.jl │ ├── pedcar_vi.jl │ ├── pedmdp_training.jl │ ├── process_utility.jl │ ├── sparse_vi.jl │ ├── training.sh │ ├── training.tex │ └── until_dqn.jl ├── Constraint_RL_MPC ├── .idea │ ├── Constraint_RL_MPC.iml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── workspace.xml ├── Abgabe │ ├── Buffer │ │ ├── ReplayBuffer.py │ │ └── __pycache__ │ │ │ └── ReplayBuffer.cpython-35.pyc │ ├── Disturbances │ │ ├── external_disturbances_old.mat │ │ ├── external_disturbances_randn.mat │ │ └── external_disturbances_uniform.mat │ ├── Model │ │ ├── Linear_Env.py │ │ └── __pycache__ │ │ │ └── Linear_Env.cpython-35.pyc │ ├── Neural_Network │ │ ├── Actor_Model.py │ │ ├── Critic_Model.py │ │ ├── NeuralNetwork.py │ │ └── __pycache__ │ │ │ ├── Actor_Model.cpython-35.pyc │ │ │ ├── Critic_Model.cpython-35.pyc │ │ │ └── NeuralNetwork.cpython-35.pyc │ ├── Normalize │ │ ├── MinMax.py │ │ └── __pycache__ │ │ │ └── MinMax.cpython-35.pyc │ ├── Pre_training │ │ ├── Immediate_constraint_functions.py │ │ ├── 
Test_immediate_constraint_functions.py │ │ ├── __pycache__ │ │ │ └── constraints.cpython-35.pyc │ │ ├── constraints.py │ │ ├── constraints_test_E_low_weights.h5f │ │ ├── constraints_test_E_up_weights.h5f │ │ ├── constraints_test_T_low_weights.h5f │ │ ├── constraints_test_T_up_weights.h5f │ │ └── readme.txt │ ├── Training_MPC │ │ ├── MPC.py │ │ ├── Main_MPC.py │ │ ├── Main_System_Identification.py │ │ ├── SI_MPC_weights.h5f │ │ ├── SI_MinMax.npy │ │ ├── __pycache__ │ │ │ └── MPC.cpython-35.pyc │ │ └── readme.txt │ └── Training_RL │ │ ├── DDPG.py │ │ ├── Main_RL.py │ │ ├── __pycache__ │ │ └── DDPG.cpython-35.pyc │ │ ├── ddpg_Test1_5_weights_actor.h5f │ │ ├── ddpg_Test1_5_weights_critic.h5f │ │ ├── ddpg_Test2_5_weights_actor.h5f │ │ ├── ddpg_Test2_5_weights_critic.h5f │ │ ├── ddpg_Test3_5_weights_actor.h5f │ │ ├── ddpg_Test3_5_weights_critic.h5f │ │ └── readme.txt └── README.md ├── LeaveNoTrace ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── coach_util.py ├── demo.py ├── env_util.py ├── envs │ ├── __init__.py │ ├── assets │ │ ├── cliff_cheetah.xml │ │ ├── cliff_walker.xml │ │ ├── peg_insertion.xml │ │ └── pusher.xml │ ├── cliff_envs.py │ ├── frozen_lake.py │ ├── hopper.py │ ├── peg_insertion.py │ └── pusher.py ├── lnt.py └── plot.png ├── PCPO └── iclr_2020_code_submission.zip ├── RL-Safety-Algorithms ├── LICENSE ├── README.md ├── experiments │ ├── benchmark_circle_tasks.py │ ├── benchmark_gather_tasks.py │ ├── benchmark_reach_tasks.py │ ├── benchmark_run_tasks.py │ └── safety_settings.py ├── rl_safety_algorithms │ ├── __init__.py │ ├── algs │ │ ├── __init__.py │ │ ├── core.py │ │ ├── cpo │ │ │ ├── __init__.py │ │ │ ├── cpo.py │ │ │ └── defaults.py │ │ ├── iwpg │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── iwpg.py │ │ ├── lag-trpo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── lag-trpo.py │ │ ├── npg │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── npg.py │ │ ├── pdo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── pdo.py │ │ ├── trpo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── trpo.py │ │ ├── utils.py │ │ └── vtrace.py │ ├── benchmark.py │ ├── common │ │ ├── __init__.py │ │ ├── experiment_analysis.py │ │ ├── loggers.py │ │ ├── model.py │ │ ├── mpi_tools.py │ │ ├── multi_processing_utils.py │ │ ├── online_mean_std.py │ │ ├── trainer.py │ │ └── utils.py │ ├── play.py │ └── train.py ├── setup.py └── tests │ ├── test_algs_mpi.py │ ├── test_algs_single_thread.py │ ├── test_gae.py │ ├── test_mean_std.py │ ├── test_mean_std_mpi.py │ └── test_trust_region_utils.py ├── Safe-MBPO ├── .gitignore ├── LICENSE ├── README.md ├── config │ ├── ant.json │ ├── cheetah-no-flip.json │ ├── hopper.json │ └── humanoid.json ├── main.py ├── requirements.txt └── src │ ├── __init__.py │ ├── checkpoint.py │ ├── cli.py │ ├── config.py │ ├── defaults.py │ ├── dynamics.py │ ├── log.py │ ├── normalization.py │ ├── policy.py │ ├── sampling.py │ ├── shared.py │ ├── smbpo.py │ ├── squashed_gaussian.py │ ├── ssac.py │ ├── torch_util.py │ ├── train.py │ └── util.py ├── Safe-RL-Benchmark ├── .dockerignore ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.rst ├── SafeRLBench │ ├── __init__.py │ ├── algo │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── a3c.py │ │ ├── policygradient.py │ │ ├── q_learning.py │ │ ├── safeopt.py │ │ └── test.py │ ├── base.py │ ├── bench.py │ ├── configuration.py │ ├── envs │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── _quadrocopter │ │ │ ├── __init__.py │ │ │ ├── quadrocopter_classes.py │ │ │ ├── quadrotor_dynamics.py │ 
│ │ ├── quaternions.py │ │ │ └── transformations.py │ │ ├── general_mountaincar.py │ │ ├── gym_wrap.py │ │ ├── linear_car.py │ │ ├── mdp.py │ │ ├── quadrocopter.py │ │ └── test.py │ ├── error.py │ ├── measure.py │ ├── monitor.py │ ├── policy │ │ ├── __init__.py │ │ ├── controller.py │ │ ├── linear_policy.py │ │ ├── neural_network.py │ │ └── test.py │ ├── spaces │ │ ├── __init__.py │ │ ├── bounded_space.py │ │ ├── discrete_space.py │ │ ├── rd_space.py │ │ └── test.py │ └── test │ │ ├── test_bench.py │ │ ├── test_configuration.py │ │ ├── test_integration.py │ │ └── test_measure.py ├── docs │ ├── Makefile │ ├── algorithm.rst │ ├── api │ │ ├── algo.rst │ │ ├── bench.rst │ │ ├── envs.rst │ │ ├── measure.rst │ │ ├── misc.rst │ │ ├── policy.rst │ │ ├── spaces.rst │ │ └── srb.rst │ ├── conf.py │ ├── environment.rst │ ├── index.rst │ └── toc.rst ├── examples │ ├── GettingStarted.ipynb │ └── SafeOpt.ipynb ├── misc │ ├── Dockerfile.python2 │ └── Dockerfile.python3 ├── requirements.txt ├── requirements_dev.txt ├── setup.py ├── test_code.sh └── tox.ini ├── Safe-Reinforcement-Learning └── README.md ├── Safe_reinforcement_learning ├── README.md ├── Safe_RL_LQR_experiment.m ├── iterate_calculate.m ├── poster.pdf ├── quadconstr.m ├── quadhess.m └── quadobj.m ├── Shield-Hybrid-Systems ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── Shared Code │ ├── BBBarbaricReachabilityFunction.jl │ ├── BBRigorousReachabilityFunction.jl │ ├── BBShieldSynthesis.jl │ ├── BBSquares.jl │ ├── Ball.jl │ ├── CCBarbaricReachabilityFunction.jl │ ├── Cruise.jl │ ├── DC-DC Converter.jl │ ├── DCShielding.jl │ ├── ExperimentUtilities.jl │ ├── FlatUI.jl │ ├── Get libbbshield.jl │ ├── Get libccshield.jl │ ├── Get libdcshield.jl │ ├── Get libopshield.jl │ ├── Get librwshield.jl │ ├── OPShielding.jl │ ├── OilPump.jl │ ├── PlotsDefaults.jl │ ├── RWShieldSynthesis.jl │ ├── RWSquares.jl │ ├── RandomWalk.jl │ ├── ShieldSynthesis.jl │ ├── Squares.jl │ ├── libbbshield │ │ ├── shield.c │ │ └── shield_dump (sample).c │ ├── libccshield │ │ ├── postshield.c │ │ ├── preshield.c │ │ └── shield_dump (sample).c │ ├── libdcshield │ │ └── shield.c │ ├── libopshield │ │ ├── shield.c │ │ └── shield_dump (sample).c │ └── librwshield │ │ ├── shield.c │ │ └── shield_dump (sample).c ├── fig-BBGranularityCost │ ├── Blueprints │ │ ├── BB__Shielded.xml │ │ └── TrainSaveEvaluateSingle.q │ ├── ExtractQueryResults.jl │ ├── Figure from CSV.jl │ ├── Get libbbshield.jl │ ├── Run Experiment.jl │ └── Synthesize Set of Shields.jl ├── fig-BBShieldRobustness │ ├── Check Robustness of Shields.jl │ ├── Get libbbshield.jl │ ├── Run Experiment.jl │ └── StatisticalChecking.jl ├── fig-BBShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── BB__PostShielded.xml │ │ ├── BB__PreShielded.xml │ │ ├── BB__ShieldedLayabout.xml │ │ ├── BB__Unshielded.xml │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── ShieldedLayabout.q │ │ └── UnShielded.q │ ├── Example.png │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-BarbaricMethodAccuracy │ ├── Example.png │ ├── Reliability of Barbaric Method.jl │ └── Run Experiment.jl ├── fig-CCShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── CC__PostShieldedDeterministic.xml │ │ ├── CC__PostShieldedNondeterministic.xml │ │ ├── CC__Shielded.xml │ │ ├── CC__Unshielded.xml │ │ ├── LoadEvaluate.q │ │ ├── MinimizeCostEvaluate.q │ │ ├── MinimizeInterventionsEvaluate.q │ │ ├── NoStrategyEvaluate.q │ │ ├── TrainSaveEvaluate.q │ │ └── TrainSaveEvaluateSingle.q │ ├── Example.png │ ├── PostShield 
Strategy.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-DCShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── DC__PostShielded.xml │ │ ├── DC__PreShielded.xml │ │ ├── DC__ShieldedLayabout.xml │ │ ├── DC__Unshielded.xml │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── ShieldedLayabout.q │ │ └── Unshielded.q │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-DifferenceRigorousBarbaric │ ├── Example.png │ └── Run Experiment.jl ├── fig-NoRecovery │ ├── BB No Recovery.jl │ ├── Example.png │ └── Run Experiment.jl ├── fig-OPShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── OP__PostShielded.xml │ │ ├── OP__PreShielded.xml │ │ ├── OP__ShieldedLayabout.xml │ │ └── OP__Unshielded.xml │ ├── Example.png │ ├── OPStrategyVisualisation.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-RWShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── RW__PostShielded.xml │ │ ├── RW__PreShielded.xml │ │ ├── RW__ShieldedLayabout.xml │ │ ├── RW__Unshielded.xml │ │ ├── ShieldedLayabout.q │ │ └── Unshielded.q │ ├── Example.png │ ├── RandomWalk Shield.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── run_all.sh ├── tab-BBSynthesis │ ├── Blueprints │ │ ├── BB__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── ReadMe.md │ ├── Run Experiment.jl │ ├── Statistical Checking of Shield.jl │ ├── Synthesize Set of Shields.jl │ └── Table from CSVs.jl ├── tab-CCSynthesis │ ├── Blueprints │ │ ├── CC__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CC Statistical Checking of Shield.jl │ ├── CC Synthesize Set of Shields.jl │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── ReadMe.md │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── tab-DCSynthesis │ ├── Blueprints │ │ ├── DC__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── DC Statistical Checking of Shield.jl │ ├── DC Synthesize Set of Shields.jl │ ├── DCShield.jl │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── tab-OPSynthesis │ ├── Blueprints │ │ ├── OP__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── OP Statistical Checking of Shield.jl │ ├── OP Synthesize Set of Shields.jl │ ├── OPShield.jl │ ├── Run Experiment.jl │ └── Table from CSVs.jl └── tab-RWSynthesis │ ├── Blueprints │ ├── RW__PreShielded.xml │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── RW Statistical Checking of Shield.jl │ ├── RW Synthesize Set of Shields.jl │ ├── ReadMe.md │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── safe-mbrl ├── .gitignore ├── baseline │ ├── LICENSE │ ├── README.md │ ├── safe_rl │ │ ├── __init__.py │ │ ├── pg │ │ │ ├── agents.py │ │ │ ├── algos.py │ │ │ ├── buffer.py │ │ │ ├── network.py │ │ │ ├── run_agent.py │ │ │ ├── trust_region.py │ │ │ └── utils.py │ │ ├── sac │ │ │ ├── __init__.py │ │ │ └── sac.py │ │ └── utils │ │ │ ├── load_utils.py │ │ │ ├── logx.py │ │ │ ├── mpi_tf.py │ │ │ ├── mpi_tools.py │ │ │ ├── readme.md │ │ │ ├── run_utils.py │ │ │ └── serialization_utils.py │ ├── scripts │ │ ├── experiment.py │ │ ├── plot.py │ │ └── test_policy.py │ └── setup.py ├── config.yml ├── data │ ├── cg1 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── 
progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ ├── cg2 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ ├── figures │ │ ├── TestFigure3.png │ │ ├── pg1-Cost.png │ │ ├── pg1-Reward.png │ │ ├── pg2-Cost.png │ │ └── pg2-Reward.png │ ├── pg1 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── fix-dynamic-model-compare-optimizer │ │ │ ├── model-ensemble-with-cem │ │ │ │ └── model-ensemble-with-cem_s1000 │ │ │ │ │ ├── config.yml │ │ │ │ │ └── progress.txt │ │ │ ├── model-ensemble-with-random │ │ │ │ └── model-ensemble-with-random_s1000 │ │ │ │ │ ├── config.yml │ │ │ │ │ └── progress.txt │ │ │ └── model-ensemble-with-ts │ │ │ │ └── model-ensemble-with-ts_s1000 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ └── pg2 │ │ ├── cpo │ │ ├── 
a-target10 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── b-target7.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── c-target5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── d-target2.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── e-target0.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── f-target0.01 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ ├── ensemble-cem_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-cem_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-cem_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ ├── ensemble-random_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-random_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-random_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ ├── ensemble-rce_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-rce_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-rce_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── trpo │ │ └── trpo │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── trpo_lagrangian │ │ ├── a-target10 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── b-target7.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── c-target5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── d-taget2-5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── f-target0.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── g-target0.01 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── weights │ │ ├── config.yml │ │ └── progress.txt ├── env │ ├── LICENSE │ ├── README.md │ ├── build │ │ └── lib │ │ │ └── safety_gym │ │ │ ├── __init__.py │ │ │ └── random_agent.py │ ├── dist │ │ └── safety_gym-0.0.0-py3.6.egg │ ├── safety_gym.png │ ├── safety_gym │ │ ├── __init__.py │ │ ├── bench │ │ │ ├── bench_utils.py │ │ │ └── characteristic_scores.json │ │ ├── envs │ │ │ ├── __init__.py │ │ │ ├── engine.py │ │ │ ├── mujoco.py │ │ │ ├── suite-origin.py │ │ │ ├── suite.py │ │ │ └── world.py │ │ ├── random_agent.py │ │ ├── test │ │ │ ├── test_bench.py │ │ │ ├── test_button.py │ │ │ ├── test_determinism.py │ │ │ ├── test_engine.py │ │ │ ├── test_envs.py │ │ │ ├── test_goal.py │ │ │ └── test_obs.py │ │ └── xmls │ │ │ ├── README.md │ │ │ ├── car-origin.xml │ │ │ ├── car.xml │ │ │ ├── car_vel.xml │ │ │ ├── doggo.xml │ │ │ ├── point-origin.xml │ │ │ ├── point.xml │ │ │ └── rover4We.xml │ └── setup.py ├── mbrl │ ├── .gitignore │ ├── __init__.py │ ├── controllers │ │ ├── __init__.py │ │ ├── mpc_controller.py │ │ └── safe_mpc_controller.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── constraint_model.py │ │ ├── ensemble.py │ │ └── model.py │ └── optimizers │ │ ├── __init__.py │ │ ├── cem.py │ │ ├── optimizer.py │ │ ├── random.py │ │ └── rce.py ├── media │ ├── cg1_random.gif │ ├── cg1_rce.gif │ ├── cg2_random.gif │ ├── cg2_rce.gif │ ├── pg1_random.gif │ ├── pg1_rce.gif │ ├── pg1_trpo.gif │ ├── pg1_trpol.gif │ ├── pg2_random.gif │ ├── pg2_rce.gif │ ├── pg2_trpo_10.gif │ └── pg2_trpol_10.gif ├── readme.md ├── requirements.txt ├── run.py ├── script │ ├── count.py │ └── plot.py └── utils │ ├── __init__.py │ ├── env_utils.py │ ├── logx.py │ ├── mpi_pytorch.py │ ├── mpi_tools.py │ ├── plot.py │ ├── run_entrypoint.py │ ├── run_utils.py │ ├── serialization_utils.py │ └── user_config.py ├── safeRL ├── .gitignore ├── .gitmodules ├── HCOPE │ ├── filter.py │ ├── hcope.py │ ├── hcope_debug.py │ ├── hcope_test.py │ └── policies.py ├── LICENSE.txt ├── README.md ├── 
README.md~ ├── citation.cff ├── importance_sampling │ ├── importance_sampling.png │ └── importance_sampling.py ├── results │ ├── IS_dist_+_0.1.png │ ├── IS_dist_minus_0.1.png │ ├── IS_dist_random.png │ ├── IS_variance.png │ ├── Result.png │ ├── Theorem.png │ ├── safe_actions.gif │ ├── safe_actions_instability.gif │ ├── safety_layer.png │ ├── safety_optimization.png │ └── safety_signal.png └── safe_exploration │ ├── filter.py │ ├── learn_safety_function.py │ ├── logz.py │ ├── lqr_env.py │ ├── optimizers.py │ ├── plotSafetyFuct.py │ ├── policies_safe.py │ ├── run_policy_contrained.py │ ├── shared_noise.py │ ├── train_safe_explorer.py │ └── utils.py ├── safe_learning ├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile.dev ├── Dockerfile.python2 ├── Dockerfile.python3 ├── LICENSE ├── Makefile ├── README.rst ├── docs │ ├── Makefile │ ├── _templates │ │ └── template.rst │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── introduction.rst │ ├── make.bat │ └── requirements.txt ├── examples │ ├── 1d_example.ipynb │ ├── 1d_region_of_attraction_estimate.ipynb │ ├── README.rst │ ├── adaptive_safety_verification.ipynb │ ├── basic_dynamic_programming.ipynb │ ├── inverted_pendulum.ipynb │ ├── lyapunov_function_learning.ipynb │ ├── plotting.py │ ├── reinforcement_learning_cartpole.ipynb │ ├── reinforcement_learning_pendulum.ipynb │ └── utilities.py ├── requirements.txt ├── requirements_dev.txt ├── safe_learning │ ├── __init__.py │ ├── configuration.py │ ├── functions.py │ ├── lyapunov.py │ ├── reinforcement_learning.py │ ├── tests │ │ ├── test_functions.py │ │ ├── test_lyapunov.py │ │ ├── test_rl.py │ │ └── test_utilities.py │ └── utilities.py ├── scripts │ ├── jupyter_output.py │ └── test_code.sh └── setup.py ├── safe_near_optimal_mdp ├── .gitignore ├── GPSG.png ├── LICENSE ├── README.md ├── arguments.py ├── data │ └── simple │ │ └── random_settings.npz ├── gp_safety_gym.py ├── main_oracle.py ├── main_safemdp.py ├── main_seo.py ├── main_sno_mdp.py ├── simple_make_rand_settings.py ├── test │ └── test_gp_safety_gym.py └── utils │ ├── mdp_utilities.py │ ├── reward_utilities.py │ └── safety_utilities.py ├── safe_rl_papers ├── LICENSE └── README.md ├── safety-starter-agents ├── .gitignore ├── LICENSE ├── README.md ├── safe_rl │ ├── __init__.py │ ├── pg │ │ ├── agents.py │ │ ├── algos.py │ │ ├── buffer.py │ │ ├── network.py │ │ ├── run_agent.py │ │ ├── trust_region.py │ │ └── utils.py │ ├── sac │ │ ├── __init__.py │ │ └── sac.py │ └── utils │ │ ├── load_utils.py │ │ ├── logx.py │ │ ├── mpi_tf.py │ │ ├── mpi_tools.py │ │ ├── readme.md │ │ ├── run_utils.py │ │ └── serialization_utils.py ├── scripts │ ├── experiment.py │ ├── plot.py │ └── test_policy.py └── setup.py └── vertex-net ├── .gitignore ├── README.md ├── __init__.py ├── algos ├── __init__.py └── ddpy.py ├── envs ├── __init__.py ├── hovercraft.py └── pendulum.py ├── nets ├── __init__.py ├── policy_net.py ├── value_net.py └── vertex_policy_net.py ├── run_hovercraft.py ├── run_pendulum.py └── utils ├── __init__.py └── replay_buffer.py /Safe-RL/AlwaysSafe/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .idea 3 | notebooks/ 4 | results/ 5 | Pipfile.lock 6 | __pycache__ 7 | *.pyc 8 | 9 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Thiago D. 
Simão 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | matplotlib = "*" 8 | pandas = "*" 9 | tqdm = "*" 10 | gym = "*" 11 | cvxpy = "*" 12 | 13 | [packages.gym_factored] 14 | git = "git://github.com/tdsimao/gym-factored.git" 15 | editable = true 16 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .opt_cmdp import OptCMDPAgent 2 | from .abs_opt_cmdp import AbsOptCMDPAgent 3 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/planners/__init__.py: -------------------------------------------------------------------------------- 1 | from .lp import LinearProgrammingPlanner 2 | from .lp_optimistic import OptimisticLinearProgrammingPlanner 3 | from .abs_lp_optimistic import AbsOptimisticLinearProgrammingPlanner 4 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | sandbox 2 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/scripts/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/tests/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/util/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/util/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/util/grb.py: -------------------------------------------------------------------------------- 1 | try: 2 | from gurobipy import Model, quicksum, GRB, GurobiError 3 | GUROBI_FOUND = True 4 | except ModuleNotFoundError as e: 5 | GUROBI_FOUND = False 6 | 7 | 8 | def solve_gurobi_lp(model, verbose=False, check_if_infeasible=False): 9 | if not verbose: 10 | model.Params.OutputFlag = 0 11 | model.optimize() 12 | 13 | if model.status == GRB.Status.INF_OR_UNBD: 14 | # Turn presolve off to determine whether model is infeasible or unbounded 15 | model.setParam(GRB.Param.Presolve, 0) 16 | model.optimize() 17 | 18 | if model.status == GRB.Status.OPTIMAL: 19 | # model.write('model.lp') 20 | # model.write('model.sol') 21 | if verbose: 22 | print('Optimal objective: {}'.format(model.objVal)) 23 | return model 24 | elif model.status == GRB.Status.UNBOUNDED: 25 | model.write('model_unbounded.lp') 26 | raise GurobiError(model.status, 27 | 'Optimization stopped (UNBOUNDED), check the file model_unbounded.lp') 28 | elif model.status == GRB.Status.INFEASIBLE: 29 | if check_if_infeasible: 30 | model.write('model_infeasible.lp') 31 | model.computeIIS() 32 | model.write("model.ilp") 33 | raise GurobiError(model.status, 34 | 'Optimization stopped (INFEASIBLE), check files model_infeasible.lp and model.ilp') 35 | return model 36 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | *.jld 3 | *.jld2 4 | **/log* 5 | **/.ipynb_checkpoints 6 | *.lab 7 | *.tra 8 | *.csv 9 | *.webm 10 | *.bson 11 | *.hoa -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/datagen.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 generate_dataset.jl --seed=1 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen1.jodhpur.out & 2 | nohup julia1.0 generate_dataset.jl --seed=2 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen2.jodhpur.out & 3 | nohup julia1.0 generate_dataset.jl --seed=3 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen3.jodhpur.out & 4 | nohup julia1.0 generate_dataset.jl --seed=4 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen4.jodhpur.out & 5 | nohup julia1.0 generate_dataset.jl --seed=5 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen5.jodhpur.out & 6 | 7 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/generate_data.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 generate_dataset.jl --folder=/scratch/boutonm/ --ntrain=3000 --nval=500 --seed=1 > datagen.jodhpur.out & 2 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/model_loading.jl: -------------------------------------------------------------------------------- 1 | using Flux 2 | using StaticArrays 3 | using ProgressMeter 4 | using POMDPs 5 | using POMDPToolbox 6 | using AutomotiveDrivingModels 7 | using AutomotivePOMDPs 8 | using AutomotiveSensors 9 | using PedCar 10 | using BSON: @load 11 | 12 | 
mdp = PedCarMDP(pos_res=2.0, vel_res=2., ped_birth=0.7, car_birth=0.7) 13 | pomdp = UrbanPOMDP(env=mdp.env, 14 | sensor = GaussianSensor(false_positive_rate=0.05, 15 | pos_noise = LinearNoise(min_noise=0.5, increase_rate=0.05), 16 | vel_noise = LinearNoise(min_noise=0.5, increase_rate=0.05)), 17 | ego_goal = LaneTag(2, 1), 18 | max_cars=1, 19 | max_peds=1, 20 | car_birth=0.7, 21 | ped_birth=0.7, 22 | obstacles=false, # no fixed obstacles 23 | lidar=false, 24 | ego_start=20, 25 | ΔT=0.5) 26 | 27 | rng = MersenneTwister(1) 28 | policy = RandomPolicy(rng, pomdp, VoidUpdater()) 29 | 30 | 31 | @load "model_1.bson" model 32 | @load "weights_1.bson" weights 33 | 34 | @time mean(loss(val_X[i], val_Y[i]) for i=1:length(val_X)) 35 | 36 | function loss(x, y) 37 | l = mean(Flux.mse.(model.(x), y)) 38 | truncate!(model) 39 | reset!(model) 40 | return l 41 | end 42 | 43 | loss.(val_X, val_Y) 44 | 45 | xs = Flux.batchseq(val_X) 46 | ys = Flux.batchseq(val_Y) 47 | loss(xs, ys) 48 | 49 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/scp_model.sh: -------------------------------------------------------------------------------- 1 | scp boutonm@bethpage:/home/boutonm/AutomotiveSafeRL/training_scripts/RNNFiltering/*.bson . 2 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/train.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 bagging_training.jl --resume 10 --seed 10 > nn_1.jodhpur.out & 2 | nohup julia1.0 bagging_training.jl --resume 20 --seed 20 > nn_2.jodhpur.out & 3 | nohup julia1.0 bagging_training.jl --resume 30 --seed 30 > nn_3.jodhpur.out & 4 | nohup julia1.0 bagging_training.jl --resume 40 --seed 40 > nn_4.jodhpur.out & 5 | nohup julia1.0 bagging_training.jl --resume 50 --seed 50 > nn_5.jodhpur.out & 6 | #nohup julia1.0 bagging_training.jl --seed 6 > nn_6.jodhpur.out & 7 | #nohup julia1.0 bagging_training.jl --seed 7 > nn_7.jodhpur.out & 8 | #nohup julia1.0 bagging_training.jl --seed 8 > nn_8.jodhpur.out & 9 | #nohup julia1.0 bagging_training.jl --seed 9 > nn_9.jodhpur.out & 10 | #nohup julia1.0 bagging_training.jl --seed 10 > nn_10.jodhpur.out & 11 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/train_single.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 train_tracking.jl --seed=1 --entity=car > car1.jodhpur.out & 2 | nohup julia1.0 train_tracking.jl --seed=2 --entity=car > car2.jodhpur.out & 3 | nohup julia1.0 train_tracking.jl --seed=3 --entity=car > car3.jodhpur.out & 4 | nohup julia1.0 train_tracking.jl --seed=4 --entity=car > car4.jodhpur.out & 5 | nohup julia1.0 train_tracking.jl --seed=5 --entity=car > car5.jodhpur.out & 6 | 7 | nohup julia1.0 train_tracking.jl --seed=1 --entity=ped > ped1.jodhpur.out & 8 | nohup julia1.0 train_tracking.jl --seed=2 --entity=ped > ped2.jodhpur.out & 9 | nohup julia1.0 train_tracking.jl --seed=3 --entity=ped > ped3.jodhpur.out & 10 | nohup julia1.0 train_tracking.jl --seed=4 --entity=ped > ped4.jodhpur.out & 11 | nohup julia1.0 train_tracking.jl --seed=5 --entity=ped > ped5.jodhpur.out & 12 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_product.jl: -------------------------------------------------------------------------------- 1 | rng = 
MersenneTwister(1) 2 | using AutomotivePOMDPs 3 | using MDPModelChecking 4 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 5 | using DiscreteValueIteration 6 | using ProgressMeter, Parameters, JLD 7 | 8 | params = UrbanParams(nlanes_main=1, 9 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 10 | crosswalk_length = [14.0, 14., 14.0], 11 | crosswalk_width = [4.0, 4.0, 3.1], 12 | stop_line = 22.0) 13 | env = UrbanEnv(params=params); 14 | 15 | mdp = CarMDP(env = env, vel_res=2.0, pos_res=3.0); 16 | 17 | function MDPModelChecking.labels(mdp::CarMDP, s::CarMDPState) 18 | if s.crash 19 | return ["crash"] 20 | elseif s.ego.posF.s >= get_end(mdp.env.roadway[mdp.ego_goal]) && 21 | get_lane(mdp.env.roadway, s.ego).tag == mdp.ego_goal 22 | return ["goal"] 23 | else 24 | return ["!crash", "!goal"] 25 | end 26 | end 27 | 28 | property = "!crash U goal" 29 | 30 | solver = ModelCheckingSolver(property=property, solver=ValueIterationSolver()) 31 | 32 | policy = solve(solver, mdp, verbose=true) 33 | 34 | JLD.save("carmdp.jld", "policy", policy) 35 | JLD.save("car_acc_states.jld", "accepting_states", policy.mdp.accepting_states) 36 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_vi_until.jl: -------------------------------------------------------------------------------- 1 | rng = MersenneTwister(1) 2 | @everywhere begin 3 | using AutomotivePOMDPs 4 | using MDPModelChecking 5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 6 | using DiscreteValueIteration 7 | using ProgressMeter, Parameters, JLD 8 | end 9 | params = UrbanParams(nlanes_main=1, 10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 11 | crosswalk_length = [14.0, 14., 14.0], 12 | crosswalk_width = [4.0, 4.0, 3.1], 13 | stop_line = 22.0) 14 | env = UrbanEnv(params=params); 15 | 16 | mdp = CarMDP(env = env, pos_res=2., vel_res=3., car_birth=0.7) 17 | 18 | # reachability analysis 19 | mdp.collision_cost = 0. 20 | mdp.γ = 1. 21 | mdp.goal_reward = 1. 22 | 23 | solver = ParallelValueIterationSolver(n_procs=7) 24 | 25 | policy = solve(solver, mdp, verbose=true) 26 | JLD.save("car_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy) 27 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/pedcar_vi_benchmark.jl: -------------------------------------------------------------------------------- 1 | @everywhere begin 2 | using POMDPs, POMDPToolbox, DiscreteValueIteration 3 | using AutomotivePOMDPs, AutomotiveDrivingModels 4 | end 5 | rng = MersenneTwister(1) 6 | 7 | params = UrbanParams(nlanes_main=1, 8 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 9 | crosswalk_length = [14.0, 14., 14.0], 10 | crosswalk_width = [4.0, 4.0, 3.1], 11 | stop_line = 22.0) 12 | env = UrbanEnv(params=params); 13 | 14 | mdp = PedCarMDP(env=env, pos_res=6.0, vel_res=3.0, ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0)) 15 | # reachability analysis 16 | mdp.collision_cost = 0. 17 | mdp.γ = 1. 18 | mdp.goal_reward = 1. 
19 | 20 | solver = ParallelValueIterationSolver(n_procs=8, max_iterations=4, belres=1e-4) 21 | policy = solve(solver, mdp, verbose=true) 22 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/pedmdp_vi_until.jl: -------------------------------------------------------------------------------- 1 | rng = MersenneTwister(1) 2 | @everywhere begin 3 | using AutomotivePOMDPs 4 | using MDPModelChecking 5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 6 | using DiscreteValueIteration 7 | using ProgressMeter, Parameters, JLD 8 | end 9 | params = UrbanParams(nlanes_main=1, 10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 11 | crosswalk_length = [14.0, 14., 14.0], 12 | crosswalk_width = [4.0, 4.0, 3.1], 13 | stop_line = 22.0) 14 | env = UrbanEnv(params=params); 15 | 16 | mdp = PedMDP(env = env, pos_res=1., vel_res=1., ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0)) 17 | 18 | # reachability analysis 19 | mdp.collision_cost = 0. 20 | mdp.γ = 1. 21 | mdp.goal_reward = 1. 22 | 23 | solver = ParallelValueIterationSolver(n_procs=7) 24 | 25 | policy = solve(solver, mdp, verbose=true) 26 | JLD.save("ped_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy) 27 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/runtests.jl: -------------------------------------------------------------------------------- 1 | using Base.Test 2 | using Parameters 3 | #dep 4 | include("../AutomotivePOMDPs/AutomotivePOMDPs.jl") 5 | using AutomotivePOMDPs 6 | using POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 7 | using GridInterpolations, StaticArrays 8 | include("mdp_models/discretization.jl") 9 | include("mdp_models/pedestrian_mdp/pomdp_types.jl") 10 | include("mdp_models/pedestrian_mdp/state_space.jl") 11 | 12 | rng = MersenneTwister(1) 13 | 14 | include("test_discretization.jl") 15 | include("test_pedestrian_mdp.jl") 16 | include("test_interpolation.jl") 17 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/test_car_mdp.jl: -------------------------------------------------------------------------------- 1 | 2 | function test_stateindexing(mdp::CarMDP) 3 | state_space = states(mdp) 4 | for (i, s) in enumerate(state_space) 5 | if i != stateindex(mdp, s) 6 | return false 7 | end 8 | end 9 | return true 10 | end 11 | 12 | 13 | 14 | params = UrbanParams(nlanes_main=1, 15 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 16 | crosswalk_length = [10.0, 10., 10.0], 17 | crosswalk_width = [4.0, 4.0, 3.1], 18 | stop_line = 22.0) 19 | env = UrbanEnv(params=params); 20 | 21 | mdp = CarMDP(env = env); 22 | 23 | @test test_stateindexing(mdp) 24 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/test_pedestrian_mdp.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | function test_stateindexing(mdp::PedMDP) 4 | state_space = states(mdp) 5 | for (i, s) in enumerate(state_space) 6 | if i != stateindex(mdp, s) 7 | return false 8 | end 9 | end 10 | return true 11 | end 12 | 13 | 14 | 15 | params = UrbanParams(nlanes_main=1, 16 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 17 | crosswalk_length = [10.0, 10., 10.0], 18 | 
crosswalk_width = [4.0, 4.0, 3.1], 19 | stop_line = 22.0) 20 | env = UrbanEnv(params=params); 21 | 22 | mdp = PedMDP(env = env); 23 | 24 | @test test_stateindexing(mdp) 25 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/training_scripts/training.sh: -------------------------------------------------------------------------------- 1 | nohup julia jointmdp_training.jl --log log_nm100 --cost 2 > log100.out & 2 | nohup julia jointmdp_training.jl --log log_nm101 --cost 3 > log101.out & 3 | nohup julia jointmdp_training.jl --log log_nm102 --cost 0.5 > log102.out & 4 | nohup julia jointmdp_training.jl --log log_nm103 --cost 10 > log103.out & 5 | nohup julia jointmdp_training.jl --log log_nm104 --cost 20 > log104.out & 6 | 7 | #nohup julia jointmdp_script.jl --log log60 --goal 1 > log60.out & 8 | #nohup julia jointmdp_script.jl --log log61 --goal 1.5 > log61.out & 9 | #nohup julia jointmdp_script.jl --log log62 --goal 2 > log62.out & 10 | #nohup julia jointmdp_script.jl --log log63 --goal 3 > log63.out & 11 | #nohup julia jointmdp_script.jl --log log64 --goal 5 > log64.out & 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/Constraint_RL_MPC.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/ReplayBuffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | 4 | 5 | class ReplayBuffer(): 6 | 7 | def __init__(self, buffer_size): 8 | self.buffer_size = buffer_size 9 | self.num_experiences = 0 10 | self.buffer = deque() 11 | 12 | def size(self): 13 | return self.buffer_size 14 | 15 | def add_with_dist(self, state, action, reward, new_state, done, dist): 16 | experience = (state, action, reward, new_state, done, dist) 17 | if self.num_experiences < self.buffer_size: 18 | self.buffer.append(experience) 19 | self.num_experiences += 1 20 | else: 21 | self.buffer.popleft() 22 | self.buffer.append(experience) 23 | 24 | def add(self, state, action, reward, new_state, done): 25 | experience = (state, action, reward, new_state, done) 26 | if self.num_experiences < self.buffer_size: 27 | self.buffer.append(experience) 28 | self.num_experiences += 1 29 | else: 30 | self.buffer.popleft() 31 | self.buffer.append(experience) 32 | 33 | def sample(self, batch_size): 34 | # Randomly sample batch_size examples 35 | if self.num_experiences < batch_size: 36 | return random.sample(self.buffer, self.num_experiences) 37 | else: 38 | return random.sample(self.buffer, batch_size) 39 | 40 | def erase(self): 41 | self.buffer = 
deque() 42 | self.num_experiences = 0 43 | 44 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/MinMax.py: -------------------------------------------------------------------------------- 1 | """ 2 | Min - max normalization 3 | """ 4 | 5 | 6 | def minmax_norm(x, min_x, max_x): 7 | """ 8 | This function normalizes data 9 | :param x: input data 10 | :param min_x: minimum value 11 | :param max_x: output data 12 | :return: normalized input data x_norm 13 | """ 14 | x_norm = (x - min_x)/(max_x - min_x) 15 | 16 | return x_norm 17 | 18 | 19 | def minmax_norm_back(x_norm, min_x, max_x): 20 | """ 21 | This function denormalizes data 22 | :param x_norm: input data 23 | :param min_x: minimum value 24 | :param max_x: output data 25 | :return: real input data x 26 | """ 27 | x = x_norm * (max_x - min_x) + min_x 28 | 29 | return x 30 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Immediate_constraint_functions.py ################################################################## 3 | 4 | Pre-training phase to learn immediate constraint functions 5 | has to be run once for every constraint 6 | 7 | 8 | PARAMETER: 9 | 10 | num_samples = number of samples per episode 11 | num_episodes = number of episodes 12 | 13 | state_flag = 0-> Temperature low, 1-> Energy low, 2-> Temperature up, 3-> Energy up, defines the safety signal 14 | 15 | # define network parameters 16 | num_in 17 | num_out 18 | num_hidden 19 | activation 20 | activation_out 21 | optimizer 22 | 23 | OUTPUT: 24 | 25 | network weights of the trained network are saved in the same folder 26 | 27 | 28 | ##### Test_immediate_constraint_functions.py ############################################################## 29 | 30 | evaluation of the safety layer, to make sure that the constraints are working 31 | loads the neural network weights, so they have to be trained beforehand 32 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Main_System_Identification.py ################################################ 3 | 4 | trains a neural network for system identification of the model 5 | has to be run first 6 | 7 | PARAMETER: 8 | 9 | num_samples = number of samples per episode 10 | num_episodes = number of episodes 11 | 12 | # network parameters 13 | num_hidden 14 | activation 15 | activation_out 16 | optimizer 17 | 18 | # model parameters 19 | Q 20 | R 21 | 22 | dist_flag = 0-> train without disturbances, 1-> train with disturbances 23 | 24 | 25 | OUTPUT: 26 | 27 | network weights of the trained network are
saved in the same folder 28 | evolution of error is plotted 29 | 30 | 31 | ##### Main_MPC.py ################################################################## 32 | 33 | Execution of the MPC algorithm with the trained network 34 | 35 | 36 | PARAMETER: 37 | 38 | network parameters have to be the same as the SI parameters 39 | 40 | N = prediction horizon 41 | S = samples to be evaluated 42 | 43 | OUTPUT: 44 | 45 | evolution of states and inputs is plotted 46 | 47 | 48 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f 
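The Training_MPC readme above describes Main_MPC.py as running MPC on top of the trained system-identification network, with prediction horizon N, quadratic weights Q and R, and S closed-loop samples to evaluate. A minimal sketch of that general pattern (random-shooting MPC over a learned one-step model) follows; it is not the repository's code, and the names make_linear_model and mpc_step as well as the toy linear dynamics are invented for illustration only.

import numpy as np

def make_linear_model(A, B):
    """Stand-in for a trained one-step dynamics model: x_next = f(x, u)."""
    return lambda x, u: A @ x + B @ u

def mpc_step(f, x0, Q, R, N=10, n_candidates=500, u_low=-1.0, u_high=1.0, rng=None):
    """Return the first input of the lowest-cost random input sequence over horizon N."""
    rng = rng if rng is not None else np.random.default_rng(0)
    n_u = R.shape[0]
    best_cost, best_u0 = np.inf, np.zeros(n_u)
    for _ in range(n_candidates):
        u_seq = rng.uniform(u_low, u_high, size=(N, n_u))
        x, cost = np.array(x0, dtype=float), 0.0
        for u in u_seq:
            x = f(x, u)                    # roll the learned model forward
            cost += x @ Q @ x + u @ R @ u  # quadratic stage cost with weights Q, R
        if cost < best_cost:
            best_cost, best_u0 = cost, u_seq[0]
    return best_u0

if __name__ == "__main__":
    # Toy linear system standing in for the identified model.
    A = np.array([[1.0, 0.1], [0.0, 0.9]])
    B = np.array([[0.0], [0.1]])
    f = make_linear_model(A, B)
    Q, R = np.eye(2), 0.01 * np.eye(1)
    x = np.array([1.0, 0.0])
    for _ in range(20):        # S: number of evaluated closed-loop samples
        u = mpc_step(f, x, Q, R, N=10)
        x = f(x, u)            # apply only the first input, then re-plan
    print("final state:", x)

In the actual scripts, the linear stand-in would presumably be replaced by the network trained in Main_System_Identification.py, and the constraint handling described in the Pre_training and Training_RL readmes would act on the chosen inputs.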
-------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Main_RL.py ################################################################## 3 | 4 | Training phase and test phase of the DDPG algorithm 5 | it can be evaluated together with MPC, if enabled 6 | 7 | 8 | PARAMETER: 9 | 10 | num_samples = number of samples per episode 11 | num_episodes = number of episodes 12 | 13 | episodesTrain = number of episodes for the training 14 | episodesTest = number of episodes for the test 15 | stepsEpisodes = number of samples per episode during training 16 | stepsEpisodes_test = number of samples per episode during testing 17 | 18 | future_steps_tracing = number of steps the tracing trajectory is used from the future -> 0 = None 19 | buffersize = size of replay buffer 20 | 21 | disturbance = 0 -> no disturbance , 1 -> added disturbance 22 | future_steps_dist = number of steps the disturbance is used from the future -> 0 = None 23 | 24 | # parameters of the noise process 25 | sigma 26 | theta 27 | mu 28 | 29 | constraints = Flag constraints [None, SafetyLayer, Rewardshaping] 30 | 31 | # Environmental details 32 | Q 33 | R 34 | ENV_NAME = Name where the weights are saved 35 | 36 | 37 | # MPC parameters 38 | do_MPC = Flag whether MPC should be evaluated 39 | N = Prediction horizon for MPC result 40 | 41 | OUTPUT: 42 | 43 | network weights of the trained network are saved in the same folder 44 | evolution of states and inputs is plotted 45 | 46 | 47 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "coach"] 2 | path = coach 3 | url = https://github.com/ben-eysenbach/coach.git 4 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/).
29 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/envs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/frozen_lake.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv as _FrozenLakeEnv 2 | from gym import spaces 3 | import numpy as np 4 | 5 | 6 | class FrozenLakeEnv(_FrozenLakeEnv): 7 | """Modified version of FrozenLake-v0. 8 | 9 | 1. Convert integer states to one hot encoding. 10 | 2. Make the goal state reversible 11 | """ 12 | def __init__(self, map_name): 13 | super(FrozenLakeEnv, self).__init__(map_name=map_name, 14 | is_slippery=False) 15 | self.observation_space = spaces.Box(low=np.zeros(self.nS), 16 | high=np.ones(self.nS)) 17 | # Make the goal state not terminate 18 | goal_s = self.nS - 1 19 | left_s = goal_s - 1 20 | up_s = goal_s - int(np.sqrt(self.nS)) 21 | 22 | self.P[goal_s] = { 23 | 0: [(1.0, left_s, 0.0, False)], 24 | 1: [(1.0, goal_s, 1.0, True)], 25 | 2: [(1.0, goal_s, 1.0, True)], 26 | 3: [(1.0, up_s, 0.0, True)], 27 | } 28 | 29 | def _s_to_one_hot(self, s): 30 | one_hot = np.zeros(self.nS) 31 | one_hot[s] = 1. 32 | return one_hot 33 | 34 | def step(self, a): 35 | (s, r, done, info) = super(FrozenLakeEnv, self).step(a) 36 | done = (s == self.nS - 1) # Assume we can't detect dangerous states 37 | one_hot = self._s_to_one_hot(s) 38 | r -= 1 # Make the reward be in {-1, 0} 39 | return (one_hot, r, done, info) 40 | 41 | def reset(self): 42 | s = super(FrozenLakeEnv, self).reset() 43 | one_hot = self._s_to_one_hot(s) 44 | return one_hot 45 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/hopper.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.hopper import HopperEnv as _HopperEnv 2 | 3 | 4 | class HopperEnv(_HopperEnv): 5 | """Modified version of Hopper-v1.""" 6 | 7 | def step(self, action): 8 | (obs, r, done, info) = super(HopperEnv, self).step(action) 9 | return (obs, r, False, info) 10 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/plot.png -------------------------------------------------------------------------------- /Safe-RL/PCPO/iclr_2020_code_submission.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/PCPO/iclr_2020_code_submission.zip -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Sven Gronauer, Technical University Munich (TUM) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/README.md: -------------------------------------------------------------------------------- 1 | # RL-Safety-Algorithms 2 | 3 | Algorithms for Safe Reinforcement Learning Problems that were tested and 4 | benchmarked in the 5 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym). 6 | 7 | ## Installation 8 | 9 | Install this repository with: 10 | 11 | ``` 12 | git clone https://github.com/SvenGronauer/RL-Safety-Algorithms.git 13 | 14 | cd RL-Safety-Algorithms 15 | 16 | pip install -e . 17 | ``` 18 | 19 | 20 | ## Getting Started 21 | 22 | Works with every environment that is compatible with the OpenAI Gym interface: 23 | 24 | ``` 25 | python -m rl_safety_algorithms.train --alg trpo --env MountainCarContinuous-v0 26 | ``` 27 | 28 | For an open-source framework to benchmark and test safety, we recommend the 29 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym). To train an 30 | algorithm such as Constrained Policy Optimization, run: 31 | 32 | ``` 33 | python -m rl_safety_algorithms.train --alg cpo --env SafetyBallCircle-v0 34 | ``` 35 | 36 | ## Benchmark 37 | 38 | In order to benchmark tasks from the 39 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym), 40 | we have prepared scripts in the `experiments` directory.
41 | 42 | ``` 43 | cd experiments/ 44 | python benchmark_circle_tasks.py 45 | ``` 46 | 47 | In our experiments, we used a Threadripper 3990X CPU with 64 physical CPU cores; 48 | thus, we ran the experiments with the following flag for optimal MPI usage: 49 | 50 | ``` 51 | python benchmark_circle_tasks.py --num-cores 64 52 | ``` 53 | 54 | Plots from experiment runs can also be taken from the 55 | [Bullet-Safety-Gym Benchmarks](https://github.com/SvenGronauer/Bullet-Safety-Gym/blob/master/docs/benchmark.md) -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_circle_tasks.py: -------------------------------------------------------------------------------- 1 | from rl_safety_algorithms.benchmark import Benchmark 2 | import bullet_safety_gym # noqa 3 | from safety_settings import alg_setup, argument_parser 4 | 5 | 6 | def main(args): 7 | env_specific_kwargs = { 8 | 'SafetyBallCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000}, 9 | 'SafetyCarCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000}, 10 | 'SafetyDroneCircle-v0': {'epochs': 1000, 'steps_per_epoch': 64000}, 11 | 'SafetyAntCircle-v0': {'epochs': 1500, 'steps_per_epoch': 64000}, 12 | } 13 | bench = Benchmark( 14 | alg_setup, 15 | env_ids=list(env_specific_kwargs.keys()), 16 | log_dir=args.log_dir, 17 | num_cores=args.num_cores, 18 | num_runs=args.num_runs, 19 | env_specific_kwargs=env_specific_kwargs, 20 | use_mpi=True, 21 | init_seed=args.seed, 22 | ) 23 | bench.run() 24 | 25 | 26 | if __name__ == '__main__': 27 | args = argument_parser() 28 | main(args) 29 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_gather_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from rl_safety_algorithms.benchmark import Benchmark 4 | import bullet_safety_gym # noqa 5 | from safety_settings import alg_setup, argument_parser 6 | 7 | 8 | def main(args): 9 | env_specific_kwargs = { 10 | 'SafetyBallGather-v0': {'epochs': 500, 'cost_limit': 0.2, 11 | 'steps_per_epoch': 32000}, 12 | 'SafetyCarGather-v0': {'epochs': 500, 'cost_limit': 0.2, 13 | 'steps_per_epoch': 32000}, 14 | 'SafetyDroneGather-v0': {'epochs': 1000, 'cost_limit': 0.2, 15 | 'steps_per_epoch': 64000}, 16 | 'SafetyAntGather-v0': {'epochs': 1000, 'cost_limit': 0.2, 17 | 'steps_per_epoch': 64000} 18 | } 19 | bench = Benchmark( 20 | alg_setup, 21 | env_ids=list(env_specific_kwargs.keys()), 22 | log_dir=args.log_dir, 23 | num_cores=args.num_cores, 24 | num_runs=args.num_runs, 25 | env_specific_kwargs=env_specific_kwargs, 26 | use_mpi=True, 27 | init_seed=args.seed, 28 | ) 29 | bench.run() 30 | 31 | 32 | if __name__ == '__main__': 33 | args = argument_parser() 34 | main(args) 35 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_reach_tasks.py: -------------------------------------------------------------------------------- 1 | from rl_safety_algorithms.benchmark import Benchmark 2 | import bullet_safety_gym # noqa 3 | from safety_settings import alg_setup, argument_parser 4 | 5 | 6 | def main(args): 7 | env_specific_kwargs = { 8 | 'SafetyBallReach-v0': {'epochs': 500, 'steps_per_epoch': 32000, 9 | 'cost_limit': 10}, # terminates after 250 steps 10 | 'SafetyCarReach-v0': {'epochs': 1000, 'steps_per_epoch': 32000, 11 | 'cost_limit': 10}, # terminates after 500 steps 12 |
'SafetyDroneReach-v0': {'epochs': 1000, 'steps_per_epoch': 64000, 13 | 'cost_limit': 10}, # terminates after 500 steps 14 | 'SafetyAntReach-v0': {'epochs': 1500, 'steps_per_epoch': 64000}, 15 | } 16 | bench = Benchmark( 17 | alg_setup, 18 | env_ids=list(env_specific_kwargs.keys()), 19 | log_dir=args.log_dir, 20 | num_cores=args.num_cores, 21 | num_runs=args.num_runs, 22 | env_specific_kwargs=env_specific_kwargs, 23 | use_mpi=True, 24 | init_seed=args.seed 25 | ) 26 | bench.run() 27 | 28 | 29 | if __name__ == '__main__': 30 | args = argument_parser() 31 | main(args) 32 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_run_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from rl_safety_algorithms.benchmark import Benchmark 4 | import bullet_safety_gym # noqa 5 | from safety_settings import alg_setup, argument_parser 6 | 7 | 8 | def main(args): 9 | env_specific_kwargs = { 10 | 'SafetyBallRun-v0': {'epochs': 100, 'steps_per_epoch': 32000}, 11 | 'SafetyCarRun-v0': {'epochs': 200, 'steps_per_epoch': 32000}, 12 | 'SafetyDroneRun-v0': {'epochs': 500, 'steps_per_epoch': 64000}, 13 | 'SafetyAntRun-v0': {'epochs': 500, 'steps_per_epoch': 64000}, 14 | } 15 | bench = Benchmark( 16 | alg_setup, 17 | env_ids=list(env_specific_kwargs.keys()), 18 | log_dir=args.log_dir, 19 | num_cores=args.num_cores, 20 | num_runs=args.num_runs, 21 | env_specific_kwargs=env_specific_kwargs, 22 | use_mpi=True, 23 | init_seed=args.seed, 24 | ) 25 | bench.run() 26 | 27 | 28 | if __name__ == '__main__': 29 | args = argument_parser() 30 | main(args) 31 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/safety_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | 5 | alg_setup = { 6 | 'trpo': {"target_kl": [0.001, 0.01]}, 7 | 'lag-trpo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2], 8 | 'lambda_lr': [0.001, 0.01, 0.1]}, # SGD is default 9 | 'cpo': {'target_kl': [1.0e-4, 5.0e-4, 1.0e-3], 'lam_c': [0.50, 0.90, 0.95]}, 10 | 'pdo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2], 11 | 'lambda_lr': [0.001, 0.01, 0.1]}, # Adam is default 12 | } 13 | 14 | 15 | def get_alg_setup(): 16 | return alg_setup 17 | 18 | 19 | def argument_parser(): 20 | n_cpus = os.cpu_count() 21 | parser = argparse.ArgumentParser( 22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter 23 | ) 24 | parser.add_argument('--num-cores', '-c', type=int, default=n_cpus, 25 | help='Number of parallel processes generated.') 26 | parser.add_argument('--num-runs', '-r', type=int, default=4, 27 | help='Number of total runs that are executed.') 28 | parser.add_argument('--log-dir', type=str, default='/var/tmp/ga87zej', 29 | help='Define a custom directory for logging.') 30 | parser.add_argument('--seed', type=int, default=0, 31 | help='Define the initial seed.') 32 | args = parser.parse_args() 33 | return args 34 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py 
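The four `benchmark_*_tasks.py` scripts above share a single pattern: choose a mapping from environment ids to per-task budgets, hand it to `Benchmark` together with the `alg_setup` hyper-parameter grid from `safety_settings.py`, and call `run()`. A minimal sketch of a custom single-task benchmark built the same way is shown below; the environment id and budget are illustrative, and Bullet-Safety-Gym is assumed to be installed:

```python
# custom_benchmark.py -- sketch mirroring the benchmark_*_tasks.py scripts above
from rl_safety_algorithms.benchmark import Benchmark
import bullet_safety_gym  # noqa
from safety_settings import alg_setup, argument_parser


def main(args):
    # one environment with an illustrative budget; keys follow the shipped scripts
    env_specific_kwargs = {
        'SafetyBallRun-v0': {'epochs': 100, 'steps_per_epoch': 32000},
    }
    bench = Benchmark(
        alg_setup,                                 # hyper-parameter grid from safety_settings.py
        env_ids=list(env_specific_kwargs.keys()),
        log_dir=args.log_dir,
        num_cores=args.num_cores,
        num_runs=args.num_runs,
        env_specific_kwargs=env_specific_kwargs,
        use_mpi=True,
        init_seed=args.seed,
    )
    bench.run()


if __name__ == '__main__':
    main(argument_parser())
```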
-------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/defaults.py: -------------------------------------------------------------------------------- 1 | def defaults(): 2 | return dict( 3 | actor='mlp', 4 | ac_kwargs={ 5 | 'pi': {'hidden_sizes': (64, 64), 6 | 'activation': 'tanh'}, 7 | 'val': {'hidden_sizes': (64, 64), 8 | 'activation': 'tanh'} 9 | }, 10 | adv_estimation_method='gae', 11 | epochs=300, # 9.8M steps 12 | gamma=0.99, 13 | lam_c=0.95, 14 | steps_per_epoch=64 * 1000, # default: 64k 15 | target_kl=0.0001, 16 | use_exploration_noise_anneal=True 17 | ) 18 | 19 | 20 | def locomotion(): 21 | """Default hyper-parameters for Bullet's locomotion environments.""" 22 | params = defaults() 23 | params['epochs'] = 312 24 | params['max_ep_len'] = 1000 25 | params['steps_per_epoch'] = 32 * 1000 26 | params['vf_lr'] = 3e-4 # default choice is Adam 27 | return params 28 | 29 | 30 | # Hack to circumvent kwarg errors with the official PyBullet Envs 31 | def gym_locomotion_envs(): 32 | params = locomotion() 33 | return params 34 | 35 | 36 | def gym_manipulator_envs(): 37 | """Default hyper-parameters for Bullet's manipulation environments.""" 38 | params = defaults() 39 | params['epochs'] = 312 40 | params['max_ep_len'] = 150 41 | params['steps_per_epoch'] = 32 * 1000 42 | params['vf_lr'] = 3e-4 # default choice is Adam 43 | return params 44 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for Importance-weighted Policy Gradient (IWPG) 3 | algorithm. 
4 | """ 5 | 6 | 7 | def defaults(): 8 | return dict( 9 | actor='mlp', 10 | ac_kwargs={ 11 | 'pi': {'hidden_sizes': (64, 64), 12 | 'activation': 'tanh'}, 13 | 'val': {'hidden_sizes': (64, 64), 14 | 'activation': 'tanh'} 15 | }, 16 | adv_estimation_method='gae', 17 | epochs=300, 18 | gamma=0.99, 19 | steps_per_epoch=32 * 1000, 20 | # Early stopping criterion adds robustness towards hyper-parameters 21 | # see "Successful ingredients" Paper 22 | use_kl_early_stopping=True, 23 | ) 24 | 25 | 26 | def locomotion(): 27 | """Default hyper-parameters for Bullet's locomotion environments.""" 28 | params = defaults() 29 | params['epochs'] = 312 30 | params['max_ep_len'] = 1000 31 | params['pi_lr'] = 3e-4 # default choice is Adam 32 | params['steps_per_epoch'] = 8 * 1000 33 | params['vf_lr'] = 3e-4 # default choice is Adam 34 | return params 35 | 36 | 37 | # Hack to circumvent kwarg errors with the official PyBullet Envs 38 | def gym_locomotion_envs(): 39 | params = locomotion() 40 | return params 41 | 42 | 43 | def gym_manipulator_envs(): 44 | """Default hyper-parameters for Bullet's manipulation environments.""" 45 | params = defaults() 46 | params['epochs'] = 312 47 | params['max_ep_len'] = 150 48 | params['pi_lr'] = 3e-4 # default choice is Adam 49 | params['steps_per_epoch'] = 32 * 1000 50 | params['vf_lr'] = 3e-4 # default choice is Adam 51 | return params 52 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for Lagrangian-TRPO algorithm. 3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, 19 | use_exploration_noise_anneal=True 20 | ) 21 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for NPG algorithm. 
3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, 19 | target_kl=0.01, 20 | ) 21 | 22 | 23 | def bullet(): 24 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0.""" 25 | return defaults() 26 | 27 | 28 | def gym_locomotion_envs(): 29 | """Default hyper-parameters for Bullet's locomotion environments.""" 30 | params = defaults() 31 | params['epochs'] = 312 32 | params['max_ep_len'] = 1000 33 | params['pi_lr'] = 1e-4 # default choice is Adam 34 | params['steps_per_epoch'] = 32 * 1000 35 | return params 36 | 37 | 38 | def gym_manipulator_envs(): 39 | params = defaults() 40 | params['epochs'] = 312 41 | params['max_ep_len'] = 150 42 | params['pi_lr'] = 1e-4 # default choice is Adam 43 | params['steps_per_epoch'] = 32 * 1000 44 | return params 45 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/defaults.py: -------------------------------------------------------------------------------- 1 | def defaults(): 2 | return dict( 3 | actor='mlp', 4 | ac_kwargs={ 5 | 'pi': {'hidden_sizes': (64, 64), 6 | 'activation': 'tanh'}, 7 | 'val': {'hidden_sizes': (64, 64), 8 | 'activation': 'tanh'} 9 | }, 10 | adv_estimation_method='gae', 11 | epochs=300, # 9.8M steps 12 | gamma=0.99, 13 | lambda_lr=0.001, 14 | lambda_optimizer='Adam', 15 | steps_per_epoch=64 * 1000, 16 | target_kl=0.001, 17 | use_exploration_noise_anneal=True 18 | ) 19 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for TRPO algorithm.
3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, # default: 64k 19 | target_kl=0.01, 20 | use_exploration_noise_anneal=True 21 | ) 22 | 23 | 24 | def bullet(): 25 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0.""" 26 | return defaults() 27 | 28 | 29 | def gym_locomotion_envs(): 30 | """Default hyper-parameters for Bullet's locomotion environments.""" 31 | params = defaults() 32 | params['epochs'] = 312 33 | params['max_ep_len'] = 1000 34 | params['steps_per_epoch'] = 64 * 1000 35 | return params 36 | 37 | 38 | def gym_manipulator_envs(): 39 | params = defaults() 40 | params['epochs'] = 312 41 | params['max_ep_len'] = 150 42 | params['steps_per_epoch'] = 32 * 1000 43 | return params 44 | 45 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import sys 3 | 4 | if sys.version_info.major != 3: 5 | raise TypeError( 6 | 'This Python is only compatible with Python 3, but you are running ' 7 | 'Python {}. 
The installation will likely fail.'.format( 8 | sys.version_info.major)) 9 | 10 | with open("README.md", "r") as fh: 11 | long_description = fh.read() 12 | 13 | setuptools.setup( 14 | name="rl_safety_algorithms", # this is the name displayed in 'pip list' 15 | version="0.1", 16 | author="Sven Gronauer", 17 | author_email="sven.gronauer@tum.de", 18 | description="Algorithms for Safe Reinforcement Learning Problems.", 19 | install_requires=[ 20 | 'mpi4py', # can be skipped if you want to use single threads 21 | 'numpy', 22 | 'torch' 23 | ], 24 | long_description=long_description, 25 | long_description_content_type="text/markdown", 26 | url="https://github.com/sven.gronauer", 27 | packages=setuptools.find_packages(), 28 | classifiers=[ 29 | "Programming Language :: Python :: 3", 30 | "License :: OSI Approved :: MIT License", 31 | "Operating System :: OS Independent", 32 | ], 33 | ) 34 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/tests/test_algs_single_thread.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import gym 3 | import pybullet_envs # noqa 4 | import rl_safety_algorithms.common.utils as U 5 | from rl_safety_algorithms.algs import core 6 | import inspect 7 | import sys 8 | from rl_safety_algorithms.common.loggers import setup_logger_kwargs 9 | 10 | 11 | class TestAlgorithms(unittest.TestCase): 12 | 13 | @staticmethod 14 | def check_alg(alg_name, env_id): 15 | """" Run one epoch update with algorithm.""" 16 | print(f'Run {alg_name}.') 17 | defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id) 18 | defaults['epochs'] = 1 19 | defaults['num_mini_batches'] = 4 20 | defaults['steps_per_epoch'] = 1000 21 | defaults['verbose'] = False 22 | 23 | defaults['logger_kwargs'] = setup_logger_kwargs( 24 | exp_name='unittest', 25 | seed=0, 26 | base_dir='/var/tmp/', 27 | datestamp=True, 28 | level=0, 29 | use_tensor_board=True, 30 | verbose=False) 31 | alg = U.get_alg_class(alg_name, env_id, **defaults) 32 | # sanity check of argument passing 33 | assert alg.alg == alg_name, f'Expected {alg_name} but got {alg.alg}' 34 | # return learn_fn(env_id, **defaults) 35 | ac, env = alg.learn() 36 | 37 | return ac, env 38 | 39 | def test_algorithms(self): 40 | """ Run all the specified algorithms.""" 41 | algs = ['iwpg', 'npg', 'trpo', 'lag-trpo', 'pdo', 'cpo'] 42 | for alg in algs: 43 | ac, env = self.check_alg(alg, 'HopperBulletEnv-v0') 44 | self.assertTrue(isinstance(env, gym.Env)) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Garrett Thomas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/README.md: -------------------------------------------------------------------------------- 1 | # Safe-MBPO 2 | Code for the NeurIPS 2021 paper "Safe Reinforcement Learning by Imagining the Near Future" by Garrett Thomas, Yuping Luo, and Tengyu Ma. 3 | 4 | Some code is borrowed from [Force](https://github.com/gwthomas/force). 5 | 6 | ## Installation 7 | We are using Python 3.8. The required packages can be installed via 8 | 9 | pip install -r requirements.txt 10 | 11 | You must also set the `ROOT_DIR` in `src/defaults.py`. 12 | This is where experiments' logs and checkpoints will be placed. 13 | 14 | Once setup is complete, run the code using the following command: 15 | 16 | python main.py -c config/ENV.json 17 | 18 | where ENV is replaced appropriately. To override a specific hyperparameter, add `-s PARAM VALUE` where `PARAM` is a string. 19 | Use `.` to specify hierarchical structure in the config, e.g. `-s alg_cfg.horizon 10`. -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/ant.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "ant", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -4.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/cheetah-no-flip.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "cheetah-no-flip", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -3.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/hopper.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "hopper", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -1.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "humanoid", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -2.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | tqdm 4 | h5py 5 | opencv-python 6 | torch==1.4.0 7 | gym==0.17.2 8 | mujoco-py==2.0.2.13 -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe-MBPO/src/__init__.py
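The `-s PARAM VALUE` override described in the Safe-MBPO README addresses a nested entry of the JSON config via a dot-separated path (e.g. `alg_cfg.sac_cfg.target_entropy` in the configs above). A minimal sketch of that idea follows; it is not the project's actual `src/config.py` logic, just an illustration of how a dot path maps onto the nested dict:

```python
# dot-path override sketch -- illustrative only, not Safe-MBPO's own config code
import json


def set_by_path(cfg, path, value):
    """Set cfg['a']['b']['c'] = value for the dot path 'a.b.c'."""
    *parents, leaf = path.split('.')
    node = cfg
    for key in parents:
        node = node.setdefault(key, {})  # descend, creating levels if needed
    node[leaf] = value


if __name__ == '__main__':
    with open('config/hopper.json') as f:
        cfg = json.load(f)
    # roughly what `python main.py -c config/hopper.json -s alg_cfg.horizon 10`
    # expresses (command-line value parsing/typing is omitted here)
    set_by_path(cfg, 'alg_cfg.horizon', 10)
    print(json.dumps(cfg, indent=2))
```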
-------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/defaults.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | PRECISION = 2 4 | OPTIMIZER = torch.optim.Adam 5 | BATCH_SIZE = 256 6 | ACTOR_LR = 3e-4 7 | CRITIC_LR = 1e-3 8 | 9 | # ROOT_DIR = None # set a path (directory) where experiments should be saved 10 | ROOT_DIR = '/tiger/u/gwthomas/data/smbpo' -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .torch_util import Module 4 | 5 | 6 | class Normalizer(Module): 7 | def __init__(self, dim, epsilon=1e-6): 8 | super().__init__() 9 | self.dim = dim 10 | self.epsilon = epsilon 11 | self.register_buffer('mean', torch.zeros(dim)) 12 | self.register_buffer('std', torch.zeros(dim)) 13 | 14 | def fit(self, X): 15 | assert torch.is_tensor(X) 16 | assert X.dim() == 2 17 | assert X.shape[1] == self.dim 18 | self.mean.data.copy_(X.mean(dim=0)) 19 | self.std.data.copy_(X.std(dim=0)) 20 | 21 | def forward(self, x): 22 | return (x - self.mean) / (self.std + self.epsilon) 23 | 24 | def unnormalize(self, normal_X): 25 | return self.mean + (self.std * normal_X) -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/shared.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from gym.wrappers import RescaleAction 4 | 5 | from .sampling import SampleBuffer 6 | 7 | 8 | def get_env(env_name, wrap_torch=True): 9 | from .env.torch_wrapper import TorchWrapper 10 | from .env.hopper_no_bonus import HopperNoBonusEnv 11 | from .env.cheetah_no_flip import CheetahNoFlipEnv 12 | from .env.ant_no_bonus import AntNoBonusEnv 13 | from .env.humanoid_no_bonus import HumanoidNoBonusEnv 14 | envs = { 15 | 'hopper': HopperNoBonusEnv, 16 | 'cheetah-no-flip': CheetahNoFlipEnv, 17 | 'ant': AntNoBonusEnv, 18 | 'humanoid': HumanoidNoBonusEnv 19 | } 20 | env = envs[env_name]() 21 | if not (np.all(env.action_space.low == -1.0) and np.all(env.action_space.high == 1.0)): 22 | env = RescaleAction(env, -1.0, 1.0) 23 | if wrap_torch: 24 | env = TorchWrapper(env) 25 | return env 26 | 27 | 28 | class SafetySampleBuffer(SampleBuffer): 29 | COMPONENT_NAMES = (*SampleBuffer.COMPONENT_NAMES, 'violations') 30 | 31 | def __init__(self, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | self._create_buffer('violations', torch.bool, []) -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/squashed_gaussian.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn.functional as F 3 | from torch import distributions as pd 4 | 5 | 6 | # Borrowed from https://github.com/denisyarats/pytorch_sac 7 | 8 | class TanhTransform(pd.transforms.Transform): 9 | domain = pd.constraints.real 10 | codomain = pd.constraints.interval(-1.0, 1.0) 11 | bijective = True 12 | sign = +1 13 | 14 | def __init__(self, cache_size=1): 15 | super().__init__(cache_size=cache_size) 16 | 17 | @staticmethod 18 | def atanh(x): 19 | return 0.5 * (x.log1p() - (-x).log1p()) 20 | 21 | def __eq__(self, other): 22 | return isinstance(other, TanhTransform) 23 | 24 | def _call(self, x): 25 | return x.tanh() 26 | 27 | def _inverse(self, y): 28 | # We do not
clamp to the boundary here as it may degrade the performance of certain algorithms. 29 | # one should use `cache_size=1` instead 30 | return self.atanh(y) 31 | 32 | def log_abs_det_jacobian(self, x, y): 33 | # We use a formula that is more numerically stable, see details in the following link 34 | # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7 35 | return 2. * (math.log(2.) - x - F.softplus(-2. * x)) 36 | 37 | 38 | class SquashedGaussian(pd.transformed_distribution.TransformedDistribution): 39 | def __init__(self, loc, scale, validate_args=None): 40 | base_dist = pd.Normal(loc, scale) 41 | super().__init__(base_dist, TanhTransform(), validate_args=validate_args) 42 | 43 | @property 44 | def mean(self): 45 | mu = self.base_dist.loc 46 | for transform in self.transforms: 47 | mu = transform(mu) 48 | return mu -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | __pycache__ 9 | SafeRLBench.egg-info 10 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | .DS_Store 4 | .idea 5 | .coverage 6 | covhtml 7 | MANIFEST 8 | _build 9 | 10 | *.pyc 11 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | 5 | services: 6 | - docker 7 | 8 | env: 9 | - PYTHON=python2 10 | - PYTHON=python3 11 | 12 | # Setup docker container 13 | install: 14 | - docker build -f misc/Dockerfile.${PYTHON} -t test-image . 15 | - docker ps -a 16 | - ci_env=`bash <(curl -s https://codecov.io/env)` 17 | 18 | # Run tests 19 | script: 20 | - docker run test-image flake8 SafeRLBench --exclude "test*.py,__init__.py,_quadrocopter" --ignore=E402,W503 --show-source 21 | - docker run test-image flake8 SafeRLBench --filename="__init__.py,test*.py" --ignore=F,E402,W503 --show-source 22 | - docker run test-image pydocstyle SafeRLBench --match='(?!__init__).*\.py' 23 | - docker run $ci_env test-image /bin/bash -c "nosetests --with-doctest --with-coverage --cover-package=SafeRLBench --verbosity=2 SafeRLBench ; bash <(curl -s https://codecov.io/bash)" 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | 5 | from .configuration import SRBConfig 6 | 7 | # Initialize configuration 8 | config = SRBConfig(logging.getLogger(__name__)) 9 | 10 | from .monitor import AlgoMonitor, EnvMonitor 11 | from .base import EnvironmentBase, Space, AlgorithmBase, Policy, ProbPolicy 12 | from .bench import Bench, BenchConfig 13 | from . import algo 14 | from . import envs 15 | from . import policy 16 | from . import spaces 17 | from . import error 18 | from . import measure 19 | 20 | # Add things to all 21 | __all__ = ['EnvironmentBase', 22 | 'Space', 23 | 'AlgorithmBase', 24 | 'Policy', 25 | 'ProbPolicy', 26 | 'AlgoMonitor', 27 | 'EnvMonitor', 28 | 'SRBConfig', 29 | 'Bench', 30 | 'BenchConfig', 31 | 'envs', 32 | 'algo', 33 | 'policy', 34 | 'spaces', 35 | 'measure', 36 | 'error'] 37 | 38 | 39 | # Import test after __all__ (no documentation) 40 | # from numpy.testing import Tester 41 | # test = Tester().test 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/__init__.py: -------------------------------------------------------------------------------- 1 | """Algorithm Module. 
2 | 3 | =================== ========================================= 4 | Algorithm 5 | ============================================================= 6 | A3C Asynchronous Actor-Critic Agents 7 | PolicyGradient Different Policy Gradient Implementations 8 | DiscreteQLearning Q-Learning using a table 9 | SafeOpt Bayesian Optimization with SafeOpt 10 | SafeOptSwarm Bayesian Optimization with SafeOptSwarm 11 | =================== ========================================= 12 | """ 13 | 14 | from .policygradient import PolicyGradient 15 | from .safeopt import SafeOpt, SafeOptSwarm 16 | from .a3c import A3C 17 | from .q_learning import DiscreteQLearning 18 | 19 | __all__ = ['PolicyGradient', 'SafeOpt', 'A3C', 'DiscreteQLearning', 20 | 'SafeOptSwarm'] 21 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/test.py: -------------------------------------------------------------------------------- 1 | """Algorithm Tests.""" 2 | 3 | from SafeRLBench.algo import PolicyGradient, A3C 4 | from SafeRLBench.envs import LinearCar 5 | from .policygradient import CentralFDEstimator, estimators 6 | 7 | from SafeRLBench.policy import NeuralNetwork 8 | 9 | from unittest2 import TestCase 10 | from mock import MagicMock, Mock 11 | 12 | 13 | class TestPolicyGradient(TestCase): 14 | """PolicyGradientTestClass.""" 15 | 16 | def test_pg_init(self): 17 | """Test: POLICYGRADIENT: initialization.""" 18 | env_mock = MagicMock() 19 | pol_mock = Mock() 20 | 21 | for key, item in estimators.items(): 22 | pg = PolicyGradient(env_mock, pol_mock, estimator=key) 23 | self.assertIsInstance(pg.estimator, item) 24 | 25 | pg = PolicyGradient(env_mock, pol_mock, estimator=CentralFDEstimator) 26 | self.assertIsInstance(pg.estimator, CentralFDEstimator) 27 | 28 | self.assertRaises(ImportError, PolicyGradient, 29 | env_mock, pol_mock, CentralFDEstimator(env_mock)) 30 | 31 | 32 | class TestA3C(TestCase): 33 | """A3C Test Class.""" 34 | 35 | def test_a3c_init(self): 36 | """Test: A3C: initialization.""" 37 | a3c = A3C(LinearCar(), NeuralNetwork([2, 6, 1])) 38 | 39 | fields = ['environment', 'policy', 'max_it', 'num_workers', 'rate', 40 | 'done', 'policy', 'p_net', 'v_net', 'workers', 'threads', 41 | 'global_counter', 'sess'] 42 | 43 | for field in fields: 44 | assert hasattr(a3c, field) 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .general_mountaincar import GeneralMountainCar 4 | from .linear_car import LinearCar 5 | from .gym_wrap import GymWrap 6 | from .quadrocopter import Quadrocopter 7 | from .mdp import MDP 8 | 9 | __all__ = [ 10 | 'GeneralMountainCar', 11 | 'LinearCar', 12 | 'GymWrap', 13 | 'Quadrocopter', 14 | 'MDP' 15 | ] 16 | 17 | # TODO: Envs: Add module docs in __init__ file.
18 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/_quadrocopter/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | from .quadrotor_dynamics import QuadrotorDynamics 4 | from .quadrocopter_classes import StateVector 5 | 6 | __all__ = ['QuadrotorDynamics', 'StateVector'] 7 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_policy import LinearPolicy, NoisyLinearPolicy 2 | from .linear_policy import DiscreteLinearPolicy 3 | from .neural_network import NeuralNetwork 4 | from .controller import NonLinearQuadrocopterController 5 | 6 | __all__ = [ 7 | 'LinearPolicy', 8 | 'NoisyLinearPolicy', 9 | 'DiscreteLinearPolicy', 10 | 'NeuralNetwork', 11 | 'NonLinearQuadrocopterController' 12 | ] 13 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | from .rd_space import RdSpace 4 | from .bounded_space import BoundedSpace 5 | from .discrete_space import DiscreteSpace 6 | 7 | __all__ = ['RdSpace', 'BoundedSpace', 'DiscreteSpace'] 8 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/discrete_space.py: -------------------------------------------------------------------------------- 1 | """Discrete space implementation.""" 2 | 3 | from SafeRLBench import Space 4 | 5 | import numpy as np 6 | 7 | 8 | class DiscreteSpace(Space): 9 | """Discrete Space. 10 | 11 | Let d be the dimension of the space, then it will contain elements 12 | {0, 1, ... , dim-1}. 13 | 14 | Examples 15 | -------- 16 | Create a `DiscreteSpace` with three states: 17 | >>> from SafeRLBench.spaces import DiscreteSpace 18 | >>> discrete_space = DiscreteSpace(3) 19 | """ 20 | 21 | def __init__(self, dim): 22 | """Initialize `DiscreteSpace`. 23 | 24 | Parameters 25 | ---------- 26 | dim : int 27 | Number of states. 
28 | """ 29 | assert dim > 0, ("If you need a discrete space without elements, you " 30 | + "do not need this class.") 31 | self._dim = dim 32 | 33 | def contains(self, x): 34 | """Check if element is part of the space.""" 35 | return (isinstance(x, int) and x >= 0 and x < self._dim) 36 | 37 | def sample(self): 38 | """Sample an element of the space.""" 39 | return np.random.randint(self._dim) 40 | 41 | @property 42 | def dimension(self): 43 | """Return dimension of the space.""" 44 | return self._dim 45 | 46 | def __repr__(self): 47 | return 'DiscreteSpace(dim=%d)' % self._dim 48 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/rd_space.py: -------------------------------------------------------------------------------- 1 | """R^d with any shape.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | 6 | class RdSpace(Space): 7 | """R^d Vectorspace.""" 8 | 9 | def __init__(self, shape): 10 | """Initialize with shape.""" 11 | self.shape = shape 12 | self._dim = None 13 | 14 | def contains(self, x): 15 | """Check if element is contained.""" 16 | return isinstance(x, np.ndarray) and x.shape == self.shape 17 | 18 | def sample(self): 19 | """Return arbitrary element.""" 20 | return np.ones(self.shape) 21 | 22 | @property 23 | def dimension(self): 24 | """Return dimension of the space.""" 25 | if self._dim is None: 26 | d = 1 27 | for i in range(len(self.shape)): 28 | d *= self.shape[i] 29 | self._dim = d 30 | return self._dim 31 | 32 | def __repr__(self): 33 | return 'RdSpace(shape=%s)' % str(self.shape) 34 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/test.py: -------------------------------------------------------------------------------- 1 | """Tests for spaces module.""" 2 | from __future__ import absolute_import 3 | 4 | from functools import partial 5 | import inspect 6 | 7 | from numpy import array 8 | import SafeRLBench.spaces as spaces 9 | 10 | 11 | """Dictionary storing initialization arguments for classes.""" 12 | class_arguments = { 13 | spaces.BoundedSpace: [array([-1, -2]), array([1, 0])], 14 | spaces.RdSpace: [(3, 2)], 15 | spaces.DiscreteSpace: [5] 16 | } 17 | 18 | 19 | class TestSpaces(object): 20 | """Wrap spaces tests.""" 21 | 22 | classes = [] 23 | 24 | @classmethod 25 | def setUpClass(cls): 26 | """Initialize classes list.""" 27 | for name, c in inspect.getmembers(spaces): 28 | if inspect.isclass(c): 29 | cls.classes.append(c) 30 | 31 | def exhaustive_tests(self): 32 | """Check: Spaces tests initial values for testing.""" 33 | for c in self.classes: 34 | if c not in class_arguments: 35 | assert(False) 36 | 37 | def generate_tests(self): 38 | """Generate tests for spaces implementations.""" 39 | for c in self.classes: 40 | if c in class_arguments: 41 | check = partial(self.check_contains) 42 | check.description = ('Test: ' + c.__name__.upper() 43 | + ': implementation.') 44 | yield check, c 45 | 46 | def check_contains(self, c): 47 | """Check if contains and element is implemented.""" 48 | space = c(*class_arguments[c]) 49 | try: 50 | x = space.sample() 51 | b = space.contains(x) 52 | except NotImplementedError: 53 | assert(False) 54 | assert(b) 55 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | 
# 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeRLBench 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/algorithm.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. include:: ../SafeRLBench/algo/README.rst 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/algo.rst: -------------------------------------------------------------------------------- 1 | Algorithm Module 2 | ================ 3 | 4 | This module contains implementations of different algorithms. Please refer to 5 | the class documentation for detailed instructions on how to use them. 6 | 7 | .. contents:: Contents 8 | :local: 9 | 10 | AlgorithmBase 11 | ------------- 12 | 13 | .. autoclass:: SafeRLBench.AlgorithmBase 14 | :members: 15 | 16 | A3C 17 | --- 18 | 19 | .. autoclass:: SafeRLBench.algo.A3C 20 | :members: 21 | 22 | Policy Gradient 23 | --------------- 24 | 25 | .. autoclass:: SafeRLBench.algo.PolicyGradient 26 | :members: 27 | 28 | Q-Learning 29 | ---------- 30 | 31 | .. autoclass:: SafeRLBench.algo.DiscreteQLearning 32 | :members: 33 | 34 | SafeOpt 35 | ------- 36 | 37 | .. autoclass:: SafeRLBench.algo.SafeOpt 38 | :members: 39 | 40 | SafeOptSwarm 41 | ------------ 42 | 43 | .. autoclass:: SafeRLBench.algo.SafeOptSwarm 44 | :members: 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/bench.rst: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ========= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Bench 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Bench 11 | :members: 12 | 13 | BenchConfig 14 | ----------- 15 | 16 | .. autoclass:: SafeRLBench.BenchConfig 17 | :members: 18 | 19 | BenchRun 20 | -------- 21 | 22 | .. autoclass:: SafeRLBench.bench.BenchRun 23 | :members: 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/envs.rst: -------------------------------------------------------------------------------- 1 | Environment Module 2 | ================== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | EnvironmentBase 8 | --------------- 9 | 10 | .. autoclass:: SafeRLBench.EnvironmentBase 11 | :members: 12 | 13 | GeneralMountainCar 14 | ------------------ 15 | 16 | .. autoclass:: SafeRLBench.envs.GeneralMountainCar 17 | :members: 18 | 19 | GymWrap 20 | ------- 21 | 22 | .. autoclass:: SafeRLBench.envs.GymWrap 23 | :members: 24 | 25 | LinearCar 26 | --------- 27 | 28 | .. autoclass:: SafeRLBench.envs.LinearCar 29 | :members: 30 | 31 | MDP 32 | --- 33 | 34 | .. autoclass:: SafeRLBench.envs.MDP 35 | :members: 36 | 37 | Quadrocopter 38 | ------------ 39 | 40 | .. 
autoclass:: SafeRLBench.envs.Quadrocopter 41 | :members: 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/measure.rst: -------------------------------------------------------------------------------- 1 | Measure Module 2 | ============== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Measure 8 | ------- 9 | 10 | .. autoclass:: SafeRLBench.measure.Measure 11 | :members: 12 | 13 | BestPerformance 14 | --------------- 15 | 16 | .. autoclass:: SafeRLBench.measure.BestPerformance 17 | :members: 18 | 19 | SafetyMeasure 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.measure.SafetyMeasure 23 | :members: 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/misc.rst: -------------------------------------------------------------------------------- 1 | Miscellaneous 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Configuration 8 | ------------- 9 | 10 | .. autoclass:: SafeRLBench.SRBConfig 11 | :members: 12 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/policy.rst: -------------------------------------------------------------------------------- 1 | Policy Module 2 | ============= 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Bases 8 | ----- 9 | 10 | Deterministic Policy Base 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: SafeRLBench.Policy 14 | :members: 15 | 16 | Probabilistic Policy Base 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: SafeRLBench.ProbPolicy 20 | :members: 21 | 22 | Linear Policies 23 | --------------- 24 | 25 | LinearPolicy 26 | ~~~~~~~~~~~~ 27 | 28 | .. autoclass:: SafeRLBench.policy.LinearPolicy 29 | :members: 30 | 31 | DiscreteLinearPolicy 32 | ~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. autoclass:: SafeRLBench.policy.DiscreteLinearPolicy 35 | :members: 36 | 37 | NoisyLinearPolicy 38 | ~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: SafeRLBench.policy.NoisyLinearPolicy 41 | :members: 42 | 43 | NonLinearQuadrocopterController 44 | ------------------------------- 45 | 46 | .. autoclass:: SafeRLBench.policy.NonLinearQuadrocopterController 47 | :members: 48 | 49 | NeuralNetwork 50 | ------------- 51 | 52 | .. autoclass:: SafeRLBench.policy.NeuralNetwork 53 | :members: 54 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/spaces.rst: -------------------------------------------------------------------------------- 1 | Spaces Module 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Space 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Space 11 | :members: 12 | 13 | BoundedSpace 14 | ------------ 15 | 16 | .. autoclass:: SafeRLBench.spaces.BoundedSpace 17 | :members: 18 | 19 | DiscreteSpace 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.spaces.DiscreteSpace 23 | :members: 24 | 25 | RdSpace 26 | ------- 27 | 28 | .. autoclass:: SafeRLBench.spaces.RdSpace 29 | :members: 30 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/srb.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. 
toctree:: 5 | 6 | algo 7 | envs 8 | policy 9 | spaces 10 | measure 11 | bench 12 | misc 13 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/environment.rst: -------------------------------------------------------------------------------- 1 | Environments 2 | ============ 3 | 4 | .. include:: ../SafeRLBench/envs/README.rst 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. SafeRLBench documentation master file, created by 2 | sphinx-quickstart on Mon Mar 27 16:08:01 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | .. include:: toc.rst 9 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/toc.rst: -------------------------------------------------------------------------------- 1 | Content 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | algorithm 8 | environment 9 | api/srb 10 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matlab issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=2.7 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (seperate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install extra python2 requirements 32 | RUN pip --no-cache-dir install futures multiprocessing \ 33 | && rm -rf /root/.cache 34 | 35 | # Install SafeOpt 36 | RUN git clone https://github.com/befelix/SafeOpt.git \ 37 | && cd SafeOpt \ 38 | && python setup.py install \ 39 | && rm -rf /SafeOpt 40 | 41 | # Copy the main code 42 | COPY . /code 43 | RUN cd /code && python setup.py develop 44 | 45 | WORKDIR /code 46 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matlab issues. 
11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=3.5 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (seperate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install SafeOpt 32 | RUN git clone https://github.com/befelix/SafeOpt.git \ 33 | && cd SafeOpt \ 34 | && python setup.py install \ 35 | && rm -rf /SafeOpt 36 | 37 | # Copy the main code 38 | COPY . /code 39 | RUN cd /code && python setup.py develop 40 | 41 | WORKDIR /code 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.7 2 | scipy >= 0.19.0 3 | six >= 1.10 4 | futures >= 3.0.5 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | gym >= 0.8.0 2 | tensorflow >= 1.0.0 3 | GPy >= 1.6.1 4 | 5 | # Style testing 6 | flake8 >= 3.3.0 7 | pep8 >= 1.7.0 8 | pep8-naming >= 0.4.1 9 | pydocstyle >= 1.1.1 10 | 11 | # Unittesting 12 | nose >= 1.3.7 13 | nose-exclude >= 0.5.0 14 | coverage >= 4.3.4 15 | unittest2 >= 1.1.0 16 | mock >= 2.0.0 17 | 18 | # Documentation 19 | sphinx >= 1.5.3 20 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='SafeRLBench', 5 | version='1.0.1', 6 | author='Nicolas Ochsner', 7 | author_email='ochsnern@student.ethz.ch', 8 | packages=[ 9 | 'SafeRLBench', 10 | 'SafeRLBench.algo', 11 | 'SafeRLBench.envs', 12 | 'SafeRLBench.spaces', 13 | 'SafeRLBench.policy', 14 | ], 15 | description='Safe Reinforcement Learning Benchmark', 16 | keywords='reinforcement-learning benchmark', 17 | url='https://github.com/befelix/Safe-RL-Benchmark', 18 | install_requires=[ 19 | 'numpy >= 1.7', 20 | 'scipy >= 0.19.0', 21 | 'six >= 1.10', 22 | 'futures >= 3.0.5;python_version<"3.2"' 23 | ], 24 | extras_require={ 25 | 'gym': ['gym >= 0.8.0'], 26 | 'safeopt': ['GPy >= 1.6.1', 'safeopt >= 0.1'], 27 | 'neural': ['tensorflow >= 1.0.0'], 28 | }, 29 | dependency_links=[ 30 | 'git+https://github.com/befelix/SafeOpt/tarball/master#egg=safeopt-0.1' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="SafeRLBench" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink 
base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # tensorflow environment variable 19 | export TF_CPP_MIN_LOG_LEVEL='3' 20 | 21 | # Change to script root 22 | cd $(get_script_dir) 23 | GREEN='\033[0;32m' 24 | NC='\033[0m' 25 | 26 | BOLD=$(tput bold) 27 | NORMAL=$(tput sgr0) 28 | 29 | # Run style tests 30 | echo -e "${GREEN}${BOLD}Running style tests:${NC}" 31 | flake8 $module --exclude test*.py,__init__.py,_quadrocopter --show-source 32 | 33 | # Ignore import errors for __init__ and tests 34 | flake8 $module --filename=__init__.py,test*.py --ignore=F --show-source 35 | 36 | echo -e "${GREEN}${BOLD}Testing docstring conventions:${NC}" 37 | # Test docstring conventions 38 | pydocstyle $module --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 39 | 40 | echo -e "${GREEN}${BOLD}Running unit tests in current environment.${NC}" 41 | nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=$module $module 2>&1 | grep -v "^Level " 42 | 43 | # Export html 44 | coverage html 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | numpy 8 | theano 9 | mock 10 | unittest2 11 | commands = nosetests 12 | 13 | [flake8] 14 | ignore = E402,W503,D105,D413 15 | exclude = 16 | SafeRLBench/envs/_quadrocopter* 17 | 18 | [pydocstyle] 19 | add_ignore = D203,D105,D413 20 | match_dir = '[^\.\_].*' 21 | 22 | [coverage:run] 23 | omit = 24 | */_quadrocopter* 25 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | Codes for the constrained Linear-Quadratic Regulator (LQR) experiment. 3 | ## Reference 4 | Ming Yu, Zhuoran Yang, Mladen Kolar, and Zhaoran Wang. Convergent Policy Optimization for Safe Reinforcement Learning. In NeurIPS 2019. 
5 | ## Run codes 6 | Run "Safe_RL_LQR_experiment.m" 7 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/iterate_calculate.m: -------------------------------------------------------------------------------- 1 | function X = iterate_calculate( Init, M, N ) 2 | % this function iteratively solve for the following equation for X: 3 | % X = M + N'*X*N 4 | % starting from Init 5 | 6 | X = Init; diff = 1; iter = 0; 7 | while diff > 1e-3 8 | iter = iter + 1; 9 | X_old = X; 10 | X = M + N'*X*N; 11 | diff = norm(X_old - X); 12 | end 13 | 14 | end 15 | 16 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe_reinforcement_learning/poster.pdf -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadconstr.m: -------------------------------------------------------------------------------- 1 | function [y,yeq,grady,gradyeq] = quadconstr(x,H,k,d) 2 | jj = length(H); % jj is the number of inequality constraints 3 | y = zeros(1,jj); 4 | for i = 1:jj 5 | y(i) = 1/2*x'*H{i}*x + k{i}'*x + d{i}; 6 | end 7 | yeq = []; 8 | 9 | if nargout > 2 10 | grady = zeros(length(x),jj); 11 | for i = 1:jj 12 | grady(:,i) = H{i}*x + k{i}; 13 | end 14 | end 15 | gradyeq = []; -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadhess.m: -------------------------------------------------------------------------------- 1 | function hess = quadhess(x,lambda,Q,H) 2 | hess = Q; 3 | jj = length(H); % jj is the number of inequality constraints 4 | for i = 1:jj 5 | hess = hess + lambda.ineqnonlin(i)*H{i}; 6 | end -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadobj.m: -------------------------------------------------------------------------------- 1 | function [y,grady] = quadobj(x,Q,f,c) 2 | y = 1/2*x'*Q*x + f'*x + c; 3 | if nargout > 1 4 | grady = Q*x + f; 5 | end -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/.gitignore: -------------------------------------------------------------------------------- 1 | ## Julia ignores ## 2 | 3 | # Files generated by invoking Julia with --code-coverage 4 | *.jl.cov 5 | *.jl.*.cov 6 | 7 | # Files generated by invoking Julia with --track-allocation 8 | *.jl.mem 9 | 10 | # System-specific files and directories generated by the BinaryProvider and BinDeps packages 11 | # They contain absolute paths specific to the host computer, and so should not be committed 12 | deps/deps.jl 13 | deps/build.log 14 | deps/downloads/ 15 | deps/usr/ 16 | deps/src/ 17 | 18 | # Build artifacts for creating documentation generated by the Documenter package 19 | docs/build/ 20 | docs/site/ 21 | 22 | # File generated by Pkg, the package manager, based on a corresponding Project.toml 23 | # It records a fixed state of all packages used by the project. As such, it should not be 24 | # committed for packages, but should be committed for applications that require a static 25 | # environment. 
26 | 27 | # Manifest.toml 28 | 29 | 30 | ## C ## 31 | 32 | *.o 33 | *.so 34 | 35 | 36 | ## Additional Ignores ## 37 | 38 | # Don't want to commit changes to these 39 | shield_dump.c 40 | 41 | # My sync program keeps pooping in the corners 42 | *.insyncdl 43 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" 3 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 4 | Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" 5 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 6 | Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" 7 | Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" 8 | GridShielding = "d6812381-bd27-4ab8-a35f-a1c7ba1f8c22" 9 | HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" 10 | InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" 11 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 12 | LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" 13 | Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" 14 | MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" 15 | Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" 16 | NaturalSort = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" 17 | PProf = "e4faabce-9ead-11e9-39d9-4379958e3056" 18 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 19 | Pluto = "c3e4b0f8-55cb-11ea-2926-15256bba5781" 20 | PlutoLinks = "0ff47ea0-7a50-410d-8455-4348d5de0420" 21 | PlutoSerialization = "89dfed0f-77d6-439b-aaac-839db4b25fb8" 22 | PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" 23 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 24 | ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" 25 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 26 | ReachabilityAnalysis = "1e97bd63-91d1-579d-8e8d-501d2b57c93f" 27 | Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" 28 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 29 | StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" 30 | Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" 31 | Unzip = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" 32 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/Shared Code/PlotsDefaults.jl: -------------------------------------------------------------------------------- 1 | # This may be the shortest code-file I've ever authored. 2 | Plots.default(fontfamily="serif-roman") 3 | 4 | halfpage = 300 5 | 6 | squeezed = (halfpage, 150) 7 | regular = (halfpage, 220) 8 | tall = (halfpage, 400) -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/Blueprints/TrainSaveEvaluateSingle.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/ExtractQueryResults.jl: -------------------------------------------------------------------------------- 1 | struct UppaalQueryFailedException <: Exception 2 | message::AbstractString 3 | end 4 | 5 | function extract_query_results(query_results::AbstractString) 6 | results = [] 7 | open(query_results) do file 8 | for line in eachline(file) 9 | m_mean = match(r"mean=([\d.e-]+)", line) 10 | aborted = occursin(r"EXCEPTION: |is time-locked.|-- Aborted.", line) 11 | 12 | if aborted 13 | throw(UppaalQueryFailedException(line)) 14 | end 15 | 16 | if m_mean === nothing 17 | continue 18 | end 19 | 20 | push!(results, m_mean[1]) 21 | end 22 | end 23 | 24 | results 25 | end -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldRobustness/StatisticalChecking.jl: -------------------------------------------------------------------------------- 1 | function evaluate_safety(mechanics, policy, number_of_runs; 2 | run_duration=120, 3 | min_v_on_impact=1, 4 | unlucky=false) 5 | 6 | safety_violations_observed = 0 7 | unsafe_trace = [] 8 | rand_step = eps() 9 | 10 | for run in 1:number_of_runs 11 | v, p = 0, rand(7:rand_step:10) 12 | # Simulate the ball for run_duration seconds 13 | vs, ps, ts = simulate_sequence(mechanics, v, p, policy, run_duration, 14 | min_v_on_impact=min_v_on_impact, 15 | unlucky=unlucky) 16 | # See if it ends at v=0, p=0 17 | if last(vs) == 0 && last(ps) == 0 18 | safety_violations_observed += 1 19 | end 20 | end 21 | (; safety_violations_observed, number_of_runs) 22 | end 23 | 24 | # It does not choose a random policy. It returns a policy that acts randomly. 25 | function random_policy(hit_chance) 26 | return (v, p) -> 27 | if rand(0:eps():1) <= hit_chance 28 | "hit" 29 | else 30 | "nohit" 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PostShielded.q: -------------------------------------------------------------------------------- 1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy Deterrence1000 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence1000.strategy.json") 5 | 6 | /* formula 2 */ 7 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence1000 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence1000 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:interventions) under Deterrence1000 14 | 15 | /* formula 5 */ 16 | strategy Deterrence100 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence100.strategy.json") 17 | 18 | /* formula 6 */ 19 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence100 20 | 21 | /* formula 7 */ 22 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence100 23 | 24 | /* formula 8 */ 25 | E[<=120;%checks%] (max:interventions) under Deterrence100 26 | 27 | /* formula 9 */ 28 | strategy Deterrence10 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence10.strategy.json") 29 | 30 | /* formula 10 */ 31 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence10 32 | 33 | /* formula 11 */ 34 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence10 35 | 36 | /* formula 12 */ 37 | E[<=120;%checks%] (max:interventions) under Deterrence10 38 | 39 | /* formula 13 */ 40 | strategy Deterrence0 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence0.strategy.json") 41 | 42 | /* formula 14 */ 43 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence0 44 | 45 | /* formula 15 */ 46 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence0 47 | 48 | /* formula 16 */ 49 | E[<=120;%checks%] (max:interventions) under Deterrence0 50 | 51 | 52 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 2 */ 4 | E[<=120;%checks%] (max:LearnerPlayer.fired) 5 | 6 | /* formula 3 */ 7 | E[<=120;%checks%] (max:(number_deaths > 0)) 8 | 9 | /* formula 4 */ 10 | E[<=120;%checks%] (max:interventions) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeCostEvaluate.q: -------------------------------------------------------------------------------- 1 | 2 | /* formula 1 */ 3 | strategy MinCost = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 4 | 5 | /* formula 2 */ 6 | saveStrategy("%resultsdir%/MinCost.strategy.json", MinCost) 7 | 8 | /* formula 3 */ 9 | E[<=120;%checks%] (max: D/1000) under MinCost 10 | 11 | /* formula 4 */ 12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinCost 13 | 14 | /* formula 5 */ 15 | E[<=120;%checks%] (max: interventions) under MinCost 16 | 17 | 18 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeInterventionsEvaluate.q: -------------------------------------------------------------------------------- 1 | 2 | /* formula 1 */ 3 | strategy MinInterventions = minE (interventions) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 4 | 5 | /* formula 2 */ 6 | saveStrategy("%resultsdir%/MinInterventions.strategy.json", MinInterventions) 7 | 8 | /* formula 3 */ 9 | E[<=120;%checks%] (max: D/1000) under MinInterventions 10 | 11 | /* formula 4 */ 12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinInterventions 13 | 14 | /* formula 5 */ 15 | E[<=120;%checks%] (max: interventions) under MinInterventions 16 | 17 | 18 | -------------------------------------------------------------------------------- 
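The `%resultsdir%` and `%checks%` tokens in the query blueprints above (and below) appear to be placeholders that the experiment scripts substitute before the queries are handed to UPPAAL Stratego; `%checks%00` would then simply become the configured number of checks with two zeros appended. The repository's own substitution code is Julia and is not shown here, so the following Python sketch is purely illustrative — `instantiate_blueprint` and the example paths are hypothetical names.

```python
# Illustrative only: fill in the %resultsdir% and %checks% placeholders of a
# query blueprint and write out a concrete query file.
from pathlib import Path

def instantiate_blueprint(blueprint_path, results_dir, checks, out_path):
    text = Path(blueprint_path).read_text()
    text = text.replace("%resultsdir%", str(results_dir))
    text = text.replace("%checks%", str(checks))
    Path(out_path).write_text(text)

# e.g. instantiate_blueprint("Blueprints/PreShielded.q", "Results/run1", 1000,
#                            "Results/run1/PreShielded.q")
```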
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/NoStrategyEvaluate.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 1 */ 4 | E[<=120;%checks%] (max: D/1000) 5 | 6 | /* formula 2 */ 7 | E[<=120;%checks%] (max:(rDistance <= 0)) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:interventions) 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/TrainSaveEvaluateSingle.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | // HACK: Since this query file is only used for PreShield, haven't implemented a way to count interventions. It will be zero, because I need a number to be printed. 3 | 4 | /* formula 1 */ 5 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 6 | 7 | /* formula 2 */ 8 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 9 | 10 | /* formula 3 */ 11 | E[<=120;%checks%] (max: D/1000) under PreShielded 12 | 13 | /* formula 4 */ 14 | E[<=120;%checks%] (max:(rDistance <= 0)) under PreShielded 15 | 16 | /* formula 5 */ 17 | E[<=120;2] (max: 0) 18 | 19 | 20 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:number_deaths > 0) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 2 */ 4 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0) 5 | 6 | /* formula 3 */ 7 | E[<=120;%checks%] (max:number_deaths > 0) 8 | 9 | /* formula 4 */ 10 | E[<=120;%checks%] (max:interventions) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PostShielded.q: -------------------------------------------------------------------------------- 1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy Deterrence1000 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence1000.strategy.json") 5 | 6 | /* formula 2 */ 7 | E[#<=30;%checks%] (max:total_cost) under Deterrence1000 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%00] (max:t>1) under Deterrence1000 11 | 12 | /* formula 4 */ 13 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence1000 14 | 15 | /* formula 5 */ 16 | strategy Deterrence100 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence100.strategy.json") 17 | 18 | /* formula 6 */ 19 | E[#<=30;%checks%] (max:total_cost) under Deterrence100 20 | 21 | /* formula 7 */ 22 | E[#<=30;%checks%00] (max:t>1) under Deterrence100 23 | 24 | /* formula 8 */ 25 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence100 26 | 27 | /* formula 9 */ 28 | strategy Deterrence10 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence10.strategy.json") 29 | 30 | /* formula 10 */ 31 | E[#<=30;%checks%] (max:total_cost) under Deterrence10 32 | 33 | /* formula 11 */ 34 | E[#<=30;%checks%00] (max:t>1) under Deterrence10 35 | 36 | /* formula 12 */ 37 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence10 38 | 39 | 40 | /* formula 13 */ 41 | strategy Deterrence0 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence0.strategy.json") 42 | 43 | /* formula 14 */ 44 | E[#<=30;%checks%] (max:total_cost) under Deterrence0 45 | 46 | /* formula 15 */ 47 | E[#<=30;%checks%00] (max:t>1) under Deterrence0 48 | 49 | /* formula 16 */ 50 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence0 51 | 52 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%] (max:total_cost) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[#<=30;%checks%00] (max:t>1) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 1 */ 4 | E[#<=30;%checks%] (max:total_cost) 5 | 6 | /* formula 2 */ 7 | E[#<=30;%checks%00] (max:t>1) 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then check its safety. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> number_deaths > 0) under PreShielded 11 | 12 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Shields 2 | 3 | Synthesize shields and test their safety against different random agents. 4 | "Shield" is used as shorthand for a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | A random agent is defined by its `hit_chance` such that it will choose randomly between actions `(hit, nohit)` with probabilities `(hit_chance, 1-hit_chance)`. 7 | 8 | Shields are synthesised using either a "barbaric" or "rigorous" reachability method. 9 | The rigorous method makes use of the library `ReachabilityAnalysis.jl` to over-approximate possible outcomes of the system. This gives theoretical guarantees for safety, at the cost of more compute time and a less optimistic shield. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. 
This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "Run Experiment.jl"` from within this folder. 13 | 14 | Some of the files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 15 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> rDistance <= 0) under PreShielded 11 | 12 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Cruise Control Shields 2 | 3 | Synthesize shields and test their safety on a random agent. 4 | By shields I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used. 7 | It is the random agent with uniform chance of picking any action. 8 | 9 | Shields are synthesised using the "barbaric" reachability method only. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-CCSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder. 13 | 14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`. 15 | 16 | The files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 17 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-DCSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> number_deaths > 0) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-OPSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then check its safety. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (aov) [<=120] {p} -> {t, v}: <> elapsed >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<>(number_deaths > 0)) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[#<=30] (<> t>1) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Random Walk Shields 2 | 3 | Synthesize shields and test their safety on a random agent. 4 | By shields I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used. 7 | It is the random agent with uniform chance of picking any action. 8 | 9 | Shields are synthesised using the "barbaric" reachability method only. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-RWSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder. 13 | 14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`. 15 | 16 | The files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 
17 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | __pycache__/ 3 | *.pkl 4 | **/*.egg-info 5 | .python-version 6 | .idea/ 7 | .vscode/ 8 | .DS_Store 9 | _build/ 10 | data/*ppo* 11 | *.pickle 12 | .ipynb_checkpoints/ 13 | *.ckpt 14 | #*.png 15 | *.pt 16 | */simple_save/* -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.util import deprecation as deprecation 2 | deprecation._PRINT_DEPRECATION_WARNINGS = False 3 | 4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo 5 | from safe_rl.sac.sac import sac -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/pg/trust_region.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from safe_rl.pg.utils import EPS 4 | 5 | 6 | """ 7 | Tensorflow utilities for trust region optimization 8 | """ 9 | 10 | def flat_concat(xs): 11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0) 12 | 13 | def flat_grad(f, params): 14 | return flat_concat(tf.gradients(xs=params, ys=f)) 15 | 16 | def hessian_vector_product(f, params): 17 | # for H = grad**2 f, compute Hx 18 | g = flat_grad(f, params) 19 | x = tf.placeholder(tf.float32, shape=g.shape) 20 | return x, flat_grad(tf.reduce_sum(g*x), params) 21 | 22 | def assign_params_from_flat(x, params): 23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars 24 | splits = tf.split(x, [flat_size(p) for p in params]) 25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)] 26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)]) 27 | 28 | 29 | """ 30 | Conjugate gradient 31 | """ 32 | 33 | def cg(Ax, b, cg_iters=10): 34 | x = np.zeros_like(b) 35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start. 36 | p = r.copy() 37 | r_dot_old = np.dot(r,r) 38 | for _ in range(cg_iters): 39 | z = Ax(p) 40 | alpha = r_dot_old / (np.dot(p, z) + EPS) 41 | x += alpha * p 42 | r -= alpha * z 43 | r_dot_new = np.dot(r,r) 44 | p = r + (r_dot_new / r_dot_old) * p 45 | r_dot_old = r_dot_new 46 | return x -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/pg/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.signal 3 | 4 | EPS = 1e-8 5 | 6 | def combined_shape(length, shape=None): 7 | if shape is None: 8 | return (length,) 9 | return (length, shape) if np.isscalar(shape) else (length, *shape) 10 | 11 | def keys_as_sorted_list(dict): 12 | return sorted(list(dict.keys())) 13 | 14 | def values_as_sorted_list(dict): 15 | return [dict[k] for k in keys_as_sorted_list(dict)] 16 | 17 | def discount_cumsum(x, discount): 18 | """ 19 | magic from rllab for computing discounted cumulative sums of vectors. 
20 | 21 | input: 22 | vector x, 23 | [x0, 24 | x1, 25 | x2] 26 | 27 | output: 28 | [x0 + discount * x1 + discount^2 * x2, 29 | x1 + discount * x2, 30 | x2] 31 | """ 32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 33 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/load_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import joblib 4 | import os 5 | import os.path as osp 6 | import tensorflow as tf 7 | from safe_rl.utils.logx import restore_tf_graph 8 | 9 | def load_policy(fpath, itr='last', deterministic=False): 10 | 11 | # handle which epoch to load from 12 | if itr=='last': 13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11] 14 | itr = '%d'%max(saves) if len(saves) > 0 else '' 15 | else: 16 | itr = '%d'%itr 17 | 18 | # load the things! 19 | sess = tf.Session(graph=tf.Graph()) 20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr)) 21 | 22 | # get the correct op for executing actions 23 | if deterministic and 'mu' in model.keys(): 24 | # 'deterministic' is only a valid option for SAC policies 25 | print('Using deterministic action op.') 26 | action_op = model['mu'] 27 | else: 28 | print('Using default action op.') 29 | action_op = model['pi'] 30 | 31 | # make function for producing an action given a single state 32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0] 33 | 34 | # try to load environment from save 35 | # (sometimes this will fail because the environment could not be pickled) 36 | try: 37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) 38 | env = state['env'] 39 | except: 40 | env = None 41 | 42 | return env, get_action, sess -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/readme.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo). -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/run_utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os.path as osp 3 | 4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data') 5 | 6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True): 7 | 8 | # Make base path 9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else '' 10 | relpath = ''.join([ymd_time, exp_name]) 11 | 12 | if seed is not None: 13 | # Make a seed-specific subfolder in the experiment directory. 
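        # Illustrative example (the timestamp is taken at call time, and 'cpo' is
        # just a placeholder experiment name):
        #   setup_logger_kwargs('cpo', seed=0, datestamp=True) gives
        #     output_dir = <data_dir>/2021-01-01_cpo/2021-01-01_12-00-00-cpo_s0
        #   whereas datestamp=False gives <data_dir>/cpo/cpo_s0.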
14 | if datestamp: 15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S") 16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)]) 17 | else: 18 | subfolder = ''.join([exp_name, '_s', str(seed)]) 19 | relpath = osp.join(relpath, subfolder) 20 | 21 | data_dir = data_dir or DEFAULT_DATA_DIR 22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath), 23 | exp_name=exp_name) 24 | return logger_kwargs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return (convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 
9 | 10 | setup( 11 | name='safe_rl', 12 | packages=['safe_rl'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib==0.14.0', 16 | 'matplotlib==3.1.1', 17 | 'mpi4py==3.0.2', 18 | 'mujoco_py==2.0.2.7', 19 | 'numpy~=1.17.4', 20 | 'seaborn==0.8.1', 21 | 'tensorflow==1.15.4', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg1/ensemble-cem/ensemble-cem_s10/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./data/config.yml 3 | correct: 0 4 | dir: data/cg1/ 5 | ensemble: 5 6 | episode: 10 7 | epoch: 70 8 | level: 1 9 | load: null 10 | name: ensemble-cem 11 | obs_stack: false 12 | optimizer: cem 13 | render: false 14 | robot: car 15 | save: false 16 | seed: 10 17 | task: goal 18 | test: false 19 | cost_config: 20 | batch: 2000 21 | load: false 22 | load_folder: null 23 | max_ratio: 3 24 | model_param: 25 | boosting_type: gbdt 26 | learning_rate: 0.3 27 | max_depth: 8 28 | n_estimators: 400 29 | n_jobs: 1 30 | num_leaves: 12 31 | safe_buffer_size: 50000 32 | save: false 33 | save_folder: null 34 | unsafe_buffer_size: 10000 35 | dynamic_config: 36 | activation: relu 37 | batch_size: 256 38 | buffer_size: 500000 39 | data_split: 0.8 40 | hidden_sizes: 41 | - 1024 42 | - 1024 43 | - 1024 44 | learning_rate: 0.001 45 | load: false 46 | load_folder: null 47 | n_epochs: 70 48 | save: false 49 | save_folder: null 50 | test_freq: 5 51 | test_ratio: 0.15 52 | exp_name: ensemble-cem 53 | mpc_config: 54 | CCE: 55 | alpha: 0.1 56 | epsilon: 0.01 57 | init_mean: 0 58 | init_var: 1 59 | max_iters: 8 60 | minimal_elites: 5 61 | num_elites: 12 62 | popsize: 500 63 | CEM: 64 | alpha: 0.1 65 | epsilon: 0.01 66 | init_mean: 0 67 | init_var: 1 68 | max_iters: 8 69 | num_elites: 12 70 | popsize: 500 71 | RANDOM: 72 | popsize: 5000 73 | gamma: 0.98 74 | horizon: 8 75 | optimizer: CEM 76 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg1/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/cg1 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 1 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: car 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/cg1/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/cg1/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | 
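The `mpc_config` block in the YAML files above parameterises a sampling-based MPC optimizer (the `CEM`, `RCE`/`CCE` and `RANDOM` entries). As a rough illustration of how fields such as `popsize`, `num_elites`, `max_iters`, `alpha`, `epsilon`, `init_mean`, `init_var` and `horizon` are typically used by a cross-entropy-method planner, here is a minimal Python sketch. `cem_plan` and `evaluate_cost` are hypothetical names; the repository's actual optimizer lives in the safe-mbrl sources and differs in details such as constraint handling (`minimal_elites` presumably bounds how many constraint-satisfying elites must be kept).

```python
# Minimal cross-entropy-method (CEM) sketch for choosing an action sequence.
# Not the repository's implementation: evaluate_cost stands in for rolling the
# sampled sequences through the learned dynamics/cost models.
import numpy as np

def cem_plan(evaluate_cost, act_dim, horizon=8, popsize=500, num_elites=12,
             max_iters=8, alpha=0.1, epsilon=0.01, init_mean=0.0, init_var=1.0):
    mean = np.full((horizon, act_dim), float(init_mean))
    var = np.full((horizon, act_dim), float(init_var))
    for _ in range(max_iters):
        if var.max() < epsilon:  # sampling distribution has collapsed; stop early
            break
        samples = np.random.normal(mean, np.sqrt(var),
                                   size=(popsize, horizon, act_dim))
        costs = evaluate_cost(samples)                     # shape: (popsize,)
        elites = samples[np.argsort(costs)[:num_elites]]   # lowest-cost sequences
        # Smoothly re-fit the sampling distribution to the elites.
        mean = alpha * mean + (1 - alpha) * elites.mean(axis=0)
        var = alpha * var + (1 - alpha) * elites.var(axis=0)
    return mean[0]  # MPC executes only the first action of the planned sequence

# e.g. first_action = cem_plan(lambda seqs: np.random.rand(len(seqs)), act_dim=2)
```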
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg2/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/cg2 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 2 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: car 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/cg2/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/cg2/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/TestFigure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/TestFigure3.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg1-Cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Cost.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg1-Reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Reward.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg2-Cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Cost.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg2-Reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Reward.png 
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/pg1/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/pg1 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 1 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: point 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/pg1/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/pg1/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/pg2/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/pg2 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 2 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: point 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/pg2/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/pg2/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated 
documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/build/lib/safety_gym/__init__.py: -------------------------------------------------------------------------------- 1 | import safety_gym.envs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/build/lib/safety_gym/random_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import gym 5 | import safety_gym # noqa 6 | import numpy as np # noqa 7 | 8 | def run_random(env_name): 9 | env = gym.make(env_name) 10 | obs = env.reset() 11 | done = False 12 | ep_ret = 0 13 | ep_cost = 0 14 | while True: 15 | if done: 16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost)) 17 | ep_ret, ep_cost = 0, 0 18 | obs = env.reset() 19 | assert env.observation_space.contains(obs) 20 | act = env.action_space.sample() 21 | assert env.action_space.contains(act) 22 | obs, reward, done, info = env.step(act) 23 | print(obs['magnetometer'], obs['gyro']) 24 | # print('reward', reward) 25 | ep_ret += reward 26 | ep_cost += info.get('cost', 0) 27 | env.render() 28 | 29 | 30 | if __name__ == '__main__': 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0') 34 | args = parser.parse_args() 35 | run_random(args.env) 36 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/safety_gym.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/__init__.py: -------------------------------------------------------------------------------- 1 | import safety_gym.envs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/envs/__init__.py: 
-------------------------------------------------------------------------------- 1 | import safety_gym.envs.suite -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/envs/mujoco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # This file is just to get around a baselines import hack. 5 | # env_type is set based on the final part of the entry_point module name. 6 | # In the regular gym mujoco envs this is 'mujoco'. 7 | # We want baselines to treat these as mujoco envs, so we redirect from here, 8 | # and ensure the registry entries are pointing at this file as well. 9 | from safety_gym.envs.engine import * # noqa 10 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/random_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import gym 5 | import safety_gym # noqa 6 | import numpy as np # noqa 7 | 8 | def run_random(env_name): 9 | env = gym.make(env_name) 10 | obs = env.reset() 11 | done = False 12 | ep_ret = 0 13 | ep_cost = 0 14 | while True: 15 | if done: 16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost)) 17 | ep_ret, ep_cost = 0, 0 18 | obs = env.reset() 19 | assert env.observation_space.contains(obs) 20 | act = env.action_space.sample() 21 | assert env.action_space.contains(act) 22 | obs, reward, done, info = env.step(act) 23 | print(obs['magnetometer'], obs['gyro']) 24 | # print('reward', reward) 25 | ep_ret += reward 26 | ep_cost += info.get('cost', 0) 27 | env.render() 28 | 29 | 30 | if __name__ == '__main__': 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0') 34 | args = parser.parse_args() 35 | run_random(args.env) 36 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/test/test_envs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import gym 5 | import safety_gym.envs # noqa 6 | 7 | 8 | class TestEnvs(unittest.TestCase): 9 | def check_env(self, env_name): 10 | ''' Run a single environment for a single episode ''' 11 | print('running', env_name) 12 | env = gym.make(env_name) 13 | env.reset() 14 | done = False 15 | while not done: 16 | _, _, done, _ = env.step(env.action_space.sample()) 17 | 18 | def test_envs(self): 19 | ''' Run all the bench envs ''' 20 | for env_spec in gym.envs.registry.all(): 21 | if 'Safexp' in env_spec.id: 22 | self.check_env(env_spec.id) 23 | 24 | 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/test/test_goal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import numpy as np 5 | 6 | from safety_gym.envs.engine import Engine, ResamplingError 7 | 8 | 9 | class TestGoal(unittest.TestCase): 10 | def rollout_env(self, env): 11 | ''' roll an environment until it is done ''' 12 | done = False 13 | while not done: 14 | _, _, done, _ = env.step([1,0]) 15 | 16 | def test_resample(self): 17 | ''' Episode should end with resampling failure ''' 18 | config = { 19 | 'robot_base': 'xmls/point.xml', 20 | 'num_steps': 1001, 21 | 
'placements_extents': [-1, -1, 1, 1], 22 | 'goal_size': 1.414, 23 | 'goal_keepout': 1.414, 24 | 'goal_locations': [(1, 1)], 25 | 'robot_keepout': 1.414, 26 | 'robot_locations': [(-1, -1)], 27 | 'robot_rot': np.sin(np.pi / 4), 28 | 'terminate_resample_failure': True, 29 | '_seed': 0, 30 | } 31 | env = Engine(config) 32 | env.reset() 33 | self.assertEqual(env.steps, 0) 34 | # Move the robot towards the goal 35 | self.rollout_env(env) 36 | # Check that the environment terminated early 37 | self.assertLess(env.steps, 1000) 38 | 39 | # Try again with the raise 40 | config['terminate_resample_failure'] = False 41 | env = Engine(config) 42 | env.reset() 43 | # Move the robot towards the goal, which should cause resampling failure 44 | with self.assertRaises(ResamplingError): 45 | self.rollout_env(env) 46 | 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/xmls/README.md: -------------------------------------------------------------------------------- 1 | # xmls 2 | 3 | These are mujoco XML files which are used as bases for the simulations. 4 | 5 | Some design goals for them: 6 | 7 | - XML should be complete and simulate-able as-is 8 | - Include a floor geom which is a plane 9 | - Include joint sensor for the robot which provide observation 10 | - Include actuators which provide control 11 | - Default positions should all be neutral 12 | - position 0,0,0 should be resting on the floor, not intersecting it 13 | - robot should start at the origin 14 | - Scene should be clear of other objects 15 | - no obstacles or things to manipulate 16 | - only the robot in the scene 17 | 18 | Requirements for the robot 19 | - Position joints should be separate and named `x`, `y`, and `z` 20 | - 0, 0, 0 position should be resting on the floor above the origin at a neutral position 21 | - First 6 sensors should be (in order): 22 | - joint positions for x, y, z (absolute position in the scene) 23 | - joint velocities for x, y, z (absolute velocity in the scene) 24 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Gym is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 
9 | 10 | setup( 11 | name='safety_gym', 12 | packages=['safety_gym'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib~=0.14.0', 16 | 'mujoco_py==2.0.2.7', 17 | 'numpy~=1.17.4', 18 | 'xmltodict~=0.12.0', 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | __pycache__/ 3 | *.pkl 4 | data/ 5 | **/*.egg-info 6 | .python-version 7 | .idea/ 8 | .vscode/ 9 | .DS_Store 10 | _build/ 11 | data/ 12 | .ipynb_checkpoints/ 13 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-03-24 10:59:16 5 | @LastEditTime: 2020-05-26 00:19:29 6 | @Description: 7 | ''' 8 | 9 | from mbrl.controllers import MPC as MPC 10 | from mbrl.controllers import SafeMPC 11 | from mbrl.models.model import RegressionModel 12 | from mbrl.models.ensemble import RegressionModelEnsemble 13 | from mbrl.models.constraint_model import CostModel 14 | 15 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from .mpc_controller import MPC 2 | from .safe_mpc_controller import SafeMPC 3 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/mbrl/models/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .cem import CEMOptimizer 2 | from .random import RandomOptimizer 3 | from .rce import RCEOptimizer -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-03-24 01:02:01 5 | @LastEditTime: 2020-03-24 10:49:27 6 | @Description: 7 | ''' 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | 14 | class Optimizer: 15 | def __init__(self, *args, **kwargs): 16 | pass 17 | 18 | def setup(self, cost_function): 19 | raise NotImplementedError("Must be implemented in subclass.") 20 | 21 | def reset(self): 22 | raise NotImplementedError("Must be implemented in subclass.") 23 | 24 | def obtain_solution(self, *args, **kwargs): 25 | raise NotImplementedError("Must be implemented in subclass.") 26 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg1_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_random.gif 
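mbrl/optimizers/optimizer.py above fixes the interface every planner shares: setup(cost_function), reset(), and obtain_solution(); the package exports CEMOptimizer, RandomOptimizer, and RCEOptimizer as its concrete implementations. As an illustration only, a random-shooting planner consistent with the RANDOM: popsize: 5000 entry in the configs could subclass it as sketched below. This is not the repository's RandomOptimizer; action bounds and shapes are assumptions.

import numpy as np
from mbrl.optimizers.optimizer import Optimizer  # abstract base class shown above

class RandomShootingOptimizer(Optimizer):
    """Illustrative subclass of the Optimizer interface (not the repo's RandomOptimizer)."""

    def __init__(self, sol_dim, popsize=5000, lower_bound=-1.0, upper_bound=1.0):
        super().__init__()
        self.sol_dim = sol_dim
        self.popsize = popsize          # e.g. RANDOM.popsize = 5000 in the configs
        self.lb, self.ub = lower_bound, upper_bound
        self.cost_function = None

    def setup(self, cost_function):
        # The MPC controller registers its rollout-cost callback here.
        self.cost_function = cost_function

    def reset(self):
        pass  # random shooting keeps no state between planning calls

    def obtain_solution(self, *args, **kwargs):
        # Sample candidate action sequences uniformly and return the cheapest one.
        candidates = np.random.uniform(self.lb, self.ub, size=(self.popsize, self.sol_dim))
        costs = self.cost_function(candidates)
        return candidates[np.argmin(costs)]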
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg1_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg2_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg2_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_trpo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpo.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_trpol.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpol.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_trpo_10.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpo_10.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_trpol_10.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpol_10.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/requirements.txt: -------------------------------------------------------------------------------- 1 | joblib==0.14.1 2 | matplotlib==3.1.3 3 | mpi4py==3.0.3 4 | psutil==5.7.2 5 | PyYAML==5.4 6 | tqdm==4.48.0 7 | seaborn==0.8.1 8 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-05-23 16:02:07 5 | @LastEditTime: 2020-05-23 16:02:44 6 | @Description: 7 | ''' 8 | 9 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/mpi_pytorch.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import numpy as np 3 | import os 4 | import torch 5 | from mpi4py import MPI 6 | from utils.mpi_tools import broadcast, mpi_avg, num_procs, proc_id 7 | 8 | def setup_pytorch_for_mpi(): 9 | """ 10 | Avoid slowdowns caused by each separate process's PyTorch using 11 | more than its fair share of CPU resources. 12 | """ 13 | #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 14 | if torch.get_num_threads()==1: 15 | return 16 | fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1) 17 | torch.set_num_threads(fair_num_threads) 18 | #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 19 | 20 | def mpi_avg_grads(module): 21 | """ Average contents of gradient buffers across MPI processes. """ 22 | if num_procs()==1: 23 | return 24 | for p in module.parameters(): 25 | p_grad_numpy = p.grad.numpy() # numpy view of tensor data 26 | avg_p_grad = mpi_avg(p.grad) 27 | p_grad_numpy[:] = avg_p_grad[:] 28 | 29 | def sync_params(module): 30 | """ Sync all parameters of module across all MPI processes. 
""" 31 | if num_procs()==1: 32 | return 33 | for p in module.parameters(): 34 | p_numpy = p.data.numpy() 35 | broadcast(p_numpy) -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/run_entrypoint.py: -------------------------------------------------------------------------------- 1 | import zlib 2 | import pickle 3 | import base64 4 | 5 | if __name__ == '__main__': 6 | import argparse 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('encoded_thunk') 9 | args = parser.parse_args() 10 | thunk = pickle.loads(zlib.decompress(base64.b64decode(args.encoded_thunk))) 11 | thunk() -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return (convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/user_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | # Default neural network backend for each algo 5 | # (Must be either 'tf1' or 'pytorch') 6 | DEFAULT_BACKEND = { 7 | 'vpg': 'pytorch', 8 | 'trpo': 'tf1', 9 | 'ppo': 'pytorch', 10 | 'ddpg': 'pytorch', 11 | 'td3': 'pytorch', 12 | 'sac': 'pytorch' 13 | } 14 | 15 | # Where experiment outputs are saved by default: 16 | #DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data') 17 | DEFAULT_DATA_DIR = osp.join(osp.abspath('./'),'data') 18 | 19 | # Whether to automatically insert a date and time stamp into the names of 20 | # save directories: 21 | FORCE_DATESTAMP = False 22 | 23 | # Whether GridSearch provides automatically-generated default shorthands: 24 | DEFAULT_SHORTHAND = True 25 | 26 | # Tells the GridSearch how many seconds to pause for before launching 27 | # experiments. 
28 | WAIT_BEFORE_LAUNCH = 5 -------------------------------------------------------------------------------- /Safe-RL/safeRL/.gitignore: -------------------------------------------------------------------------------- 1 | safe_recovery/output 2 | safe_recovery/logging 3 | safe_recovery/saved_models 4 | safe_recovery/old_policies 5 | 6 | 7 | 8 | 9 | *.o 10 | *.pyc 11 | 12 | # Compiled source # 13 | ################### 14 | *.com 15 | *.class 16 | *.dll 17 | *.exe 18 | *.o 19 | *.so 20 | 21 | # Packages # 22 | ############ 23 | # it's better to unpack these files and commit the raw source 24 | # git has its own built in compression methods 25 | *.7z 26 | *.dmg 27 | *.gz 28 | *.iso 29 | *.jar 30 | *.rar 31 | *.tar 32 | *.zip -------------------------------------------------------------------------------- /Safe-RL/safeRL/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "MADRaS"] 2 | path = MADRaS 3 | url = https://github.com/madras-simulator/MADRaS 4 | [submodule "safe-grid-gym"] 5 | path = safe-grid-gym 6 | url = https://github.com/david-lindner/safe-grid-gym 7 | branch = safe_recovery 8 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [2019] [Harshit Sikchi] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/README.md~: -------------------------------------------------------------------------------- 1 | # HCOPE 2 | High-Confidence Off-Policy Evaluation. 3 | 4 | 5 | Python Implementation of HCOPE lower bound evaluation as given in the paper: 6 | Thomas, Philip S., Georgios Theocharous, and Mohammad Ghavamzadeh. "High-Confidence Off-Policy Evaluation." AAAI. 2015. 
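For reference, the ordinary (simple) importance-sampling estimator listed in the next section reweights each return observed under the behavior policy by the product of per-step action-probability ratios between the evaluation and behavior policies. The following is an illustrative sketch, not code from this repository; the trajectory format and names are assumptions.

import numpy as np

def simple_is_estimate(trajectories, gamma=1.0):
    """Ordinary importance-sampling estimate of the evaluation policy's return.

    Each trajectory is a list of (reward, pi_e_prob, pi_b_prob) tuples, where
    pi_e_prob / pi_b_prob are the evaluation / behavior action probabilities.
    """
    returns = []
    for traj in trajectories:
        ratio = np.prod([pe / pb for _, pe, pb in traj])       # importance weight
        ret = sum(gamma ** t * r for t, (r, _, _) in enumerate(traj))
        returns.append(ratio * ret)
    return np.mean(returns)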
7 | 8 | 9 | ## Importance Sampling 10 | 11 | Implementation of: 12 | * Simple Importance Sampling 13 | * Per-Decision Importance Sampling 14 | * Normalized Per-Decision Importance Sampling (NPDIS) Estimator 15 | * Weighted Importance Sampling (WIS) Estimator 16 | * Weighted Per-Decision Importance Sampling (WPDIS) Estimator 17 | * Consistent Weighted Per-Decision Importance Sampling (CWPDIS) Estimator 18 | 19 | Comparision of different importance sampling estimators: 20 | ![Different Importance sampling estimators](http://url/to/img.png) 21 | 22 | 23 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/citation.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | authors: 4 | - 5 | affiliation: "University of Texas at Austin" 6 | family-names: Sikchi 7 | given-names: Harshit 8 | cff-version: "1.1.0" 9 | license: MIT 10 | message: "If you use this software, please cite it using these metadata." 11 | repository-code: "https://github.com/hari-sikchi/safeRL" 12 | title: safeRL 13 | ... 14 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/importance_sampling/importance_sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/importance_sampling/importance_sampling.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_+_0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_+_0.1.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_minus_0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_minus_0.1.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_random.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_variance.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Result.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/Theorem.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Theorem.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safe_actions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions.gif -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safe_actions_instability.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions_instability.gif -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_layer.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_optimization.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_signal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_signal.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/optimizers.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES 11 | class Optimizer(object): 12 | def __init__(self, w_policy): 13 | self.w_policy = w_policy.flatten() 14 | self.dim = w_policy.size 15 | self.t = 0 16 | 17 | def update(self, globalg): 18 | self.t += 1 19 | step = self._compute_step(globalg) 20 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5) 21 | return self.w_policy + step, ratio 22 | 23 | def _compute_step(self, globalg): 24 | raise NotImplementedError 25 | 26 | 27 | class SGD(Optimizer): 28 | def __init__(self, pi, stepsize): 29 | Optimizer.__init__(self, pi) 30 | self.stepsize = stepsize 31 | 32 | def _compute_step(self, globalg): 33 | step = -self.stepsize * globalg 34 | return step 35 | 36 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/shared_noise.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/ray-project/ray/tree/master/python/ray/rllib/es 3 | 4 | import ray 5 | import numpy as np 6 | 7 | @ray.remote 8 | def create_shared_noise(): 9 | """ 10 | Create a large array of noise to be shared by all workers. Used 11 | for avoiding the communication of the random perturbations delta. 12 | """ 13 | 14 | seed = 12345 15 | count = 250000000 16 | noise = np.random.RandomState(seed).randn(count).astype(np.float64) 17 | return noise 18 | 19 | 20 | class SharedNoiseTable(object): 21 | def __init__(self, noise, seed = 11): 22 | 23 | self.rg = np.random.RandomState(seed) 24 | self.noise = noise 25 | assert self.noise.dtype == np.float64 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def get_mod(self, i, dim,ratio): 31 | return ratio*self.noise[i:i + dim] 32 | 33 | 34 | def sample_index(self, dim): 35 | return self.rg.randint(0, len(self.noise) - dim + 1) 36 | 37 | def get_delta(self, dim): 38 | idx = self.sample_index(dim) 39 | return idx, self.get(idx, dim) 40 | 41 | 42 | def get_delta_mod(self, dim,ratio): 43 | idx = self.sample_index(dim) 44 | return idx, ratio*self.get(idx, dim) 45 | 46 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/utils.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | def itergroups(items, group_size): 8 | assert group_size >= 1 9 | group = [] 10 | for x in items: 11 | group.append(x) 12 | if len(group) == group_size: 13 | yield tuple(group) 14 | del group[:] 15 | if group: 16 | yield tuple(group) 17 | 18 | 19 | 20 | def batched_weighted_sum(weights, vecs, batch_size): 21 | total = 0 22 | num_items_summed = 0 23 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size), 24 | itergroups(vecs, batch_size)): 25 | assert len(batch_weights) == len(batch_vecs) <= batch_size 26 | total += np.dot(np.asarray(batch_weights, dtype=np.float64), 27 | np.asarray(batch_vecs, dtype=np.float64)) 28 | num_items_summed += len(batch_weights) 29 | return total, num_items_summed 30 | 31 | def plot_info(param_dict, logdir): 32 | for key, value in param_dict.items(): 33 | x = value[0] 34 | y = value[1] 35 | x_name = value[2] 36 | y_name = value[3] 37 | print(x,y) 38 | plt.plot(x, y) 39 | plt.title(key) 40 | plt.xlabel(x_name) 41 | plt.ylabel(y_name) 42 | plt.savefig((logdir + "/plot_" +key + ".png")) 43 | plt.clf() 44 | 45 | 46 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | **/__pycache__ 9 | safe_learning.egg-info 10 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea 3 | .ipynb_checkpoints 4 | htmlcov 5 | .coverage 6 | .cache 7 | safe_learning.egg-info 8 | __pycache__ 9 | docs/safe_learning.* 10 | docs/_build 11 | *.swp 12 | *.DS_Store 13 | .pytest_cache 14 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | services: 5 | - docker 6 | 7 | env: 8 | - PYTHON=python2 9 | - PYTHON=python3 10 | 11 | # Setup anaconda 12 | install: 13 | # Disabled since docker pull does not affect cache 14 | # Fixed in Docker 1.13 with --cache-from 15 | # - docker pull befelix/lyapunov-learning-private:${PYTHON} || true 16 | - docker build -f Dockerfile.${PYTHON} -t test-image . 
17 | - docker ps -a 18 | 19 | # Run tests 20 | script: 21 | - docker run test-image scripts/test_code.sh 22 | 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda update conda --yes --quiet \ 11 | && conda install python=3.5 pip numpy scipy pandas --yes --quiet \ 12 | && conda clean --yes --all \ 13 | && hash -r 14 | 15 | # Get the requirements files (seperate from the main body) 16 | COPY requirements.txt requirements_dev.txt /reqs/ 17 | 18 | # Install requirements and clean up 19 | RUN pip --no-cache-dir install -r /reqs/requirements.txt \ 20 | && pip --no-cache-dir install -r /reqs/requirements_dev.txt \ 21 | && pip install jupyter jupyterlab dumb-init \ 22 | && rm -rf /root/.cache \ 23 | && rm -rf /reqs 24 | 25 | # Manually install GPflow and clean up 26 | RUN git clone --depth=1 --branch=0.4.0 https://github.com/GPflow/GPflow.git \ 27 | && cd GPflow \ 28 | && python setup.py install \ 29 | && rm -rf /GPflow 30 | 31 | # Output scrubber for jupyter 32 | ADD scripts/jupyter_output.py / 33 | 34 | RUN jupyter notebook --generate-config \ 35 | && cat /jupyter_output.py >> /root/.jupyter/jupyter_notebook_config.py \ 36 | && rm /jupyter_output.py 37 | 38 | WORKDIR /code 39 | 40 | # Make sure Ctrl+C commands can be forwarded 41 | ENTRYPOINT ["dumb-init", "--"] 42 | 43 | CMD python setup.py develop \ 44 | && jupyter lab --ip="0.0.0.0" --no-browser --allow-root 45 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=2.7 --yes --quiet \ 11 | && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . /code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=3.5 --yes --quiet \ 11 | # && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . 
/code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | doc: ## Build documentation (docs/_build/html/index.html) 7 | cd docs && $(MAKE) html 8 | 9 | coverage: ## Construct coverage (htmlcov/index.html) 10 | coverage html 11 | 12 | test-local: ## Test the local installation of the code 13 | ./scripts/test_code.sh 14 | 15 | test: docker ## Test the docker images 16 | docker run safe_learning_py2 make test-local 17 | docker run safe_learning_py3 make test-local 18 | 19 | dev: ## Mount current code as volume and run jupyterlab for development 20 | docker build -f Dockerfile.dev -t safe_learning_dev . 21 | docker run -p 8888:8888 -v $(shell pwd):/code safe_learning_dev 22 | 23 | docker: ## Build the docker images 24 | docker build -f Dockerfile.python2 -t safe_learning_py2 . 25 | docker build -f Dockerfile.python3 -t safe_learning_py3 . 26 | 27 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeLearning 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/_templates/template.rst: -------------------------------------------------------------------------------- 1 | {{ name }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | .. auto{{ objtype }}:: {{ objname }} {% if objtype == "class" %} 6 | :members: 7 | :inherited-members: 8 | {% endif %} 9 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/api.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ***************** 3 | 4 | .. automodule:: safe_learning 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the Safe Learning documentation! 2 | =========================================== 3 | 4 | .. include:: introduction.rst 5 | 6 | .. toctree:: 7 | :caption: Contents 8 | :maxdepth: 3 9 | 10 | api 11 | 12 | Indices and tables 13 | ================== 14 | 15 | * :ref:`genindex` 16 | * :ref:`modindex` 17 | * :ref:`search` 18 | 19 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | TODO -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=SafeLearning 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | numpydoc >= 0.6 3 | sphinx_rtd_theme >= 0.1.8 4 | mock 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/examples/README.rst: -------------------------------------------------------------------------------- 1 | Example notebooks for the library 2 | ================================= 3 | 4 | Introductions 5 | ------------- 6 | - `1d_region_of_attraction_estimate.ipynb <./1d_region_of_attraction_estimate.ipynb>`_ shows how to estimate and learn the region of attraction for a fixed policy. 
7 | - `basic_dynamic_programming.ipynb <./basic_dynamic_programming.ipynb>`_ does basic dynamic programming with piecewise linear function approximators for the mountain car example. 8 | - `reinforcement_learning_pendulum.ipynb <./reinforcement_learning_pendulum.ipynb>`_ does approximate policy iteration in an actor-critic framework with neural networks for the inverted pendulum. 9 | - `reinforcement_learning_cartpole.ipynb <./reinforcement_learning_cartpole.ipynb>`_ does the same as above for the cart-pole (i.e., the inverted pendulum on a cart). 10 | 11 | Experiments 12 | ----------- 13 | - `1d_example.ipynb <./1d_example.ipynb>`_ contains a 1D example including plots of the sets. 14 | - `inverted_pendulum.ipynb <./inverted_pendulum.ipynb>`_ contains a full neural network example with an inverted pendulum. 15 | - `adaptive_safety_verification.ipynb <./adaptive_safety_verification.ipynb>`_ investigates the benefits of an adaptive discretization in identifying safe sets for the inverted pendulum. 16 | - `lyapunov_function_learning.ipynb <./lyapunov_function_learning.ipynb>`_ demonstrates how a parameterized Lyapunov candidate for the inverted pendulum can be trained with the machine learning approach in [1]_. 17 | 18 | .. [1] S. M. Richards, F. Berkenkamp, A. Krause, 19 | `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems `_. Conference on Robot Learning (CoRL), 2018. 20 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.0,<1.15 2 | scipy>=1.0.0<=1.2.1 3 | gpflow==0.4.0 4 | matplotlib<=4.0.0 5 | scs==2.0.2 6 | cvxpy>=1,<=1.0.15 7 | tensorflow>=1.6.0,<=1.12.0 8 | future<=0.18.0 9 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | mock 2 | flake8>=3.0,<=3.5.0 3 | pytest==4.6.9 4 | pytest-cov==2.8.1 5 | pydocstyle>=2.0,<2.1 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/safe_learning/configuration.py: -------------------------------------------------------------------------------- 1 | """General configuration class for dtypes.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | 8 | class Configuration(object): 9 | """Configuration class.""" 10 | 11 | def __init__(self): 12 | """Initialization.""" 13 | super(Configuration, self).__init__() 14 | 15 | # Dtype for computations 16 | self.dtype = tf.float64 17 | 18 | # Batch size for stability verification 19 | self.gp_batch_size = 10000 20 | 21 | @property 22 | def np_dtype(self): 23 | """Return the numpy dtype.""" 24 | return self.dtype.as_numpy_dtype 25 | 26 | def __repr__(self): 27 | """Print the parameters.""" 28 | params = ['Configuration parameters:', ''] 29 | for param, value in self.__dict__.items(): 30 | params.append('{}: {}'.format(param, value.__repr__())) 31 | 32 | return '\n'.join(params) 33 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/scripts/jupyter_output.py: -------------------------------------------------------------------------------- 1 | def scrub_output_pre_save(model, **kwargs): 2 | """scrub output before saving notebooks""" 3 | # only run on notebooks 4 | if 
model['type'] != 'notebook': 5 | return 6 | # only run on nbformat v4 7 | if model['content']['nbformat'] != 4: 8 | return 9 | 10 | for cell in model['content']['cells']: 11 | if cell['cell_type'] != 'code': 12 | continue 13 | cell['outputs'] = [] 14 | cell['execution_count'] = None 15 | 16 | c.FileContentsManager.pre_save_hook = scrub_output_pre_save 17 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/scripts/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="safe_learning" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # Change to script root 19 | cd $(get_script_dir)/.. 20 | GREEN='\033[0;32m' 21 | NC='\033[0m' 22 | 23 | # Run style tests 24 | echo -e "${GREEN}Running style tests.${NC}" 25 | flake8 $module --exclude test*.py,__init__.py --ignore=E402,E731,W503 --show-source || { exit 1; } 26 | 27 | # Ignore import errors for __init__ and tests 28 | flake8 $module --filename=__init__.py,test*.py --ignore=F,E402,W503 --show-source || { exit 1; } 29 | 30 | echo -e "${GREEN}Testing docstring conventions.${NC}" 31 | # Test docstring conventions 32 | pydocstyle $module --convention=numpy || { exit 1; } 33 | 34 | # Run unit tests 35 | echo -e "${GREEN}Running unit tests.${NC}" 36 | pytest --doctest-modules --cov --cov-fail-under=80 $module || { exit 1; } 37 | 38 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | __pycache__/ 4 | .vscode/ 5 | result/ 6 | old/ 7 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/GPSG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/GPSG.png -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Akifumi Wachi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/simple_make_rand_settings.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import GPy 4 | import numpy as np 5 | import arguments 6 | 7 | from safemdp.grid_world import (draw_gp_sample, compute_S_hat0) 8 | 9 | 10 | args = arguments.safemdp_argparse() 11 | 12 | # Define world 13 | world_shape = args.world_shape 14 | step_size = args.step_size 15 | 16 | # Define GP for safety 17 | noise_safety = args.noise_safety 18 | safety_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.), 19 | variance=1., ARD=True) 20 | safety_lik = GPy.likelihoods.Gaussian(variance=noise_safety ** 2) 21 | safety_lik.constrain_bounded(1e-6, 10000.) 22 | 23 | # Define GP for reward 24 | noise_reward = args.noise_reward 25 | reward_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.), 26 | variance=1., ARD=True) 27 | reward_lik = GPy.likelihoods.Gaussian(variance=noise_reward ** 2) 28 | reward_lik.constrain_bounded(1e-6, 10000.) 29 | 30 | # Safety and Reward functions 31 | safety, _ = draw_gp_sample(safety_kernel, world_shape, step_size) 32 | reward, _ = draw_gp_sample(reward_kernel, world_shape, step_size) 33 | 34 | # Set the minimum value for reward as zero 35 | reward -= min(reward) 36 | 37 | # Safety threshold, Lipschitz constant, scaling factors for confidence interval 38 | h = args.h 39 | 40 | # Initialize safe sets 41 | S0 = np.zeros((np.prod(world_shape), 5), dtype=bool) 42 | S0[:, 0] = True 43 | S_hat0 = compute_S_hat0(np.nan, world_shape, 4, safety, step_size, h) 44 | start_pos = np.random.choice(np.where(S_hat0)[0]) 45 | 46 | # Save the problem settings as a npz file 47 | np.savez('data/simple/random_settings_new', safety=safety, reward=reward, 48 | start_pos=start_pos) 49 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/utils/reward_utilities.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __all__ = ['RewardObj'] 5 | 6 | 7 | class RewardObj(object): 8 | """Reward Object in MDPs. 9 | 10 | Parameters 11 | ---------- 12 | gp_r: GPy.core.GPRegression 13 | A Gaussian process model that can be used to determine the reward. 14 | beta_r: float 15 | The confidence interval used by the GP model.
16 | """ 17 | def __init__(self, gp_r, beta_r): 18 | super(RewardObj, self).__init__() 19 | 20 | # Scalar for gp confidence intervals 21 | self.beta = beta_r 22 | # GP model 23 | self.gp = gp_r 24 | 25 | def add_gp_observations(self, x_new, y_new): 26 | """Add observations to the gp.""" 27 | # Update GP with observations 28 | self.gp.set_XY(np.vstack((self.gp.X, x_new)), 29 | np.vstack((self.gp.Y, y_new))) 30 | -------------------------------------------------------------------------------- /Safe-RL/safe_rl_papers/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Chi Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.util import deprecation as deprecation 2 | deprecation._PRINT_DEPRECATION_WARNINGS = False 3 | 4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo 5 | from safe_rl.sac.sac import sac -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/pg/trust_region.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from safe_rl.pg.utils import EPS 4 | 5 | 6 | """ 7 | Tensorflow utilities for trust region optimization 8 | """ 9 | 10 | def flat_concat(xs): 11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0) 12 | 13 | def flat_grad(f, params): 14 | return flat_concat(tf.gradients(xs=params, ys=f)) 15 | 16 | def hessian_vector_product(f, params): 17 | # for H = grad**2 f, compute Hx 18 | g = flat_grad(f, params) 19 | x = tf.placeholder(tf.float32, shape=g.shape) 20 | return x, flat_grad(tf.reduce_sum(g*x), params) 21 | 22 | def assign_params_from_flat(x, params): 23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars 24 | splits = tf.split(x, [flat_size(p) for p in params]) 25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)] 26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)]) 27 | 28 | 29 | """ 30 | Conjugate gradient 31 | """ 32 | 33 | def cg(Ax, b, cg_iters=10): 34 | x = np.zeros_like(b) 35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start. 36 | p = r.copy() 37 | r_dot_old = np.dot(r,r) 38 | for _ in range(cg_iters): 39 | z = Ax(p) 40 | alpha = r_dot_old / (np.dot(p, z) + EPS) 41 | x += alpha * p 42 | r -= alpha * z 43 | r_dot_new = np.dot(r,r) 44 | p = r + (r_dot_new / r_dot_old) * p 45 | r_dot_old = r_dot_new 46 | return x -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/pg/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.signal 3 | 4 | EPS = 1e-8 5 | 6 | def combined_shape(length, shape=None): 7 | if shape is None: 8 | return (length,) 9 | return (length, shape) if np.isscalar(shape) else (length, *shape) 10 | 11 | def keys_as_sorted_list(dict): 12 | return sorted(list(dict.keys())) 13 | 14 | def values_as_sorted_list(dict): 15 | return [dict[k] for k in keys_as_sorted_list(dict)] 16 | 17 | def discount_cumsum(x, discount): 18 | """ 19 | magic from rllab for computing discounted cumulative sums of vectors. 
20 | 21 | input: 22 | vector x, 23 | [x0, 24 | x1, 25 | x2] 26 | 27 | output: 28 | [x0 + discount * x1 + discount^2 * x2, 29 | x1 + discount * x2, 30 | x2] 31 | """ 32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 33 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/load_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import joblib 4 | import os 5 | import os.path as osp 6 | import tensorflow as tf 7 | from safe_rl.utils.logx import restore_tf_graph 8 | 9 | def load_policy(fpath, itr='last', deterministic=False): 10 | 11 | # handle which epoch to load from 12 | if itr=='last': 13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11] 14 | itr = '%d'%max(saves) if len(saves) > 0 else '' 15 | else: 16 | itr = '%d'%itr 17 | 18 | # load the things! 19 | sess = tf.Session(graph=tf.Graph()) 20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr)) 21 | 22 | # get the correct op for executing actions 23 | if deterministic and 'mu' in model.keys(): 24 | # 'deterministic' is only a valid option for SAC policies 25 | print('Using deterministic action op.') 26 | action_op = model['mu'] 27 | else: 28 | print('Using default action op.') 29 | action_op = model['pi'] 30 | 31 | # make function for producing an action given a single state 32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0] 33 | 34 | # try to load environment from save 35 | # (sometimes this will fail because the environment could not be pickled) 36 | try: 37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) 38 | env = state['env'] 39 | except: 40 | env = None 41 | 42 | return env, get_action, sess -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/readme.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo). -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/run_utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os.path as osp 3 | 4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data') 5 | 6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True): 7 | 8 | # Make base path 9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else '' 10 | relpath = ''.join([ymd_time, exp_name]) 11 | 12 | if seed is not None: 13 | # Make a seed-specific subfolder in the experiment directory. 
14 | if datestamp: 15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S") 16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)]) 17 | else: 18 | subfolder = ''.join([exp_name, '_s', str(seed)]) 19 | relpath = osp.join(relpath, subfolder) 20 | 21 | data_dir = data_dir or DEFAULT_DATA_DIR 22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath), 23 | exp_name=exp_name) 24 | return logger_kwargs -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return tuple(convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 9 | 10 | setup( 11 | name='safe_rl', 12 | packages=['safe_rl'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib==0.14.0', 16 | 'matplotlib==3.1.1', 17 | 'mpi4py==3.0.2', 18 | 'mujoco_py==2.0.2.7', 19 | 'numpy~=1.17.4', 20 | 'seaborn==0.8.1', 21 | 'tensorflow==1.13.1', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/README.md: -------------------------------------------------------------------------------- 1 | # vertex-net 2 | This repository contains source code of the paper: 3 | 4 | Liyuan Zheng, Yuanyuan Shi, Lillian J.
Ratliff, and Baosen Zhang, "Safe Reinforcement Learning of Control-Affine Systems with Vertex Networks", 5 | [[ArXiv]](https://arxiv.org/abs/2003.09488) 6 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/algos/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/envs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/nets/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/policy_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | use_cuda = torch.cuda.is_available() 6 | device = torch.device("cuda" if use_cuda else "cpu") 7 | 8 | 9 | class PolicyNetwork(nn.Module): 10 | def __init__(self, env, obs_dim, action_dim, hidden_dim, init_w=3e-3): 11 | super(PolicyNetwork, self).__init__() 12 | 13 | self.env = env 14 | 15 | self.linear1 = nn.Linear(obs_dim, hidden_dim) 16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 17 | self.linear3 = nn.Linear(hidden_dim, action_dim) 18 | 19 | self.linear3.weight.data.uniform_(-init_w, init_w) 20 | self.linear3.bias.data.uniform_(-init_w, init_w) 21 | 22 | def forward(self, state): 23 | x = F.relu(self.linear1(state)) 24 | x = F.relu(self.linear2(x)) 25 | x = self.env.max_action * torch.tanh(self.linear3(x)) 26 | return x 27 | 28 | def get_action(self, state): 29 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 30 | action = self.forward(state) 31 | return action.detach().cpu().numpy()[0] -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/value_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ValueNetwork(nn.Module): 7 | def __init__(self, obs_dim, action_dim, hidden_dim, init_w=3e-3): 8 | super(ValueNetwork, self).__init__() 9 | 10 | self.linear1 = nn.Linear(obs_dim + action_dim, hidden_dim) 11 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 12 | self.linear3 = nn.Linear(hidden_dim, 1) 13 | 14 | self.linear3.weight.data.uniform_(-init_w, init_w) 15 | self.linear3.bias.data.uniform_(-init_w, init_w) 16 | 17 | def forward(self, state, action): 
18 | x = torch.cat((state, action), dim=1) 19 | x = F.relu(self.linear1(x)) 20 | x = F.relu(self.linear2(x)) 21 | x = self.linear3(x) 22 | return x -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/vertex_policy_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | use_cuda = torch.cuda.is_available() 6 | device = torch.device("cuda" if use_cuda else "cpu") 7 | 8 | 9 | class VertexPolicyNetwork(nn.Module): 10 | def __init__(self, env, obs_dim, num_vertex, hidden_dim, init_w=3e-3): 11 | super(VertexPolicyNetwork, self).__init__() 12 | 13 | self.env = env 14 | 15 | self.linear1 = nn.Linear(obs_dim, hidden_dim) 16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 17 | self.linear3 = nn.Linear(hidden_dim, num_vertex) 18 | 19 | self.linear3.weight.data.uniform_(-init_w, init_w) 20 | self.linear3.bias.data.uniform_(-init_w, init_w) 21 | 22 | def forward(self, state): 23 | x = F.relu(self.linear1(state)) 24 | x = F.relu(self.linear2(x)) 25 | x = F.softmax(self.linear3(x), dim=1) 26 | action_vertex = self.env.get_action_vertex(state.numpy()) 27 | action_vertex = torch.FloatTensor(action_vertex).to(device) 28 | x = torch.bmm(x.unsqueeze(1), action_vertex).squeeze(1) 29 | # x = torch.sum(x * action_vertex, dim=1).unsqueeze(1) 30 | return x 31 | 32 | def get_action(self, state): 33 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 34 | action = self.forward(state) 35 | return action.detach().cpu().numpy()[0] -------------------------------------------------------------------------------- /Safe-RL/vertex-net/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/utils/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/utils/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | class ReplayBuffer: 6 | def __init__(self, capacity): 7 | self.capacity = capacity 8 | self.buffer = [] 9 | self.position = 0 10 | 11 | def push(self, state, action, reward, next_state, done): 12 | if len(self.buffer) < self.capacity: 13 | self.buffer.append(None) 14 | self.buffer[self.position] = (state, action, reward, next_state, done) 15 | self.position = (self.position + 1) % self.capacity 16 | 17 | def sample(self, batch_size): 18 | batch = random.sample(self.buffer, batch_size) 19 | state, action, reward, next_state, done = map(np.stack, zip(*batch)) 20 | return state, action, reward, next_state, done 21 | 22 | def __len__(self): 23 | return len(self.buffer) --------------------------------------------------------------------------------
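The VertexPolicyNetwork in vertex-net/nets/vertex_policy_net.py delegates the geometry of the safe action set to the environment: get_action_vertex returns the vertices of the (state-dependent) action polytope, and the network outputs a convex combination of those vertices, so the action stays inside the polytope by construction. The following is a minimal, illustrative sketch of that contract; the ToyEnv class, its fixed 1-D interval action set, the dimensions, and the import path are assumptions for demonstration only, and it assumes CPU execution since forward calls state.numpy().

import numpy as np
import torch

from nets.vertex_policy_net import VertexPolicyNetwork  # assumed import path inside vertex-net


class ToyEnv:
    """Illustrative stand-in for the control-affine environments used with these networks."""

    max_action = 1.0  # used by the plain PolicyNetwork; included to show the full env interface

    def get_action_vertex(self, state):
        # Vertices of the safe action set for each state in the batch.
        # Shape (batch, num_vertex, action_dim); here a fixed interval [-1, 1].
        batch = state.shape[0]
        return np.tile(np.array([[[-1.0], [1.0]]]), (batch, 1, 1))


env = ToyEnv()
policy = VertexPolicyNetwork(env, obs_dim=3, num_vertex=2, hidden_dim=64)

state = np.zeros(3, dtype=np.float32)
action = policy.get_action(state)  # convex combination of the two vertices, hence inside [-1, 1]
print(action.shape)  # (1,)

A state-dependent safe set only requires changing get_action_vertex; the policy network itself is unchanged, which is the point of the vertex parameterization.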