├── README.md └── Safe-RL ├── AlwaysSafe ├── .gitignore ├── LICENSE ├── Pipfile ├── README.md ├── agents │ ├── __init__.py │ ├── abs_opt_cmdp.py │ └── opt_cmdp.py ├── planners │ ├── __init__.py │ ├── abs_lp_optimistic.py │ ├── lp.py │ └── lp_optimistic.py ├── scripts │ ├── .gitignore │ ├── __init__.py │ ├── cliff_walking.py │ ├── factored.py │ └── simple.py ├── tests │ ├── __init__.py │ ├── test_abs_opt_cmdp.py │ ├── test_lp_agent.py │ ├── test_lp_optimistic.py │ ├── test_lp_optimistic_abs.py │ ├── test_opt_cmdp.py │ └── test_training.py └── util │ ├── __init__.py │ ├── grb.py │ ├── mdp.py │ └── training.py ├── AutomotiveSafeRL ├── .gitignore ├── Project.toml ├── README.md ├── RNNFiltering │ ├── RNNFiltering.jl │ ├── bagging_training.jl │ ├── data_generation.jl │ ├── datagen.sh │ ├── generate_data.sh │ ├── generate_dataset.jl │ ├── load_model_weights.jl │ ├── model_loading.jl │ ├── scp_model.sh │ ├── train.sh │ ├── train_single.sh │ ├── train_tracking.jl │ └── visualize_prediction.ipynb ├── evaluation │ ├── evaluation.jl │ ├── evaluation_functions.jl │ ├── evaluation_script.sh │ ├── helpers.jl │ └── parallel_evaluation.jl ├── notebooks │ ├── baseline.ipynb │ ├── baseline_policy.ipynb │ ├── car_mdp.ipynb │ ├── crosswalk.ipynb │ ├── decomposition.ipynb │ ├── decomposition2.ipynb │ ├── evaluation_scenarios.ipynb │ ├── graphs.ipynb │ ├── interactive_evaluation.ipynb │ ├── joint_mask.ipynb │ ├── joint_problem.ipynb │ ├── ped_mdp.ipynb │ ├── pedcar_mdp.ipynb │ ├── plot_results.ipynb │ ├── plots.ipynb │ ├── profiling.ipynb │ ├── qmdp_approximation.ipynb │ ├── test.ipynb │ └── tracking.ipynb ├── old_scripts │ ├── accepting_states.jl │ ├── baseline_script.jl │ ├── carmdp_product.jl │ ├── carmdp_script.jl │ ├── carmdp_vi_until.jl │ ├── evaluation_script.jl │ ├── fast_pedcar_vi.jl │ ├── joint_eval.jl │ ├── jointmdp_script.jl │ ├── pedcar_local_vi.jl │ ├── pedcar_script.jl │ ├── pedcar_sync.jl │ ├── pedcar_vi.jl │ ├── pedcar_vi_benchmark.jl │ ├── pedcar_vi_eval.jl │ ├── pedmdp_local_vi.jl │ ├── pedmdp_script.jl │ └── pedmdp_vi_until.jl ├── src │ ├── baseline_policy.jl │ ├── decomposed_tracking.jl │ ├── decomposition.jl │ ├── masked_dqn.jl │ ├── masking.jl │ ├── qmdp_approximation.jl │ ├── render_helpers.jl │ └── util.jl ├── test │ ├── runtests.jl │ ├── test_car_mdp.jl │ ├── test_discretization.jl │ ├── test_interpolation.jl │ └── test_pedestrian_mdp.jl └── training_scripts │ ├── carmdp_training.jl │ ├── dqn_jointeval.jl │ ├── jointmdp_training.jl │ ├── pedcar_dqn.jl │ ├── pedcar_eval.jl │ ├── pedcar_training.jl │ ├── pedcar_vi.jl │ ├── pedmdp_training.jl │ ├── process_utility.jl │ ├── sparse_vi.jl │ ├── training.sh │ ├── training.tex │ └── until_dqn.jl ├── Constraint_RL_MPC ├── .idea │ ├── Constraint_RL_MPC.iml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ └── workspace.xml ├── Abgabe │ ├── Buffer │ │ ├── ReplayBuffer.py │ │ └── __pycache__ │ │ │ └── ReplayBuffer.cpython-35.pyc │ ├── Disturbances │ │ ├── external_disturbances_old.mat │ │ ├── external_disturbances_randn.mat │ │ └── external_disturbances_uniform.mat │ ├── Model │ │ ├── Linear_Env.py │ │ └── __pycache__ │ │ │ └── Linear_Env.cpython-35.pyc │ ├── Neural_Network │ │ ├── Actor_Model.py │ │ ├── Critic_Model.py │ │ ├── NeuralNetwork.py │ │ └── __pycache__ │ │ │ ├── Actor_Model.cpython-35.pyc │ │ │ ├── Critic_Model.cpython-35.pyc │ │ │ └── NeuralNetwork.cpython-35.pyc │ ├── Normalize │ │ ├── MinMax.py │ │ └── __pycache__ │ │ │ └── MinMax.cpython-35.pyc │ ├── Pre_training │ │ ├── Immediate_constraint_functions.py │ │ ├── 
Test_immediate_constraint_functions.py │ │ ├── __pycache__ │ │ │ └── constraints.cpython-35.pyc │ │ ├── constraints.py │ │ ├── constraints_test_E_low_weights.h5f │ │ ├── constraints_test_E_up_weights.h5f │ │ ├── constraints_test_T_low_weights.h5f │ │ ├── constraints_test_T_up_weights.h5f │ │ └── readme.txt │ ├── Training_MPC │ │ ├── MPC.py │ │ ├── Main_MPC.py │ │ ├── Main_System_Identification.py │ │ ├── SI_MPC_weights.h5f │ │ ├── SI_MinMax.npy │ │ ├── __pycache__ │ │ │ └── MPC.cpython-35.pyc │ │ └── readme.txt │ └── Training_RL │ │ ├── DDPG.py │ │ ├── Main_RL.py │ │ ├── __pycache__ │ │ └── DDPG.cpython-35.pyc │ │ ├── ddpg_Test1_5_weights_actor.h5f │ │ ├── ddpg_Test1_5_weights_critic.h5f │ │ ├── ddpg_Test2_5_weights_actor.h5f │ │ ├── ddpg_Test2_5_weights_critic.h5f │ │ ├── ddpg_Test3_5_weights_actor.h5f │ │ ├── ddpg_Test3_5_weights_critic.h5f │ │ └── readme.txt └── README.md ├── LeaveNoTrace ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── coach_util.py ├── demo.py ├── env_util.py ├── envs │ ├── __init__.py │ ├── assets │ │ ├── cliff_cheetah.xml │ │ ├── cliff_walker.xml │ │ ├── peg_insertion.xml │ │ └── pusher.xml │ ├── cliff_envs.py │ ├── frozen_lake.py │ ├── hopper.py │ ├── peg_insertion.py │ └── pusher.py ├── lnt.py └── plot.png ├── PCPO └── iclr_2020_code_submission.zip ├── RL-Safety-Algorithms ├── LICENSE ├── README.md ├── experiments │ ├── benchmark_circle_tasks.py │ ├── benchmark_gather_tasks.py │ ├── benchmark_reach_tasks.py │ ├── benchmark_run_tasks.py │ └── safety_settings.py ├── rl_safety_algorithms │ ├── __init__.py │ ├── algs │ │ ├── __init__.py │ │ ├── core.py │ │ ├── cpo │ │ │ ├── __init__.py │ │ │ ├── cpo.py │ │ │ └── defaults.py │ │ ├── iwpg │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── iwpg.py │ │ ├── lag-trpo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── lag-trpo.py │ │ ├── npg │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── npg.py │ │ ├── pdo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── pdo.py │ │ ├── trpo │ │ │ ├── __init__.py │ │ │ ├── defaults.py │ │ │ └── trpo.py │ │ ├── utils.py │ │ └── vtrace.py │ ├── benchmark.py │ ├── common │ │ ├── __init__.py │ │ ├── experiment_analysis.py │ │ ├── loggers.py │ │ ├── model.py │ │ ├── mpi_tools.py │ │ ├── multi_processing_utils.py │ │ ├── online_mean_std.py │ │ ├── trainer.py │ │ └── utils.py │ ├── play.py │ └── train.py ├── setup.py └── tests │ ├── test_algs_mpi.py │ ├── test_algs_single_thread.py │ ├── test_gae.py │ ├── test_mean_std.py │ ├── test_mean_std_mpi.py │ └── test_trust_region_utils.py ├── Safe-MBPO ├── .gitignore ├── LICENSE ├── README.md ├── config │ ├── ant.json │ ├── cheetah-no-flip.json │ ├── hopper.json │ └── humanoid.json ├── main.py ├── requirements.txt └── src │ ├── __init__.py │ ├── checkpoint.py │ ├── cli.py │ ├── config.py │ ├── defaults.py │ ├── dynamics.py │ ├── log.py │ ├── normalization.py │ ├── policy.py │ ├── sampling.py │ ├── shared.py │ ├── smbpo.py │ ├── squashed_gaussian.py │ ├── ssac.py │ ├── torch_util.py │ ├── train.py │ └── util.py ├── Safe-RL-Benchmark ├── .dockerignore ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.rst ├── SafeRLBench │ ├── __init__.py │ ├── algo │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── a3c.py │ │ ├── policygradient.py │ │ ├── q_learning.py │ │ ├── safeopt.py │ │ └── test.py │ ├── base.py │ ├── bench.py │ ├── configuration.py │ ├── envs │ │ ├── README.rst │ │ ├── __init__.py │ │ ├── _quadrocopter │ │ │ ├── __init__.py │ │ │ ├── quadrocopter_classes.py │ │ │ ├── quadrotor_dynamics.py │ 
│ │ ├── quaternions.py │ │ │ └── transformations.py │ │ ├── general_mountaincar.py │ │ ├── gym_wrap.py │ │ ├── linear_car.py │ │ ├── mdp.py │ │ ├── quadrocopter.py │ │ └── test.py │ ├── error.py │ ├── measure.py │ ├── monitor.py │ ├── policy │ │ ├── __init__.py │ │ ├── controller.py │ │ ├── linear_policy.py │ │ ├── neural_network.py │ │ └── test.py │ ├── spaces │ │ ├── __init__.py │ │ ├── bounded_space.py │ │ ├── discrete_space.py │ │ ├── rd_space.py │ │ └── test.py │ └── test │ │ ├── test_bench.py │ │ ├── test_configuration.py │ │ ├── test_integration.py │ │ └── test_measure.py ├── docs │ ├── Makefile │ ├── algorithm.rst │ ├── api │ │ ├── algo.rst │ │ ├── bench.rst │ │ ├── envs.rst │ │ ├── measure.rst │ │ ├── misc.rst │ │ ├── policy.rst │ │ ├── spaces.rst │ │ └── srb.rst │ ├── conf.py │ ├── environment.rst │ ├── index.rst │ └── toc.rst ├── examples │ ├── GettingStarted.ipynb │ └── SafeOpt.ipynb ├── misc │ ├── Dockerfile.python2 │ └── Dockerfile.python3 ├── requirements.txt ├── requirements_dev.txt ├── setup.py ├── test_code.sh └── tox.ini ├── Safe-Reinforcement-Learning └── README.md ├── Safe_reinforcement_learning ├── README.md ├── Safe_RL_LQR_experiment.m ├── iterate_calculate.m ├── poster.pdf ├── quadconstr.m ├── quadhess.m └── quadobj.m ├── Shield-Hybrid-Systems ├── .gitignore ├── Manifest.toml ├── Project.toml ├── README.md ├── Shared Code │ ├── BBBarbaricReachabilityFunction.jl │ ├── BBRigorousReachabilityFunction.jl │ ├── BBShieldSynthesis.jl │ ├── BBSquares.jl │ ├── Ball.jl │ ├── CCBarbaricReachabilityFunction.jl │ ├── Cruise.jl │ ├── DC-DC Converter.jl │ ├── DCShielding.jl │ ├── ExperimentUtilities.jl │ ├── FlatUI.jl │ ├── Get libbbshield.jl │ ├── Get libccshield.jl │ ├── Get libdcshield.jl │ ├── Get libopshield.jl │ ├── Get librwshield.jl │ ├── OPShielding.jl │ ├── OilPump.jl │ ├── PlotsDefaults.jl │ ├── RWShieldSynthesis.jl │ ├── RWSquares.jl │ ├── RandomWalk.jl │ ├── ShieldSynthesis.jl │ ├── Squares.jl │ ├── libbbshield │ │ ├── shield.c │ │ └── shield_dump (sample).c │ ├── libccshield │ │ ├── postshield.c │ │ ├── preshield.c │ │ └── shield_dump (sample).c │ ├── libdcshield │ │ └── shield.c │ ├── libopshield │ │ ├── shield.c │ │ └── shield_dump (sample).c │ └── librwshield │ │ ├── shield.c │ │ └── shield_dump (sample).c ├── fig-BBGranularityCost │ ├── Blueprints │ │ ├── BB__Shielded.xml │ │ └── TrainSaveEvaluateSingle.q │ ├── ExtractQueryResults.jl │ ├── Figure from CSV.jl │ ├── Get libbbshield.jl │ ├── Run Experiment.jl │ └── Synthesize Set of Shields.jl ├── fig-BBShieldRobustness │ ├── Check Robustness of Shields.jl │ ├── Get libbbshield.jl │ ├── Run Experiment.jl │ └── StatisticalChecking.jl ├── fig-BBShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── BB__PostShielded.xml │ │ ├── BB__PreShielded.xml │ │ ├── BB__ShieldedLayabout.xml │ │ ├── BB__Unshielded.xml │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── ShieldedLayabout.q │ │ └── UnShielded.q │ ├── Example.png │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-BarbaricMethodAccuracy │ ├── Example.png │ ├── Reliability of Barbaric Method.jl │ └── Run Experiment.jl ├── fig-CCShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── CC__PostShieldedDeterministic.xml │ │ ├── CC__PostShieldedNondeterministic.xml │ │ ├── CC__Shielded.xml │ │ ├── CC__Unshielded.xml │ │ ├── LoadEvaluate.q │ │ ├── MinimizeCostEvaluate.q │ │ ├── MinimizeInterventionsEvaluate.q │ │ ├── NoStrategyEvaluate.q │ │ ├── TrainSaveEvaluate.q │ │ └── TrainSaveEvaluateSingle.q │ ├── Example.png │ ├── PostShield 
Strategy.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-DCShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── DC__PostShielded.xml │ │ ├── DC__PreShielded.xml │ │ ├── DC__ShieldedLayabout.xml │ │ ├── DC__Unshielded.xml │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── ShieldedLayabout.q │ │ └── Unshielded.q │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-DifferenceRigorousBarbaric │ ├── Example.png │ └── Run Experiment.jl ├── fig-NoRecovery │ ├── BB No Recovery.jl │ ├── Example.png │ └── Run Experiment.jl ├── fig-OPShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── OP__PostShielded.xml │ │ ├── OP__PreShielded.xml │ │ ├── OP__ShieldedLayabout.xml │ │ └── OP__Unshielded.xml │ ├── Example.png │ ├── OPStrategyVisualisation.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── fig-RWShieldingResultsGroup │ ├── All Queries.py │ ├── Blueprints │ │ ├── PostShielded.q │ │ ├── PreShielded.q │ │ ├── RW__PostShielded.xml │ │ ├── RW__PreShielded.xml │ │ ├── RW__ShieldedLayabout.xml │ │ ├── RW__Unshielded.xml │ │ ├── ShieldedLayabout.q │ │ └── Unshielded.q │ ├── Example.png │ ├── RandomWalk Shield.jl │ ├── ReadMe.md │ ├── ReadResults.jl │ └── Run Experiment.jl ├── run_all.sh ├── tab-BBSynthesis │ ├── Blueprints │ │ ├── BB__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── ReadMe.md │ ├── Run Experiment.jl │ ├── Statistical Checking of Shield.jl │ ├── Synthesize Set of Shields.jl │ └── Table from CSVs.jl ├── tab-CCSynthesis │ ├── Blueprints │ │ ├── CC__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CC Statistical Checking of Shield.jl │ ├── CC Synthesize Set of Shields.jl │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── ReadMe.md │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── tab-DCSynthesis │ ├── Blueprints │ │ ├── DC__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── DC Statistical Checking of Shield.jl │ ├── DC Synthesize Set of Shields.jl │ ├── DCShield.jl │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── tab-OPSynthesis │ ├── Blueprints │ │ ├── OP__PreShielded.xml │ │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── OP Statistical Checking of Shield.jl │ ├── OP Synthesize Set of Shields.jl │ ├── OPShield.jl │ ├── Run Experiment.jl │ └── Table from CSVs.jl └── tab-RWSynthesis │ ├── Blueprints │ ├── RW__PreShielded.xml │ └── TrainSaveCheckSafety.q │ ├── CheckSafetyOfPreshielded.jl │ ├── Example.png │ ├── RW Statistical Checking of Shield.jl │ ├── RW Synthesize Set of Shields.jl │ ├── ReadMe.md │ ├── Run Experiment.jl │ └── Table from CSVs.jl ├── safe-mbrl ├── .gitignore ├── baseline │ ├── LICENSE │ ├── README.md │ ├── safe_rl │ │ ├── __init__.py │ │ ├── pg │ │ │ ├── agents.py │ │ │ ├── algos.py │ │ │ ├── buffer.py │ │ │ ├── network.py │ │ │ ├── run_agent.py │ │ │ ├── trust_region.py │ │ │ └── utils.py │ │ ├── sac │ │ │ ├── __init__.py │ │ │ └── sac.py │ │ └── utils │ │ │ ├── load_utils.py │ │ │ ├── logx.py │ │ │ ├── mpi_tf.py │ │ │ ├── mpi_tools.py │ │ │ ├── readme.md │ │ │ ├── run_utils.py │ │ │ └── serialization_utils.py │ ├── scripts │ │ ├── experiment.py │ │ ├── plot.py │ │ └── test_policy.py │ └── setup.py ├── config.yml ├── data │ ├── cg1 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── 
progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ ├── cg2 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ ├── figures │ │ ├── TestFigure3.png │ │ ├── pg1-Cost.png │ │ ├── pg1-Reward.png │ │ ├── pg2-Cost.png │ │ └── pg2-Reward.png │ ├── pg1 │ │ ├── cpo │ │ │ └── cpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ │ ├── ensemble-cem_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-cem_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-cem_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ │ ├── ensemble-random_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-random_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-random_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ │ ├── ensemble-rce_s0 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ ├── ensemble-rce_s10 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ │ └── ensemble-rce_s100 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── fix-dynamic-model-compare-optimizer │ │ │ ├── model-ensemble-with-cem │ │ │ │ └── model-ensemble-with-cem_s1000 │ │ │ │ │ ├── config.yml │ │ │ │ │ └── progress.txt │ │ │ ├── model-ensemble-with-random │ │ │ │ └── model-ensemble-with-random_s1000 │ │ │ │ │ ├── config.yml │ │ │ │ │ └── progress.txt │ │ │ └── model-ensemble-with-ts │ │ │ │ └── model-ensemble-with-ts_s1000 │ │ │ │ ├── config.yml │ │ │ │ └── progress.txt │ │ ├── trpo-Lagrangian │ │ │ └── trpo-Lagrangian │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ ├── trpo │ │ │ └── trpo │ │ │ │ ├── config.json │ │ │ │ └── progress.txt │ │ └── weights │ │ │ ├── config.yml │ │ │ └── progress.txt │ └── pg2 │ │ ├── cpo │ │ ├── 
a-target10 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── b-target7.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── c-target5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── d-target2.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── e-target0.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── f-target0.01 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── ensemble-cem │ │ ├── ensemble-cem_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-cem_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-cem_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-random │ │ ├── ensemble-random_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-random_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-random_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-rce │ │ ├── ensemble-rce_s0 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── ensemble-rce_s10 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ └── ensemble-rce_s100 │ │ │ ├── config.yml │ │ │ └── progress.txt │ │ ├── trpo │ │ └── trpo │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── trpo_lagrangian │ │ ├── a-target10 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── b-target7.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── c-target5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── d-taget2-5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ ├── f-target0.5 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── g-target0.01 │ │ │ ├── config.json │ │ │ └── progress.txt │ │ └── weights │ │ ├── config.yml │ │ └── progress.txt ├── env │ ├── LICENSE │ ├── README.md │ ├── build │ │ └── lib │ │ │ └── safety_gym │ │ │ ├── __init__.py │ │ │ └── random_agent.py │ ├── dist │ │ └── safety_gym-0.0.0-py3.6.egg │ ├── safety_gym.png │ ├── safety_gym │ │ ├── __init__.py │ │ ├── bench │ │ │ ├── bench_utils.py │ │ │ └── characteristic_scores.json │ │ ├── envs │ │ │ ├── __init__.py │ │ │ ├── engine.py │ │ │ ├── mujoco.py │ │ │ ├── suite-origin.py │ │ │ ├── suite.py │ │ │ └── world.py │ │ ├── random_agent.py │ │ ├── test │ │ │ ├── test_bench.py │ │ │ ├── test_button.py │ │ │ ├── test_determinism.py │ │ │ ├── test_engine.py │ │ │ ├── test_envs.py │ │ │ ├── test_goal.py │ │ │ └── test_obs.py │ │ └── xmls │ │ │ ├── README.md │ │ │ ├── car-origin.xml │ │ │ ├── car.xml │ │ │ ├── car_vel.xml │ │ │ ├── doggo.xml │ │ │ ├── point-origin.xml │ │ │ ├── point.xml │ │ │ └── rover4We.xml │ └── setup.py ├── mbrl │ ├── .gitignore │ ├── __init__.py │ ├── controllers │ │ ├── __init__.py │ │ ├── mpc_controller.py │ │ └── safe_mpc_controller.py │ ├── models │ │ ├── __init__.py │ │ ├── base.py │ │ ├── constraint_model.py │ │ ├── ensemble.py │ │ └── model.py │ └── optimizers │ │ ├── __init__.py │ │ ├── cem.py │ │ ├── optimizer.py │ │ ├── random.py │ │ └── rce.py ├── media │ ├── cg1_random.gif │ ├── cg1_rce.gif │ ├── cg2_random.gif │ ├── cg2_rce.gif │ ├── pg1_random.gif │ ├── pg1_rce.gif │ ├── pg1_trpo.gif │ ├── pg1_trpol.gif │ ├── pg2_random.gif │ ├── pg2_rce.gif │ ├── pg2_trpo_10.gif │ └── pg2_trpol_10.gif ├── readme.md ├── requirements.txt ├── run.py ├── script │ ├── count.py │ └── plot.py └── utils │ ├── __init__.py │ ├── env_utils.py │ ├── logx.py │ ├── mpi_pytorch.py │ ├── mpi_tools.py │ ├── plot.py │ ├── run_entrypoint.py │ ├── run_utils.py │ ├── serialization_utils.py │ └── user_config.py ├── safeRL ├── .gitignore ├── .gitmodules ├── HCOPE │ ├── filter.py │ ├── hcope.py │ ├── hcope_debug.py │ ├── hcope_test.py │ └── policies.py ├── LICENSE.txt ├── README.md ├── 
README.md~ ├── citation.cff ├── importance_sampling │ ├── importance_sampling.png │ └── importance_sampling.py ├── results │ ├── IS_dist_+_0.1.png │ ├── IS_dist_minus_0.1.png │ ├── IS_dist_random.png │ ├── IS_variance.png │ ├── Result.png │ ├── Theorem.png │ ├── safe_actions.gif │ ├── safe_actions_instability.gif │ ├── safety_layer.png │ ├── safety_optimization.png │ └── safety_signal.png └── safe_exploration │ ├── filter.py │ ├── learn_safety_function.py │ ├── logz.py │ ├── lqr_env.py │ ├── optimizers.py │ ├── plotSafetyFuct.py │ ├── policies_safe.py │ ├── run_policy_contrained.py │ ├── shared_noise.py │ ├── train_safe_explorer.py │ └── utils.py ├── safe_learning ├── .dockerignore ├── .gitignore ├── .travis.yml ├── Dockerfile.dev ├── Dockerfile.python2 ├── Dockerfile.python3 ├── LICENSE ├── Makefile ├── README.rst ├── docs │ ├── Makefile │ ├── _templates │ │ └── template.rst │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── introduction.rst │ ├── make.bat │ └── requirements.txt ├── examples │ ├── 1d_example.ipynb │ ├── 1d_region_of_attraction_estimate.ipynb │ ├── README.rst │ ├── adaptive_safety_verification.ipynb │ ├── basic_dynamic_programming.ipynb │ ├── inverted_pendulum.ipynb │ ├── lyapunov_function_learning.ipynb │ ├── plotting.py │ ├── reinforcement_learning_cartpole.ipynb │ ├── reinforcement_learning_pendulum.ipynb │ └── utilities.py ├── requirements.txt ├── requirements_dev.txt ├── safe_learning │ ├── __init__.py │ ├── configuration.py │ ├── functions.py │ ├── lyapunov.py │ ├── reinforcement_learning.py │ ├── tests │ │ ├── test_functions.py │ │ ├── test_lyapunov.py │ │ ├── test_rl.py │ │ └── test_utilities.py │ └── utilities.py ├── scripts │ ├── jupyter_output.py │ └── test_code.sh └── setup.py ├── safe_near_optimal_mdp ├── .gitignore ├── GPSG.png ├── LICENSE ├── README.md ├── arguments.py ├── data │ └── simple │ │ └── random_settings.npz ├── gp_safety_gym.py ├── main_oracle.py ├── main_safemdp.py ├── main_seo.py ├── main_sno_mdp.py ├── simple_make_rand_settings.py ├── test │ └── test_gp_safety_gym.py └── utils │ ├── mdp_utilities.py │ ├── reward_utilities.py │ └── safety_utilities.py ├── safe_rl_papers ├── LICENSE └── README.md ├── safety-starter-agents ├── .gitignore ├── LICENSE ├── README.md ├── safe_rl │ ├── __init__.py │ ├── pg │ │ ├── agents.py │ │ ├── algos.py │ │ ├── buffer.py │ │ ├── network.py │ │ ├── run_agent.py │ │ ├── trust_region.py │ │ └── utils.py │ ├── sac │ │ ├── __init__.py │ │ └── sac.py │ └── utils │ │ ├── load_utils.py │ │ ├── logx.py │ │ ├── mpi_tf.py │ │ ├── mpi_tools.py │ │ ├── readme.md │ │ ├── run_utils.py │ │ └── serialization_utils.py ├── scripts │ ├── experiment.py │ ├── plot.py │ └── test_policy.py └── setup.py └── vertex-net ├── .gitignore ├── README.md ├── __init__.py ├── algos ├── __init__.py └── ddpy.py ├── envs ├── __init__.py ├── hovercraft.py └── pendulum.py ├── nets ├── __init__.py ├── policy_net.py ├── value_net.py └── vertex_policy_net.py ├── run_hovercraft.py ├── run_pendulum.py └── utils ├── __init__.py └── replay_buffer.py /Safe-RL/AlwaysSafe/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .idea 3 | notebooks/ 4 | results/ 5 | Pipfile.lock 6 | __pycache__ 7 | *.pyc 8 | 9 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Thiago D. 
Simão 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | matplotlib = "*" 8 | pandas = "*" 9 | tqdm = "*" 10 | gym = "*" 11 | cvxpy = "*" 12 | 13 | [packages.gym_factored] 14 | git = "git://github.com/tdsimao/gym-factored.git" 15 | editable = true 16 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .opt_cmdp import OptCMDPAgent 2 | from .abs_opt_cmdp import AbsOptCMDPAgent 3 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/planners/__init__.py: -------------------------------------------------------------------------------- 1 | from .lp import LinearProgrammingPlanner 2 | from .lp_optimistic import OptimisticLinearProgrammingPlanner 3 | from .abs_lp_optimistic import AbsOptimisticLinearProgrammingPlanner 4 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/scripts/.gitignore: -------------------------------------------------------------------------------- 1 | sandbox 2 | -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/scripts/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/tests/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/util/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/AlwaysSafe/util/__init__.py -------------------------------------------------------------------------------- /Safe-RL/AlwaysSafe/util/grb.py: -------------------------------------------------------------------------------- 1 | try: 2 | from gurobipy import Model, quicksum, GRB, GurobiError 3 | GUROBI_FOUND = True 4 | except ModuleNotFoundError as e: 5 | GUROBI_FOUND = False 6 | 7 | 8 | def solve_gurobi_lp(model, verbose=False, check_if_infeasible=False): 9 | if not verbose: 10 | model.Params.OutputFlag = 0 11 | model.optimize() 12 | 13 | if model.status == GRB.Status.INF_OR_UNBD: 14 | # Turn presolve off to determine whether model is infeasible or unbounded 15 | model.setParam(GRB.Param.Presolve, 0) 16 | model.optimize() 17 | 18 | if model.status == GRB.Status.OPTIMAL: 19 | # model.write('model.lp') 20 | # model.write('model.sol') 21 | if verbose: 22 | print('Optimal objective: {}'.format(model.objVal)) 23 | return model 24 | elif model.status == GRB.Status.UNBOUNDED: 25 | model.write('model_unbounded.lp') 26 | raise GurobiError(model.status, 27 | 'Optimization stopped (UNBOUNDED), check the file model_unbounded.lp') 28 | elif model.status == GRB.Status.INFEASIBLE: 29 | if check_if_infeasible: 30 | model.write('model_infeasible.lp') 31 | model.computeIIS() 32 | model.write("model.ilp") 33 | raise GurobiError(model.status, 34 | 'Optimization stopped (INFEASIBLE), check files model_infeasible.lp and model.ilp') 35 | return model 36 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | *.jld 3 | *.jld2 4 | **/log* 5 | **/.ipynb_checkpoints 6 | *.lab 7 | *.tra 8 | *.csv 9 | *.webm 10 | *.bson 11 | *.hoa -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/datagen.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 generate_dataset.jl --seed=1 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen1.jodhpur.out & 2 | nohup julia1.0 generate_dataset.jl --seed=2 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen2.jodhpur.out & 3 | nohup julia1.0 generate_dataset.jl --seed=3 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen3.jodhpur.out & 4 | nohup julia1.0 generate_dataset.jl --seed=4 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen4.jodhpur.out & 5 | nohup julia1.0 generate_dataset.jl --seed=5 --ntrain=3000 --nval=500 --folder=/scratch/boutonm/ > gen5.jodhpur.out & 6 | 7 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/generate_data.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 generate_dataset.jl --folder=/scratch/boutonm/ --ntrain=3000 --nval=500 --seed=1 > datagen.jodhpur.out & 2 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/model_loading.jl: -------------------------------------------------------------------------------- 1 | using Flux 2 | using StaticArrays 3 | using ProgressMeter 4 | using POMDPs 5 | using POMDPToolbox 6 | using AutomotiveDrivingModels 7 | using AutomotivePOMDPs 8 | using AutomotiveSensors 9 | using PedCar 10 | using BSON: @load 11 | 12 | 
mdp = PedCarMDP(pos_res=2.0, vel_res=2., ped_birth=0.7, car_birth=0.7) 13 | pomdp = UrbanPOMDP(env=mdp.env, 14 | sensor = GaussianSensor(false_positive_rate=0.05, 15 | pos_noise = LinearNoise(min_noise=0.5, increase_rate=0.05), 16 | vel_noise = LinearNoise(min_noise=0.5, increase_rate=0.05)), 17 | ego_goal = LaneTag(2, 1), 18 | max_cars=1, 19 | max_peds=1, 20 | car_birth=0.7, 21 | ped_birth=0.7, 22 | obstacles=false, # no fixed obstacles 23 | lidar=false, 24 | ego_start=20, 25 | ΔT=0.5) 26 | 27 | rng = MersenneTwister(1) 28 | policy = RandomPolicy(rng, pomdp, VoidUpdater()) 29 | 30 | 31 | @load "model_1.bson" model 32 | @load "weights_1.bson" weights 33 | 34 | @time mean(loss(val_X[i], val_Y[i]) for i=1:length(val_X)) 35 | 36 | function loss(x, y) 37 | l = mean(Flux.mse.(model.(x), y)) 38 | truncate!(model) 39 | reset!(model) 40 | return l 41 | end 42 | 43 | loss.(val_X, val_Y) 44 | 45 | xs = Flux.batchseq(val_X) 46 | ys = Flux.batchseq(val_Y) 47 | loss(xs, ys) 48 | 49 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/scp_model.sh: -------------------------------------------------------------------------------- 1 | scp boutonm@bethpage:/home/boutonm/AutomotiveSafeRL/training_scripts/RNNFiltering/*.bson . 2 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/train.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 bagging_training.jl --resume 10 --seed 10 > nn_1.jodhpur.out & 2 | nohup julia1.0 bagging_training.jl --resume 20 --seed 20 > nn_2.jodhpur.out & 3 | nohup julia1.0 bagging_training.jl --resume 30 --seed 30 > nn_3.jodhpur.out & 4 | nohup julia1.0 bagging_training.jl --resume 40 --seed 40 > nn_4.jodhpur.out & 5 | nohup julia1.0 bagging_training.jl --resume 50 --seed 50 > nn_5.jodhpur.out & 6 | #nohup julia1.0 bagging_training.jl --seed 6 > nn_6.jodhpur.out & 7 | #nohup julia1.0 bagging_training.jl --seed 7 > nn_7.jodhpur.out & 8 | #nohup julia1.0 bagging_training.jl --seed 8 > nn_8.jodhpur.out & 9 | #nohup julia1.0 bagging_training.jl --seed 9 > nn_9.jodhpur.out & 10 | #nohup julia1.0 bagging_training.jl --seed 10 > nn_10.jodhpur.out & 11 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/RNNFiltering/train_single.sh: -------------------------------------------------------------------------------- 1 | nohup julia1.0 train_tracking.jl --seed=1 --entity=car > car1.jodhpur.out & 2 | nohup julia1.0 train_tracking.jl --seed=2 --entity=car > car2.jodhpur.out & 3 | nohup julia1.0 train_tracking.jl --seed=3 --entity=car > car3.jodhpur.out & 4 | nohup julia1.0 train_tracking.jl --seed=4 --entity=car > car4.jodhpur.out & 5 | nohup julia1.0 train_tracking.jl --seed=5 --entity=car > car5.jodhpur.out & 6 | 7 | nohup julia1.0 train_tracking.jl --seed=1 --entity=ped > ped1.jodhpur.out & 8 | nohup julia1.0 train_tracking.jl --seed=2 --entity=ped > ped2.jodhpur.out & 9 | nohup julia1.0 train_tracking.jl --seed=3 --entity=ped > ped3.jodhpur.out & 10 | nohup julia1.0 train_tracking.jl --seed=4 --entity=ped > ped4.jodhpur.out & 11 | nohup julia1.0 train_tracking.jl --seed=5 --entity=ped > ped5.jodhpur.out & 12 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_product.jl: -------------------------------------------------------------------------------- 1 | rng = 
MersenneTwister(1) 2 | using AutomotivePOMDPs 3 | using MDPModelChecking 4 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 5 | using DiscreteValueIteration 6 | using ProgressMeter, Parameters, JLD 7 | 8 | params = UrbanParams(nlanes_main=1, 9 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 10 | crosswalk_length = [14.0, 14., 14.0], 11 | crosswalk_width = [4.0, 4.0, 3.1], 12 | stop_line = 22.0) 13 | env = UrbanEnv(params=params); 14 | 15 | mdp = CarMDP(env = env, vel_res=2.0, pos_res=3.0); 16 | 17 | function MDPModelChecking.labels(mdp::CarMDP, s::CarMDPState) 18 | if s.crash 19 | return ["crash"] 20 | elseif s.ego.posF.s >= get_end(mdp.env.roadway[mdp.ego_goal]) && 21 | get_lane(mdp.env.roadway, s.ego).tag == mdp.ego_goal 22 | return ["goal"] 23 | else 24 | return ["!crash", "!goal"] 25 | end 26 | end 27 | 28 | property = "!crash U goal" 29 | 30 | solver = ModelCheckingSolver(property=property, solver=ValueIterationSolver()) 31 | 32 | policy = solve(solver, mdp, verbose=true) 33 | 34 | JLD.save("carmdp.jld", "policy", policy) 35 | JLD.save("car_acc_states.jld", "accepting_states", policy.mdp.accepting_states) 36 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/carmdp_vi_until.jl: -------------------------------------------------------------------------------- 1 | rng = MersenneTwister(1) 2 | @everywhere begin 3 | using AutomotivePOMDPs 4 | using MDPModelChecking 5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 6 | using DiscreteValueIteration 7 | using ProgressMeter, Parameters, JLD 8 | end 9 | params = UrbanParams(nlanes_main=1, 10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 11 | crosswalk_length = [14.0, 14., 14.0], 12 | crosswalk_width = [4.0, 4.0, 3.1], 13 | stop_line = 22.0) 14 | env = UrbanEnv(params=params); 15 | 16 | mdp = CarMDP(env = env, pos_res=2., vel_res=3., car_birth=0.7) 17 | 18 | # reachability analysis 19 | mdp.collision_cost = 0. 20 | mdp.γ = 1. 21 | mdp.goal_reward = 1. 22 | 23 | solver = ParallelValueIterationSolver(n_procs=7) 24 | 25 | policy = solve(solver, mdp, verbose=true) 26 | JLD.save("car_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy) 27 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/pedcar_vi_benchmark.jl: -------------------------------------------------------------------------------- 1 | @everywhere begin 2 | using POMDPs, POMDPToolbox, DiscreteValueIteration 3 | using AutomotivePOMDPs, AutomotiveDrivingModels 4 | end 5 | rng = MersenneTwister(1) 6 | 7 | params = UrbanParams(nlanes_main=1, 8 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 9 | crosswalk_length = [14.0, 14., 14.0], 10 | crosswalk_width = [4.0, 4.0, 3.1], 11 | stop_line = 22.0) 12 | env = UrbanEnv(params=params); 13 | 14 | mdp = PedCarMDP(env=env, pos_res=6.0, vel_res=3.0, ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0)) 15 | # reachability analysis 16 | mdp.collision_cost = 0. 17 | mdp.γ = 1. 18 | mdp.goal_reward = 1. 
19 | 20 | solver = ParallelValueIterationSolver(n_procs=8, max_iterations=4, belres=1e-4) 21 | policy = solve(solver, mdp, verbose=true) 22 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/old_scripts/pedmdp_vi_until.jl: -------------------------------------------------------------------------------- 1 | rng = MersenneTwister(1) 2 | @everywhere begin 3 | using AutomotivePOMDPs 4 | using MDPModelChecking 5 | using GridInterpolations, StaticArrays, POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 6 | using DiscreteValueIteration 7 | using ProgressMeter, Parameters, JLD 8 | end 9 | params = UrbanParams(nlanes_main=1, 10 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 11 | crosswalk_length = [14.0, 14., 14.0], 12 | crosswalk_width = [4.0, 4.0, 3.1], 13 | stop_line = 22.0) 14 | env = UrbanEnv(params=params); 15 | 16 | mdp = PedMDP(env = env, pos_res=1., vel_res=1., ped_birth=0.7, ped_type=VehicleDef(AgentClass.PEDESTRIAN, 1.0, 3.0)) 17 | 18 | # reachability analysis 19 | mdp.collision_cost = 0. 20 | mdp.γ = 1. 21 | mdp.goal_reward = 1. 22 | 23 | solver = ParallelValueIterationSolver(n_procs=7) 24 | 25 | policy = solve(solver, mdp, verbose=true) 26 | JLD.save("ped_until.jld", "util", policy.util, "qmat", policy.qmat, "policy", policy.policy) 27 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/runtests.jl: -------------------------------------------------------------------------------- 1 | using Base.Test 2 | using Parameters 3 | #dep 4 | include("../AutomotivePOMDPs/AutomotivePOMDPs.jl") 5 | using AutomotivePOMDPs 6 | using POMDPs, POMDPToolbox, AutoViz, AutomotiveDrivingModels, Reel 7 | using GridInterpolations, StaticArrays 8 | include("mdp_models/discretization.jl") 9 | include("mdp_models/pedestrian_mdp/pomdp_types.jl") 10 | include("mdp_models/pedestrian_mdp/state_space.jl") 11 | 12 | rng = MersenneTwister(1) 13 | 14 | include("test_discretization.jl") 15 | include("test_pedestrian_mdp.jl") 16 | include("test_interpolation.jl") 17 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/test_car_mdp.jl: -------------------------------------------------------------------------------- 1 | 2 | function test_stateindexing(mdp::CarMDP) 3 | state_space = states(mdp) 4 | for (i, s) in enumerate(state_space) 5 | if i != stateindex(mdp, s) 6 | return false 7 | end 8 | end 9 | return true 10 | end 11 | 12 | 13 | 14 | params = UrbanParams(nlanes_main=1, 15 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 16 | crosswalk_length = [10.0, 10., 10.0], 17 | crosswalk_width = [4.0, 4.0, 3.1], 18 | stop_line = 22.0) 19 | env = UrbanEnv(params=params); 20 | 21 | mdp = CarMDP(env = env); 22 | 23 | @test test_stateindexing(mdp) 24 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/test/test_pedestrian_mdp.jl: -------------------------------------------------------------------------------- 1 | 2 | 3 | function test_stateindexing(mdp::PedMDP) 4 | state_space = states(mdp) 5 | for (i, s) in enumerate(state_space) 6 | if i != stateindex(mdp, s) 7 | return false 8 | end 9 | end 10 | return true 11 | end 12 | 13 | 14 | 15 | params = UrbanParams(nlanes_main=1, 16 | crosswalk_pos = [VecSE2(6, 0., pi/2), VecSE2(-6, 0., pi/2), VecSE2(0., -5., 0.)], 17 | crosswalk_length = [10.0, 10., 10.0], 18 | 
crosswalk_width = [4.0, 4.0, 3.1], 19 | stop_line = 22.0) 20 | env = UrbanEnv(params=params); 21 | 22 | mdp = PedMDP(env = env); 23 | 24 | @test test_stateindexing(mdp) 25 | -------------------------------------------------------------------------------- /Safe-RL/AutomotiveSafeRL/training_scripts/training.sh: -------------------------------------------------------------------------------- 1 | nohup julia jointmdp_training.jl --log log_nm100 --cost 2 > log100.out & 2 | nohup julia jointmdp_training.jl --log log_nm101 --cost 3 > log101.out & 3 | nohup julia jointmdp_training.jl --log log_nm102 --cost 0.5 > log102.out & 4 | nohup julia jointmdp_training.jl --log log_nm103 --cost 10 > log103.out & 5 | nohup julia jointmdp_training.jl --log log_nm104 --cost 20 > log104.out & 6 | 7 | #nohup julia jointmdp_script.jl --log log60 --goal 1 > log60.out & 8 | #nohup julia jointmdp_script.jl --log log61 --goal 1.5 > log61.out & 9 | #nohup julia jointmdp_script.jl --log log62 --goal 2 > log62.out & 10 | #nohup julia jointmdp_script.jl --log log63 --goal 3 > log63.out & 11 | #nohup julia jointmdp_script.jl --log log64 --goal 5 > log64.out & 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/Constraint_RL_MPC.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/ReplayBuffer.py: -------------------------------------------------------------------------------- 1 | from collections import deque 2 | import random 3 | 4 | 5 | class ReplayBuffer(): 6 | 7 | def __init__(self, buffer_size): 8 | self.buffer_size = buffer_size 9 | self.num_experiences = 0 10 | self.buffer = deque() 11 | 12 | def size(self): 13 | return self.buffer_size 14 | 15 | def add_with_dist(self, state, action, reward, new_state, done, dist): 16 | experience = (state, action, reward, new_state, done, dist) 17 | if self.num_experiences < self.buffer_size: 18 | self.buffer.append(experience) 19 | self.num_experiences += 1 20 | else: 21 | self.buffer.popleft() 22 | self.buffer.append(experience) 23 | 24 | def add(self, state, action, reward, new_state, done): 25 | experience = (state, action, reward, new_state, done) 26 | if self.num_experiences < self.buffer_size: 27 | self.buffer.append(experience) 28 | self.num_experiences += 1 29 | else: 30 | self.buffer.popleft() 31 | self.buffer.append(experience) 32 | 33 | def sample(self, batch_size): 34 | # Randomly sample batch_size examples 35 | if self.num_experiences < batch_size: 36 | return random.sample(self.buffer, self.num_experiences) 37 | else: 38 | return random.sample(self.buffer, batch_size) 39 | 40 | def erase(self): 41 | self.buffer = 
deque() 42 | self.num_experiences = 0 43 | 44 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Buffer/__pycache__/ReplayBuffer.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_old.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_randn.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Disturbances/external_disturbances_uniform.mat -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Model/__pycache__/Linear_Env.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Actor_Model.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/Critic_Model.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Neural_Network/__pycache__/NeuralNetwork.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/MinMax.py: -------------------------------------------------------------------------------- 1 | """ 2 | Min - max normalization 3 | """ 4 | 5 | 6 | def minmax_norm(x, min_x, max_x): 7 | """ 8 | This function normalizes data 9 | :param x: input data 10 | :param min_x: minimum value 11 | :param max_x: output data 12 | :return: normalized input data x_norm 13 | """ 14 | x_norm = (x - min_x)/(max_x - min_x) 15 | 16 | return x_norm 17 | 18 | 19 | def minmax_norm_back(x_norm, min_x, max_x): 20 | """ 21 | This function denormalizes data 22 | :param x_norm: input data 23 | :param min_x: minimum value 24 | :param max_x: output data 25 | :return: real input data x 26 | """ 27 | x = x_norm * (max_x - min_x) + min_x 28 | 29 | return x 30 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Normalize/__pycache__/MinMax.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/__pycache__/constraints.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_low_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_E_up_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_low_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/constraints_test_T_up_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Pre_training/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Immediate_constraint_functions.py ################################################################## 3 | 4 | Pre-training phase to learn immediate constraint functions 5 | has to be run once for every constraint 6 | 7 | 8 | PARAMETER: 9 | 10 | num_samples = number of samples per episode 11 | num_episodes = number of episodes 12 | 13 | state_flag = 0-> Temperature low, 1-> Energy low, 2-> Temperature up, 3-> Energy up, defines the safety signal 14 | 15 | # define network parameters 16 | num_in 17 | num_out 18 | num_hidden 19 | activation 20 | activation_out 21 | optimizer 22 | 23 | OUTPUT: 24 | 25 | network weights of the trained network are saved in the same folder 26 | 27 | 28 | ##### Test_immediate_constraint_functions.py ############################################################## 29 | 30 | evaluation of the safety layer, to make sure that the constraints are working 31 | loads the neural network weights, so they have to be trained beforehand 32 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MPC_weights.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/SI_MinMax.npy -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/__pycache__/MPC.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_MPC/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Main_System_Identification.py ################################################ 3 | 4 | trains a neural network for system identification of the model 5 | has to be run first 6 | 7 | PARAMETER: 8 | 9 | num_samples = number of samples per episode 10 | num_episodes = number of episodes 11 | 12 | # network parameters 13 | num_hidden 14 | activation 15 | activation_out 16 | optimizer 17 | 18 | # model parameters 19 | Q 20 | R 21 | 22 | dist_flag = 0-> train without disturbances, 1-> train with disturbances 23 | 24 | 25 | OUTPUT: 26 | 27 | network weights of the trained network are
saved in the same folder 28 | evolution of error is plotted 29 | 30 | 31 | ##### Main_MPC.py ################################################################## 32 | 33 | Execution of the MPC algorithm with the trained network 34 | 35 | 36 | PARAMETER: 37 | 38 | network parameters have to be the same as the SI parameters 39 | 40 | N = prediction horizon 41 | S = samples to be evaluated 42 | 43 | OUTPUT: 44 | 45 | evolution of states and inputs is plotted 46 | 47 | 48 | -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/__pycache__/DDPG.cpython-35.pyc -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test1_5_weights_critic.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test2_5_weights_critic.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_actor.h5f -------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/ddpg_Test3_5_weights_critic.h5f 
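The Training_MPC readme above describes Main_MPC.py as running MPC on top of the trained system-identification network, with prediction horizon N, quadratic weights Q and R, and S closed-loop samples to evaluate. A minimal sketch of that general pattern (random-shooting MPC over a learned one-step model) follows; it is not the repository's code, and the names make_linear_model and mpc_step as well as the toy linear dynamics are invented for illustration only.

import numpy as np

def make_linear_model(A, B):
    """Stand-in for a trained one-step dynamics model: x_next = f(x, u)."""
    return lambda x, u: A @ x + B @ u

def mpc_step(f, x0, Q, R, N=10, n_candidates=500, u_low=-1.0, u_high=1.0, rng=None):
    """Return the first input of the lowest-cost random input sequence over horizon N."""
    rng = rng if rng is not None else np.random.default_rng(0)
    n_u = R.shape[0]
    best_cost, best_u0 = np.inf, np.zeros(n_u)
    for _ in range(n_candidates):
        u_seq = rng.uniform(u_low, u_high, size=(N, n_u))
        x, cost = np.array(x0, dtype=float), 0.0
        for u in u_seq:
            x = f(x, u)                    # roll the learned model forward
            cost += x @ Q @ x + u @ R @ u  # quadratic stage cost with weights Q, R
        if cost < best_cost:
            best_cost, best_u0 = cost, u_seq[0]
    return best_u0

if __name__ == "__main__":
    # Toy linear system standing in for the identified model.
    A = np.array([[1.0, 0.1], [0.0, 0.9]])
    B = np.array([[0.0], [0.1]])
    f = make_linear_model(A, B)
    Q, R = np.eye(2), 0.01 * np.eye(1)
    x = np.array([1.0, 0.0])
    for _ in range(20):        # S: number of evaluated closed-loop samples
        u = mpc_step(f, x, Q, R, N=10)
        x = f(x, u)            # apply only the first input, then re-plan
    print("final state:", x)

In the actual scripts, the linear stand-in would presumably be replaced by the network trained in Main_System_Identification.py, and the constraint handling described in the Pre_training and Training_RL readmes would act on the chosen inputs.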
-------------------------------------------------------------------------------- /Safe-RL/Constraint_RL_MPC/Abgabe/Training_RL/readme.txt: -------------------------------------------------------------------------------- 1 | 2 | ##### Main_RL.py ################################################################## 3 | 4 | Training phase and test phase of the DDPG algorithm 5 | it can be evaluated together with MPC, if enabled 6 | 7 | 8 | PARAMETER: 9 | 10 | num_samples = number of samples per episode 11 | num_episodes = number of episodes 12 | 13 | episodesTrain = number of episodes for the training 14 | episodesTest = number of episodes for the test 15 | stepsEpisodes = number of samples per episode during training 16 | stepsEpisodes_test = number of samples per episode during testing 17 | 18 | future_steps_tracing = number of steps the tracing trajectory is used from the future -> 0 = None 19 | buffersize = size of replay buffer 20 | 21 | disturbance = 0 -> no disturbance , 1 -> added disturbance 22 | future_steps_dist = number of steps the disturbance is used from the future -> 0 = None 23 | 24 | # parameters of the noise process 25 | sigma 26 | theta 27 | mu 28 | 29 | constraints = Flag constraints [None, SafetyLayer, Rewardshaping] 30 | 31 | # Environmental details 32 | Q 33 | R 34 | ENV_NAME = Name where the weights are saved 35 | 36 | 37 | # MPC parameters 38 | do_MPC = Flag whether MPC should be evaluated 39 | N = Prediction horizon for MPC result 40 | 41 | OUTPUT: 42 | 43 | network weights of the trained network are saved in the same folder 44 | evolution of states and inputs is plotted 45 | 46 | 47 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "coach"] 2 | path = coach 3 | url = https://github.com/ben-eysenbach/coach.git 4 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/).
29 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/envs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/frozen_lake.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv as _FrozenLakeEnv 2 | from gym import spaces 3 | import numpy as np 4 | 5 | 6 | class FrozenLakeEnv(_FrozenLakeEnv): 7 | """Modified version of FrozenLake-v0. 8 | 9 | 1. Convert integer states to one hot encoding. 10 | 2. Make the goal state reversible 11 | """ 12 | def __init__(self, map_name): 13 | super(FrozenLakeEnv, self).__init__(map_name=map_name, 14 | is_slippery=False) 15 | self.observation_space = spaces.Box(low=np.zeros(self.nS), 16 | high=np.ones(self.nS)) 17 | # Make the goal state not terminate 18 | goal_s = self.nS - 1 19 | left_s = goal_s - 1 20 | up_s = goal_s - int(np.sqrt(self.nS)) 21 | 22 | self.P[goal_s] = { 23 | 0: [(1.0, left_s, 0.0, False)], 24 | 1: [(1.0, goal_s, 1.0, True)], 25 | 2: [(1.0, goal_s, 1.0, True)], 26 | 3: [(1.0, up_s, 0.0, True)], 27 | } 28 | 29 | def _s_to_one_hot(self, s): 30 | one_hot = np.zeros(self.nS) 31 | one_hot[s] = 1. 32 | return one_hot 33 | 34 | def step(self, a): 35 | (s, r, done, info) = super(FrozenLakeEnv, self).step(a) 36 | done = (s == self.nS - 1) # Assume we can't detect dangerous states 37 | one_hot = self._s_to_one_hot(s) 38 | r -= 1 # Make the reward be in {-1, 0} 39 | return (one_hot, r, done, info) 40 | 41 | def reset(self): 42 | s = super(FrozenLakeEnv, self).reset() 43 | one_hot = self._s_to_one_hot(s) 44 | return one_hot 45 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/envs/hopper.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.hopper import HopperEnv as _HopperEnv 2 | 3 | 4 | class HopperEnv(_HopperEnv): 5 | """Modified version of Hopper-v1.""" 6 | 7 | def step(self, action): 8 | (obs, r, done, info) = super(HopperEnv, self).step(action) 9 | return (obs, r, False, info) 10 | -------------------------------------------------------------------------------- /Safe-RL/LeaveNoTrace/plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/LeaveNoTrace/plot.png -------------------------------------------------------------------------------- /Safe-RL/PCPO/iclr_2020_code_submission.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/PCPO/iclr_2020_code_submission.zip -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Sven Gronauer, Technical University Munich (TUM) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and 
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/README.md: -------------------------------------------------------------------------------- 1 | # RL-Safety-Algorithms 2 | 3 | Algorithms for Safe Reinforcement Learning Problems that were tested and 4 | benchmarked in the 5 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym). 6 | 7 | ## Installation 8 | 9 | Install this repository with: 10 | 11 | ``` 12 | git clone https://github.com/SvenGronauer/RL-Safety-Algorithms.git 13 | 14 | cd RL-Safety-Algorithms 15 | 16 | pip install -e . 17 | ``` 18 | 19 | 20 | ## Getting Started 21 | 22 | Works with every environment that is compatible with the OpenAI Gym interface: 23 | 24 | ``` 25 | python -m rl_safety_algorithms.train --alg trpo --env MountainCarContinuous-v0 26 | ``` 27 | 28 | For an open-source framework to benchmark and test safety, we recommend the 29 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym). To train an 30 | algorithm such as Constrained Policy Optimization, run: 31 | 32 | ``` 33 | python -m rl_safety_algorithms.train --alg cpo --env SafetyBallCircle-v0 34 | ``` 35 | 36 | ## Benchmark 37 | 38 | In order to benchmark tasks from the 39 | [Bullet-Safety-Gym](https://github.com/svengronauer/Bullet-Safety-Gym), 40 | we have prepared scripts in the `experiments` directory.
41 | 42 | ``` 43 | cd experiments/ 44 | python benchmark_circle_tasks.py 45 | ``` 46 | 47 | In our experiments, we used a Threadripper 3990X CPU with 64 physical CPU cores; 48 | thus, we ran the experiments with the following flag for optimal MPI usage: 49 | 50 | ``` 51 | python benchmark_circle_tasks.py --num-cores 64 52 | ``` 53 | 54 | Plots from experiment runs can also be taken from the 55 | [Bullet-Safety-Gym Benchmarks](https://github.com/SvenGronauer/Bullet-Safety-Gym/blob/master/docs/benchmark.md) -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_circle_tasks.py: -------------------------------------------------------------------------------- 1 | from rl_safety_algorithms.benchmark import Benchmark 2 | import bullet_safety_gym # noqa 3 | from safety_settings import alg_setup, argument_parser 4 | 5 | 6 | def main(args): 7 | env_specific_kwargs = { 8 | 'SafetyBallCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000}, 9 | 'SafetyCarCircle-v0': {'epochs': 500, 'steps_per_epoch': 32000}, 10 | 'SafetyDroneCircle-v0': {'epochs': 1000, 'steps_per_epoch': 64000}, 11 | 'SafetyAntCircle-v0': {'epochs': 1500, 'steps_per_epoch': 64000}, 12 | } 13 | bench = Benchmark( 14 | alg_setup, 15 | env_ids=list(env_specific_kwargs.keys()), 16 | log_dir=args.log_dir, 17 | num_cores=args.num_cores, 18 | num_runs=args.num_runs, 19 | env_specific_kwargs=env_specific_kwargs, 20 | use_mpi=True, 21 | init_seed=args.seed, 22 | ) 23 | bench.run() 24 | 25 | 26 | if __name__ == '__main__': 27 | args = argument_parser() 28 | main(args) 29 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_gather_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from rl_safety_algorithms.benchmark import Benchmark 4 | import bullet_safety_gym # noqa 5 | from safety_settings import alg_setup, argument_parser 6 | 7 | 8 | def main(args): 9 | env_specific_kwargs = { 10 | 'SafetyBallGather-v0': {'epochs': 500, 'cost_limit': 0.2, 11 | 'steps_per_epoch': 32000}, 12 | 'SafetyCarGather-v0': {'epochs': 500, 'cost_limit': 0.2, 13 | 'steps_per_epoch': 32000}, 14 | 'SafetyDroneGather-v0': {'epochs': 1000, 'cost_limit': 0.2, 15 | 'steps_per_epoch': 64000}, 16 | 'SafetyAntGather-v0': {'epochs': 1000, 'cost_limit': 0.2, 17 | 'steps_per_epoch': 64000} 18 | } 19 | bench = Benchmark( 20 | alg_setup, 21 | env_ids=list(env_specific_kwargs.keys()), 22 | log_dir=args.log_dir, 23 | num_cores=args.num_cores, 24 | num_runs=args.num_runs, 25 | env_specific_kwargs=env_specific_kwargs, 26 | use_mpi=True, 27 | init_seed=args.seed, 28 | ) 29 | bench.run() 30 | 31 | 32 | if __name__ == '__main__': 33 | args = argument_parser() 34 | main(args) 35 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_reach_tasks.py: -------------------------------------------------------------------------------- 1 | from rl_safety_algorithms.benchmark import Benchmark 2 | import bullet_safety_gym # noqa 3 | from safety_settings import alg_setup, argument_parser 4 | 5 | 6 | def main(args): 7 | env_specific_kwargs = { 8 | 'SafetyBallReach-v0': {'epochs': 500, 'steps_per_epoch': 32000, 9 | 'cost_limit': 10}, # terminates after 250 steps 10 | 'SafetyCarReach-v0': {'epochs': 1000, 'steps_per_epoch': 32000, 11 | 'cost_limit': 10}, # terminates after 500 steps 12 |
'SafetyDroneReach-v0': {'epochs': 1000, 'steps_per_epoch': 64000, 13 | 'cost_limit': 10}, # terminates after 500 steps 14 | 'SafetyAntReach-v0': {'epochs': 1500, 'steps_per_epoch': 64000}, 15 | } 16 | bench = Benchmark( 17 | alg_setup, 18 | env_ids=list(env_specific_kwargs.keys()), 19 | log_dir=args.log_dir, 20 | num_cores=args.num_cores, 21 | num_runs=args.num_runs, 22 | env_specific_kwargs=env_specific_kwargs, 23 | use_mpi=True, 24 | init_seed=args.seed 25 | ) 26 | bench.run() 27 | 28 | 29 | if __name__ == '__main__': 30 | args = argument_parser() 31 | main(args) 32 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/benchmark_run_tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from rl_safety_algorithms.benchmark import Benchmark 4 | import bullet_safety_gym # noqa 5 | from safety_settings import alg_setup, argument_parser 6 | 7 | 8 | def main(args): 9 | env_specific_kwargs = { 10 | 'SafetyBallRun-v0': {'epochs': 100, 'steps_per_epoch': 32000}, 11 | 'SafetyCarRun-v0': {'epochs': 200, 'steps_per_epoch': 32000}, 12 | 'SafetyDroneRun-v0': {'epochs': 500, 'steps_per_epoch': 64000}, 13 | 'SafetyAntRun-v0': {'epochs': 500, 'steps_per_epoch': 64000}, 14 | } 15 | bench = Benchmark( 16 | alg_setup, 17 | env_ids=list(env_specific_kwargs.keys()), 18 | log_dir=args.log_dir, 19 | num_cores=args.num_cores, 20 | num_runs=args.num_runs, 21 | env_specific_kwargs=env_specific_kwargs, 22 | use_mpi=True, 23 | init_seed=args.seed, 24 | ) 25 | bench.run() 26 | 27 | 28 | if __name__ == '__main__': 29 | args = argument_parser() 30 | main(args) 31 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/experiments/safety_settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | 5 | alg_setup = { 6 | 'trpo': {"target_kl": [0.001, 0.01]}, 7 | 'lag-trpo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2], 8 | 'lambda_lr': [0.001, 0.01, 0.1]}, # SGD is default 9 | 'cpo': {'target_kl': [1.0e-4, 5.0e-4, 1.0e-3], 'lam_c': [0.50, 0.90, 0.95]}, 10 | 'pdo': {'target_kl': [1.0e-4, 1.0e-3, 1.0e-2], 11 | 'lambda_lr': [0.001, 0.01, 0.1]}, # Adam is default 12 | } 13 | 14 | 15 | def get_alg_setup(): 16 | return alg_setup 17 | 18 | 19 | def argument_parser(): 20 | n_cpus = os.cpu_count() 21 | parser = argparse.ArgumentParser( 22 | formatter_class=argparse.ArgumentDefaultsHelpFormatter 23 | ) 24 | parser.add_argument('--num-cores', '-c', type=int, default=n_cpus, 25 | help='Number of parallel processes generated.') 26 | parser.add_argument('--num-runs', '-r', type=int, default=4, 27 | help='Number of total runs that are executed.') 28 | parser.add_argument('--log-dir', type=str, default='/var/tmp/ga87zej', 29 | help='Define a custom directory for logging.') 30 | parser.add_argument('--seed', type=int, default=0, 31 | help='Define the initial seed.') 32 | args = parser.parse_args() 33 | return args 34 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/__init__.py 
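The four `benchmark_*_tasks.py` scripts above share a single pattern: choose a mapping from environment ids to per-task budgets, hand it to `Benchmark` together with the `alg_setup` hyper-parameter grid from `safety_settings.py`, and call `run()`. A minimal sketch of a custom single-task benchmark built the same way is shown below; the environment id and budget are illustrative, and Bullet-Safety-Gym is assumed to be installed:

```python
# custom_benchmark.py -- sketch mirroring the benchmark_*_tasks.py scripts above
from rl_safety_algorithms.benchmark import Benchmark
import bullet_safety_gym  # noqa
from safety_settings import alg_setup, argument_parser


def main(args):
    # one environment with an illustrative budget; keys follow the shipped scripts
    env_specific_kwargs = {
        'SafetyBallRun-v0': {'epochs': 100, 'steps_per_epoch': 32000},
    }
    bench = Benchmark(
        alg_setup,                                 # hyper-parameter grid from safety_settings.py
        env_ids=list(env_specific_kwargs.keys()),
        log_dir=args.log_dir,
        num_cores=args.num_cores,
        num_runs=args.num_runs,
        env_specific_kwargs=env_specific_kwargs,
        use_mpi=True,
        init_seed=args.seed,
    )
    bench.run()


if __name__ == '__main__':
    main(argument_parser())
```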
-------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/cpo/defaults.py: -------------------------------------------------------------------------------- 1 | def defaults(): 2 | return dict( 3 | actor='mlp', 4 | ac_kwargs={ 5 | 'pi': {'hidden_sizes': (64, 64), 6 | 'activation': 'tanh'}, 7 | 'val': {'hidden_sizes': (64, 64), 8 | 'activation': 'tanh'} 9 | }, 10 | adv_estimation_method='gae', 11 | epochs=300, # 9.8M steps 12 | gamma=0.99, 13 | lam_c=0.95, 14 | steps_per_epoch=64 * 1000, # default: 64k 15 | target_kl=0.0001, 16 | use_exploration_noise_anneal=True 17 | ) 18 | 19 | 20 | def locomotion(): 21 | """Default hyper-parameters for Bullet's locomotion environments.""" 22 | params = defaults() 23 | params['epochs'] = 312 24 | params['max_ep_len'] = 1000 25 | params['steps_per_epoch'] = 32 * 1000 26 | params['vf_lr'] = 3e-4 # default choice is Adam 27 | return params 28 | 29 | 30 | # Hack to circumvent kwarg errors with the official PyBullet Envs 31 | def gym_locomotion_envs(): 32 | params = locomotion() 33 | return params 34 | 35 | 36 | def gym_manipulator_envs(): 37 | """Default hyper-parameters for Bullet's manipulation environments.""" 38 | params = defaults() 39 | params['epochs'] = 312 40 | params['max_ep_len'] = 150 41 | params['steps_per_epoch'] = 32 * 1000 42 | params['vf_lr'] = 3e-4 # default choice is Adam 43 | return params 44 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/iwpg/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for Importance-weighted Policy Gradient (IWPG) 3 | algorithm. 
4 | """ 5 | 6 | 7 | def defaults(): 8 | return dict( 9 | actor='mlp', 10 | ac_kwargs={ 11 | 'pi': {'hidden_sizes': (64, 64), 12 | 'activation': 'tanh'}, 13 | 'val': {'hidden_sizes': (64, 64), 14 | 'activation': 'tanh'} 15 | }, 16 | adv_estimation_method='gae', 17 | epochs=300, 18 | gamma=0.99, 19 | steps_per_epoch=32 * 1000, 20 | # Early stopping criterion adds robustness towards hyper-parameters 21 | # see "Successful ingredients" Paper 22 | use_kl_early_stopping=True, 23 | ) 24 | 25 | 26 | def locomotion(): 27 | """Default hyper-parameters for Bullet's locomotion environments.""" 28 | params = defaults() 29 | params['epochs'] = 312 30 | params['max_ep_len'] = 1000 31 | params['pi_lr'] = 3e-4 # default choice is Adam 32 | params['steps_per_epoch'] = 8 * 1000 33 | params['vf_lr'] = 3e-4 # default choice is Adam 34 | return params 35 | 36 | 37 | # Hack to circumvent kwarg errors with the official PyBullet Envs 38 | def gym_locomotion_envs(): 39 | params = locomotion() 40 | return params 41 | 42 | 43 | def gym_manipulator_envs(): 44 | """Default hyper-parameters for Bullet's manipulation environments.""" 45 | params = defaults() 46 | params['epochs'] = 312 47 | params['max_ep_len'] = 150 48 | params['pi_lr'] = 3e-4 # default choice is Adam 49 | params['steps_per_epoch'] = 32 * 1000 50 | params['vf_lr'] = 3e-4 # default choice is Adam 51 | return params 52 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/lag-trpo/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for Lagrangian-TRPO algorithm. 3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, 19 | use_exploration_noise_anneal=True 20 | ) 21 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/npg/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for NPG algorithm. 
3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, 19 | target_kl=0.01, 20 | ) 21 | 22 | 23 | def bullet(): 24 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0.""" 25 | return defaults() 26 | 27 | 28 | def gym_locomotion_envs(): 29 | """Default hyper-parameters for Bullet's locomotion environments.""" 30 | params = defaults() 31 | params['epochs'] = 312 32 | params['max_ep_len'] = 1000 33 | params['pi_lr'] = 1e-4 # default choice is Adam 34 | params['steps_per_epoch'] = 32 * 1000 35 | return params 36 | 37 | 38 | def gym_manipulator_envs(): 39 | params = defaults() 40 | params['epochs'] = 312 41 | params['max_ep_len'] = 150 42 | params['pi_lr'] = 1e-4 # default choice is Adam 43 | params['steps_per_epoch'] = 32 * 1000 44 | return params 45 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/pdo/defaults.py: -------------------------------------------------------------------------------- 1 | def defaults(): 2 | return dict( 3 | actor='mlp', 4 | ac_kwargs={ 5 | 'pi': {'hidden_sizes': (64, 64), 6 | 'activation': 'tanh'}, 7 | 'val': {'hidden_sizes': (64, 64), 8 | 'activation': 'tanh'} 9 | }, 10 | adv_estimation_method='gae', 11 | epochs=300, # 9.8M steps 12 | gamma=0.99, 13 | lambda_lr=0.001, 14 | lambda_optimizer='Adam', 15 | steps_per_epoch=64 * 1000, 16 | target_kl=0.001, 17 | use_exploration_noise_anneal=True 18 | ) 19 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/algs/trpo/defaults.py: -------------------------------------------------------------------------------- 1 | """ 2 | Define default parameters for TRPO algorithm.
3 | """ 4 | 5 | 6 | def defaults(): 7 | return dict( 8 | actor='mlp', 9 | ac_kwargs={ 10 | 'pi': {'hidden_sizes': (64, 64), 11 | 'activation': 'tanh'}, 12 | 'val': {'hidden_sizes': (64, 64), 13 | 'activation': 'tanh'} 14 | }, 15 | adv_estimation_method='gae', 16 | epochs=300, 17 | gamma=0.99, 18 | steps_per_epoch=64 * 1000, # default: 64k 19 | target_kl=0.01, 20 | use_exploration_noise_anneal=True 21 | ) 22 | 23 | 24 | def bullet(): 25 | """ Default hyper-parameters for PyBullet Envs such as KukaBulletEnv-v0.""" 26 | return defaults() 27 | 28 | 29 | def gym_locomotion_envs(): 30 | """Default hyper-parameters for Bullet's locomotion environments.""" 31 | params = defaults() 32 | params['epochs'] = 312 33 | params['max_ep_len'] = 1000 34 | params['steps_per_epoch'] = 64 * 1000 35 | return params 36 | 37 | 38 | def gym_manipulator_envs(): 39 | params = defaults() 40 | params['epochs'] = 312 41 | params['max_ep_len'] = 150 42 | params['steps_per_epoch'] = 32 * 1000 43 | return params 44 | 45 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/RL-Safety-Algorithms/rl_safety_algorithms/common/__init__.py -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import sys 3 | 4 | if sys.version_info.major != 3: 5 | raise TypeError( 6 | 'This Python is only compatible with Python 3, but you are running ' 7 | 'Python {}. 
The installation will likely fail.'.format( 8 | sys.version_info.major)) 9 | 10 | with open("README.md", "r") as fh: 11 | long_description = fh.read() 12 | 13 | setuptools.setup( 14 | name="rl_safety_algorithms", # this is the name displayed in 'pip list' 15 | version="0.1", 16 | author="Sven Gronauer", 17 | author_email="sven.gronauer@tum.de", 18 | description="Algorithms for Safe Reinforcement Learning Problems.", 19 | install_requires=[ 20 | 'mpi4py', # can be skipped if you want to use single threads 21 | 'numpy', 22 | 'torch' 23 | ], 24 | long_description=long_description, 25 | long_description_content_type="text/markdown", 26 | url="https://github.com/sven.gronauer", 27 | packages=setuptools.find_packages(), 28 | classifiers=[ 29 | "Programming Language :: Python :: 3", 30 | "License :: OSI Approved :: MIT License", 31 | "Operating System :: OS Independent", 32 | ], 33 | ) 34 | -------------------------------------------------------------------------------- /Safe-RL/RL-Safety-Algorithms/tests/test_algs_single_thread.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import gym 3 | import pybullet_envs # noqa 4 | import rl_safety_algorithms.common.utils as U 5 | from rl_safety_algorithms.algs import core 6 | import inspect 7 | import sys 8 | from rl_safety_algorithms.common.loggers import setup_logger_kwargs 9 | 10 | 11 | class TestAlgorithms(unittest.TestCase): 12 | 13 | @staticmethod 14 | def check_alg(alg_name, env_id): 15 | """" Run one epoch update with algorithm.""" 16 | print(f'Run {alg_name}.') 17 | defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id) 18 | defaults['epochs'] = 1 19 | defaults['num_mini_batches'] = 4 20 | defaults['steps_per_epoch'] = 1000 21 | defaults['verbose'] = False 22 | 23 | defaults['logger_kwargs'] = setup_logger_kwargs( 24 | exp_name='unittest', 25 | seed=0, 26 | base_dir='/var/tmp/', 27 | datestamp=True, 28 | level=0, 29 | use_tensor_board=True, 30 | verbose=False) 31 | alg = U.get_alg_class(alg_name, env_id, **defaults) 32 | # sanity check of argument passing 33 | assert alg.alg == alg_name, f'Expected {alg_name} but got {alg.alg}' 34 | # return learn_fn(env_id, **defaults) 35 | ac, env = alg.learn() 36 | 37 | return ac, env 38 | 39 | def test_algorithms(self): 40 | """ Run all the specified algorithms.""" 41 | algs = ['iwpg', 'npg', 'trpo', 'lag-trpo', 'pdo', 'cpo'] 42 | for alg in algs: 43 | ac, env = self.check_alg(alg, 'HopperBulletEnv-v0') 44 | self.assertTrue(isinstance(env, gym.Env)) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Garrett Thomas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/README.md: -------------------------------------------------------------------------------- 1 | # Safe-MBPO 2 | Code for the NeurIPS 2021 paper "Safe Reinforcement Learning by Imagining the Near Future" by Garrett Thomas, Yuping Luo, and Tengyu Ma. 3 | 4 | Some code is borrowed from [Force](https://github.com/gwthomas/force). 5 | 6 | ## Installation 7 | We are using Python 3.8. The required packages can be installed via 8 | 9 | pip install -r requirements.txt 10 | 11 | You must also set the `ROOT_DIR` in `src/defaults.py`. 12 | This is where experiments' logs and checkpoints will be placed. 13 | 14 | Once setup is complete, run the code using the following command: 15 | 16 | python main.py -c config/ENV.json 17 | 18 | where ENV is replaced appropriately. To override a specific hyperparameter, add `-s PARAM VALUE` where `PARAM` is a string. 19 | Use `.` to specify hierarchical structure in the config, e.g. `-s alg_cfg.horizon 10`. -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/ant.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "ant", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -4.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/cheetah-no-flip.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "cheetah-no-flip", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -3.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/hopper.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "hopper", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -1.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/config/humanoid.json: -------------------------------------------------------------------------------- 1 | { 2 | "env_name": "humanoid", 3 | "alg_cfg": { 4 | "sac_cfg": { 5 | "target_entropy": -2.0 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | tqdm 4 | h5py 5 | opencv-python 6 | torch==1.4.0 7 | gym==0.17.2 8 | mujoco-py==2.0.2.13 -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe-MBPO/src/__init__.py
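The `-s PARAM VALUE` override described in the Safe-MBPO README addresses a nested entry of the JSON config via a dot-separated path (e.g. `alg_cfg.sac_cfg.target_entropy` in the configs above). A minimal sketch of that idea follows; it is not the project's actual `src/config.py` logic, just an illustration of how a dot path maps onto the nested dict:

```python
# dot-path override sketch -- illustrative only, not Safe-MBPO's own config code
import json


def set_by_path(cfg, path, value):
    """Set cfg['a']['b']['c'] = value for the dot path 'a.b.c'."""
    *parents, leaf = path.split('.')
    node = cfg
    for key in parents:
        node = node.setdefault(key, {})  # descend, creating levels if needed
    node[leaf] = value


if __name__ == '__main__':
    with open('config/hopper.json') as f:
        cfg = json.load(f)
    # roughly what `python main.py -c config/hopper.json -s alg_cfg.horizon 10`
    # expresses (command-line value parsing/typing is omitted here)
    set_by_path(cfg, 'alg_cfg.horizon', 10)
    print(json.dumps(cfg, indent=2))
```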
-------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/defaults.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | PRECISION = 2 4 | OPTIMIZER = torch.optim.Adam 5 | BATCH_SIZE = 256 6 | ACTOR_LR = 3e-4 7 | CRITIC_LR = 1e-3 8 | 9 | # ROOT_DIR = None # set a path (directory) where experiments should be saved 10 | ROOT_DIR = '/tiger/u/gwthomas/data/smbpo' -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .torch_util import Module 4 | 5 | 6 | class Normalizer(Module): 7 | def __init__(self, dim, epsilon=1e-6): 8 | super().__init__() 9 | self.dim = dim 10 | self.epsilon = epsilon 11 | self.register_buffer('mean', torch.zeros(dim)) 12 | self.register_buffer('std', torch.zeros(dim)) 13 | 14 | def fit(self, X): 15 | assert torch.is_tensor(X) 16 | assert X.dim() == 2 17 | assert X.shape[1] == self.dim 18 | self.mean.data.copy_(X.mean(dim=0)) 19 | self.std.data.copy_(X.std(dim=0)) 20 | 21 | def forward(self, x): 22 | return (x - self.mean) / (self.std + self.epsilon) 23 | 24 | def unnormalize(self, normal_X): 25 | return self.mean + (self.std * normal_X) -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/shared.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from gym.wrappers import RescaleAction 4 | 5 | from .sampling import SampleBuffer 6 | 7 | 8 | def get_env(env_name, wrap_torch=True): 9 | from .env.torch_wrapper import TorchWrapper 10 | from .env.hopper_no_bonus import HopperNoBonusEnv 11 | from .env.cheetah_no_flip import CheetahNoFlipEnv 12 | from .env.ant_no_bonus import AntNoBonusEnv 13 | from .env.humanoid_no_bonus import HumanoidNoBonusEnv 14 | envs = { 15 | 'hopper': HopperNoBonusEnv, 16 | 'cheetah-no-flip': CheetahNoFlipEnv, 17 | 'ant': AntNoBonusEnv, 18 | 'humanoid': HumanoidNoBonusEnv 19 | } 20 | env = envs[env_name]() 21 | if not (np.all(env.action_space.low == -1.0) and np.all(env.action_space.high == 1.0)): 22 | env = RescaleAction(env, -1.0, 1.0) 23 | if wrap_torch: 24 | env = TorchWrapper(env) 25 | return env 26 | 27 | 28 | class SafetySampleBuffer(SampleBuffer): 29 | COMPONENT_NAMES = (*SampleBuffer.COMPONENT_NAMES, 'violations') 30 | 31 | def __init__(self, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | self._create_buffer('violations', torch.bool, []) -------------------------------------------------------------------------------- /Safe-RL/Safe-MBPO/src/squashed_gaussian.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn.functional as F 3 | from torch import distributions as pd 4 | 5 | 6 | # Borrowed from https://github.com/denisyarats/pytorch_sac 7 | 8 | class TanhTransform(pd.transforms.Transform): 9 | domain = pd.constraints.real 10 | codomain = pd.constraints.interval(-1.0, 1.0) 11 | bijective = True 12 | sign = +1 13 | 14 | def __init__(self, cache_size=1): 15 | super().__init__(cache_size=cache_size) 16 | 17 | @staticmethod 18 | def atanh(x): 19 | return 0.5 * (x.log1p() - (-x).log1p()) 20 | 21 | def __eq__(self, other): 22 | return isinstance(other, TanhTransform) 23 | 24 | def _call(self, x): 25 | return x.tanh() 26 | 27 | def _inverse(self, y): 28 | # We do not
clamp to the boundary here as it may degrade the performance of certain algorithms. 29 | # one should use `cache_size=1` instead 30 | return self.atanh(y) 31 | 32 | def log_abs_det_jacobian(self, x, y): 33 | # We use a formula that is more numerically stable, see details in the following link 34 | # https://github.com/tensorflow/probability/commit/ef6bb176e0ebd1cf6e25c6b5cecdd2428c22963f#diff-e120f70e92e6741bca649f04fcd907b7 35 | return 2. * (math.log(2.) - x - F.softplus(-2. * x)) 36 | 37 | 38 | class SquashedGaussian(pd.transformed_distribution.TransformedDistribution): 39 | def __init__(self, loc, scale, validate_args=None): 40 | base_dist = pd.Normal(loc, scale) 41 | super().__init__(base_dist, TanhTransform(), validate_args=validate_args) 42 | 43 | @property 44 | def mean(self): 45 | mu = self.base_dist.loc 46 | for transform in self.transforms: 47 | mu = transform(mu) 48 | return mu -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | __pycache__ 9 | SafeRLBench.egg-info 10 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .ipynb_checkpoints 3 | .DS_Store 4 | .idea 5 | .coverage 6 | covhtml 7 | MANIFEST 8 | _build 9 | 10 | *.pyc 11 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | 5 | services: 6 | - docker 7 | 8 | env: 9 | - PYTHON=python2 10 | - PYTHON=python3 11 | 12 | # Setup docker container 13 | install: 14 | - docker build -f misc/Dockerfile.${PYTHON} -t test-image . 15 | - docker ps -a 16 | - ci_env=`bash <(curl -s https://codecov.io/env)` 17 | 18 | # Run tests 19 | script: 20 | - docker run test-image flake8 SafeRLBench --exclude "test*.py,__init__.py,_quadrocopter" --ignore=E402,W503 --show-source 21 | - docker run test-image flake8 SafeRLBench --filename="__init__.py,test*.py" --ignore=F,E402,W503 --show-source 22 | - docker run test-image pydocstyle SafeRLBench --match='(?!__init__).*\.py' 23 | - docker run $ci_env test-image /bin/bash -c "nosetests --with-doctest --with-coverage --cover-package=SafeRLBench --verbosity=2 SafeRLBench ; bash <(curl -s https://codecov.io/bash)" 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import logging 4 | 5 | from .configuration import SRBConfig 6 | 7 | # Initialize configuration 8 | config = SRBConfig(logging.getLogger(__name__)) 9 | 10 | from .monitor import AlgoMonitor, EnvMonitor 11 | from .base import EnvironmentBase, Space, AlgorithmBase, Policy, ProbPolicy 12 | from .bench import Bench, BenchConfig 13 | from . import algo 14 | from . import envs 15 | from . import policy 16 | from . import spaces 17 | from . import error 18 | from . import measure 19 | 20 | # Add things to all 21 | __all__ = ['EnvironmentBase', 22 | 'Space', 23 | 'AlgorithmBase', 24 | 'Policy', 25 | 'ProbPolicy', 26 | 'AlgoMonitor', 27 | 'EnvMonitor', 28 | 'SRBConfig', 29 | 'Bench', 30 | 'BenchConfig', 31 | 'envs', 32 | 'algo', 33 | 'policy', 34 | 'spaces', 35 | 'measure', 36 | 'error'] 37 | 38 | 39 | # Import test after __all__ (no documentation) 40 | # from numpy.testing import Tester 41 | # test = Tester().test 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/__init__.py: -------------------------------------------------------------------------------- 1 | """Algorithm Module. 
2 | 3 | =================== ========================================= 4 | Algorithm 5 | ============================================================= 6 | A3C Asynchronous Actor-Critic Agents 7 | PolicyGradient Different Policy Gradient Implementations 8 | DiscreteQLearning Q-Learning using a table 9 | SafeOpt Bayesian Optimization with SafeOpt 10 | SafeOptSwarm Bayesian Optimization with SafeOptSwarm 11 | =================== ========================================= 12 | """ 13 | 14 | from .policygradient import PolicyGradient 15 | from .safeopt import SafeOpt, SafeOptSwarm 16 | from .a3c import A3C 17 | from .q_learning import DiscreteQLearning 18 | 19 | __all__ = ['PolicyGradient', 'SafeOpt', 'A3C', 'DiscreteQLearning', 20 | 'SafeOptSwarm'] 21 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/algo/test.py: -------------------------------------------------------------------------------- 1 | """Algorithm Tests.""" 2 | 3 | from SafeRLBench.algo import PolicyGradient, A3C 4 | from SafeRLBench.envs import LinearCar 5 | from .policygradient import CentralFDEstimator, estimators 6 | 7 | from SafeRLBench.policy import NeuralNetwork 8 | 9 | from unittest2 import TestCase 10 | from mock import MagicMock, Mock 11 | 12 | 13 | class TestPolicyGradient(TestCase): 14 | """PolicyGradientTestClass.""" 15 | 16 | def test_pg_init(self): 17 | """Test: POLICYGRADIENT: initialization.""" 18 | env_mock = MagicMock() 19 | pol_mock = Mock() 20 | 21 | for key, item in estimators.items(): 22 | pg = PolicyGradient(env_mock, pol_mock, estimator=key) 23 | self.assertIsInstance(pg.estimator, item) 24 | 25 | pg = PolicyGradient(env_mock, pol_mock, estimator=CentralFDEstimator) 26 | self.assertIsInstance(pg.estimator, CentralFDEstimator) 27 | 28 | self.assertRaises(ImportError, PolicyGradient, 29 | env_mock, pol_mock, CentralFDEstimator(env_mock)) 30 | 31 | 32 | class TestA3C(TestCase): 33 | """A3C Test Class.""" 34 | 35 | def test_a3c_init(self): 36 | """Test: A3C: initialization.""" 37 | a3c = A3C(LinearCar(), NeuralNetwork([2, 6, 1])) 38 | 39 | fields = ['environment', 'policy', 'max_it', 'num_workers', 'rate', 40 | 'done', 'policy', 'p_net', 'v_net', 'workers', 'threads', 41 | 'global_counter', 'sess'] 42 | 43 | for field in fields: 44 | assert hasattr(a3c, field) 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .general_mountaincar import GeneralMountainCar 4 | from .linear_car import LinearCar 5 | from .gym_wrap import GymWrap 6 | from .quadrocopter import Quadrocopter 7 | from .mdp import MDP 8 | 9 | __all__ = [ 10 | 'GeneralMountainCar', 11 | 'LinearCar', 12 | 'GymWrap', 13 | 'Quadrocopter', 14 | 'MDP' 15 | ] 16 | 17 | # TODO: Envs: Add module docs in __init__ file.
18 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/envs/_quadrocopter/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | from .quadrotor_dynamics import QuadrotorDynamics 4 | from .quadrocopter_classes import StateVector 5 | 6 | __all__ = ['QuadrotorDynamics', 'StateVector'] 7 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_policy import LinearPolicy, NoisyLinearPolicy 2 | from .linear_policy import DiscreteLinearPolicy 3 | from .neural_network import NeuralNetwork 4 | from .controller import NonLinearQuadrocopterController 5 | 6 | __all__ = [ 7 | 'LinearPolicy', 8 | 'NoisyLinearPolicy', 9 | 'DiscreteLinearPolicy', 10 | 'NeuralNetwork', 11 | 'NonLinearQuadrocopterController' 12 | ] 13 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | from .rd_space import RdSpace 4 | from .bounded_space import BoundedSpace 5 | from .discrete_space import DiscreteSpace 6 | 7 | __all__ = ['RdSpace', 'BoundedSpace', 'DiscreteSpace'] 8 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/discrete_space.py: -------------------------------------------------------------------------------- 1 | """Discrete space implementation.""" 2 | 3 | from SafeRLBench import Space 4 | 5 | import numpy as np 6 | 7 | 8 | class DiscreteSpace(Space): 9 | """Discrete Space. 10 | 11 | Let d be the dimension of the space, then it will contain elements 12 | {0, 1, ... , dim-1}. 13 | 14 | Examples 15 | -------- 16 | Create a `DiscreteSpace` with three states: 17 | >>> from SafeRLBench.spaces import DiscreteSpace 18 | >>> discrete_space = DiscreteSpace(3) 19 | """ 20 | 21 | def __init__(self, dim): 22 | """Initialize `DiscreteSpace`. 23 | 24 | Parameters 25 | ---------- 26 | dim : int 27 | Number of states. 
28 | """ 29 | assert dim > 0, ("If you need a discrete space without elements, you " 30 | + "do not need this class.") 31 | self._dim = dim 32 | 33 | def contains(self, x): 34 | """Check if element is part of the space.""" 35 | return (isinstance(x, int) and x >= 0 and x < self._dim) 36 | 37 | def sample(self): 38 | """Sample an element of the space.""" 39 | return np.random.randint(self._dim) 40 | 41 | @property 42 | def dimension(self): 43 | """Return dimension of the space.""" 44 | return self._dim 45 | 46 | def __repr__(self): 47 | return 'DiscreteSpace(dim=%d)' % self._dim 48 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/rd_space.py: -------------------------------------------------------------------------------- 1 | """R^d with any shape.""" 2 | import numpy as np 3 | from SafeRLBench import Space 4 | 5 | 6 | class RdSpace(Space): 7 | """R^d Vectorspace.""" 8 | 9 | def __init__(self, shape): 10 | """Initialize with shape.""" 11 | self.shape = shape 12 | self._dim = None 13 | 14 | def contains(self, x): 15 | """Check if element is contained.""" 16 | return isinstance(x, np.ndarray) and x.shape == self.shape 17 | 18 | def sample(self): 19 | """Return arbitrary element.""" 20 | return np.ones(self.shape) 21 | 22 | @property 23 | def dimension(self): 24 | """Return dimension of the space.""" 25 | if self._dim is None: 26 | d = 1 27 | for i in range(len(self.shape)): 28 | d *= self.shape[i] 29 | self._dim = d 30 | return self._dim 31 | 32 | def __repr__(self): 33 | return 'RdSpace(shape=%s)' % str(self.shape) 34 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/SafeRLBench/spaces/test.py: -------------------------------------------------------------------------------- 1 | """Tests for spaces module.""" 2 | from __future__ import absolute_import 3 | 4 | from functools import partial 5 | import inspect 6 | 7 | from numpy import array 8 | import SafeRLBench.spaces as spaces 9 | 10 | 11 | """Dictionary storing initialization arguments for classes.""" 12 | class_arguments = { 13 | spaces.BoundedSpace: [array([-1, -2]), array([1, 0])], 14 | spaces.RdSpace: [(3, 2)], 15 | spaces.DiscreteSpace: [5] 16 | } 17 | 18 | 19 | class TestSpaces(object): 20 | """Wrap spaces tests.""" 21 | 22 | classes = [] 23 | 24 | @classmethod 25 | def setUpClass(cls): 26 | """Initialize classes list.""" 27 | for name, c in inspect.getmembers(spaces): 28 | if inspect.isclass(c): 29 | cls.classes.append(c) 30 | 31 | def exhaustive_tests(self): 32 | """Check: Spaces tests initial values for testing.""" 33 | for c in self.classes: 34 | if c not in class_arguments: 35 | assert(False) 36 | 37 | def generate_tests(self): 38 | """Generate tests for spaces implementations.""" 39 | for c in self.classes: 40 | if c in class_arguments: 41 | check = partial(self.check_contains) 42 | check.description = ('Test: ' + c.__name__.upper() 43 | + ': implementation.') 44 | yield check, c 45 | 46 | def check_contains(self, c): 47 | """Check if contains and element is implemented.""" 48 | space = c(*class_arguments[c]) 49 | try: 50 | x = space.sample() 51 | b = space.contains(x) 52 | except NotImplementedError: 53 | assert(False) 54 | assert(b) 55 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | 
# 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeRLBench 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/algorithm.rst: -------------------------------------------------------------------------------- 1 | Algorithms 2 | ========== 3 | 4 | .. include:: ../SafeRLBench/algo/README.rst 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/algo.rst: -------------------------------------------------------------------------------- 1 | Algorithm Module 2 | ================ 3 | 4 | This module contains implementations of different algorithms. Please refer to 5 | the class documentation for detailed instructions on how to use them. 6 | 7 | .. contents:: Contents 8 | :local: 9 | 10 | AlgorithmBase 11 | ------------- 12 | 13 | .. autoclass:: SafeRLBench.AlgorithmBase 14 | :members: 15 | 16 | A3C 17 | --- 18 | 19 | .. autoclass:: SafeRLBench.algo.A3C 20 | :members: 21 | 22 | Policy Gradient 23 | --------------- 24 | 25 | .. autoclass:: SafeRLBench.algo.PolicyGradient 26 | :members: 27 | 28 | Q-Learning 29 | ---------- 30 | 31 | .. autoclass:: SafeRLBench.algo.DiscreteQLearning 32 | :members: 33 | 34 | SafeOpt 35 | ------- 36 | 37 | .. autoclass:: SafeRLBench.algo.SafeOpt 38 | :members: 39 | 40 | SafeOptSwarm 41 | ------------ 42 | 43 | .. autoclass:: SafeRLBench.algo.SafeOptSwarm 44 | :members: 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/bench.rst: -------------------------------------------------------------------------------- 1 | Benchmark 2 | ========= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Bench 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Bench 11 | :members: 12 | 13 | BenchConfig 14 | ----------- 15 | 16 | .. autoclass:: SafeRLBench.BenchConfig 17 | :members: 18 | 19 | BenchRun 20 | -------- 21 | 22 | .. autoclass:: SafeRLBench.bench.BenchRun 23 | :members: 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/envs.rst: -------------------------------------------------------------------------------- 1 | Environment Module 2 | ================== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | EnvironmentBase 8 | --------------- 9 | 10 | .. autoclass:: SafeRLBench.EnvironmentBase 11 | :members: 12 | 13 | GeneralMountainCar 14 | ------------------ 15 | 16 | .. autoclass:: SafeRLBench.envs.GeneralMountainCar 17 | :members: 18 | 19 | GymWrap 20 | ------- 21 | 22 | .. autoclass:: SafeRLBench.envs.GymWrap 23 | :members: 24 | 25 | LinearCar 26 | --------- 27 | 28 | .. autoclass:: SafeRLBench.envs.LinearCar 29 | :members: 30 | 31 | MDP 32 | --- 33 | 34 | .. autoclass:: SafeRLBench.envs.MDP 35 | :members: 36 | 37 | Quadrocopter 38 | ------------ 39 | 40 | .. 
autoclass:: SafeRLBench.envs.Quadrocopter 41 | :members: 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/measure.rst: -------------------------------------------------------------------------------- 1 | Measure Module 2 | ============== 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Measure 8 | ------- 9 | 10 | .. autoclass:: SafeRLBench.measure.Measure 11 | :members: 12 | 13 | BestPerformance 14 | --------------- 15 | 16 | .. autoclass:: SafeRLBench.measure.BestPerformance 17 | :members: 18 | 19 | SafetyMeasure 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.measure.SafetyMeasure 23 | :members: 24 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/misc.rst: -------------------------------------------------------------------------------- 1 | Miscellaneous 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Configuration 8 | ------------- 9 | 10 | .. autoclass:: SafeRLBench.SRBConfig 11 | :members: 12 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/policy.rst: -------------------------------------------------------------------------------- 1 | Policy Module 2 | ============= 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Bases 8 | ----- 9 | 10 | Deterministic Policy Base 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | 13 | .. autoclass:: SafeRLBench.Policy 14 | :members: 15 | 16 | Probabilistic Policy Base 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. autoclass:: SafeRLBench.ProbPolicy 20 | :members: 21 | 22 | Linear Policies 23 | --------------- 24 | 25 | LinearPolicy 26 | ~~~~~~~~~~~~ 27 | 28 | .. autoclass:: SafeRLBench.policy.LinearPolicy 29 | :members: 30 | 31 | DiscreteLinearPolicy 32 | ~~~~~~~~~~~~~~~~~~~~ 33 | 34 | .. autoclass:: SafeRLBench.policy.DiscreteLinearPolicy 35 | :members: 36 | 37 | NoisyLinearPolicy 38 | ~~~~~~~~~~~~~~~~~ 39 | 40 | .. autoclass:: SafeRLBench.policy.NoisyLinearPolicy 41 | :members: 42 | 43 | NonLinearQuadrocopterController 44 | ------------------------------- 45 | 46 | .. autoclass:: SafeRLBench.policy.NonLinearQuadrocopterController 47 | :members: 48 | 49 | NeuralNetwork 50 | ------------- 51 | 52 | .. autoclass:: SafeRLBench.policy.NeuralNetwork 53 | :members: 54 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/spaces.rst: -------------------------------------------------------------------------------- 1 | Spaces Module 2 | ============= 3 | 4 | .. contents:: Contents 5 | :local: 6 | 7 | Space 8 | ----- 9 | 10 | .. autoclass:: SafeRLBench.Space 11 | :members: 12 | 13 | BoundedSpace 14 | ------------ 15 | 16 | .. autoclass:: SafeRLBench.spaces.BoundedSpace 17 | :members: 18 | 19 | DiscreteSpace 20 | ------------- 21 | 22 | .. autoclass:: SafeRLBench.spaces.DiscreteSpace 23 | :members: 24 | 25 | RdSpace 26 | ------- 27 | 28 | .. autoclass:: SafeRLBench.spaces.RdSpace 29 | :members: 30 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/api/srb.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. 
toctree:: 5 | 6 | algo 7 | envs 8 | policy 9 | spaces 10 | measure 11 | bench 12 | misc 13 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/environment.rst: -------------------------------------------------------------------------------- 1 | Environments 2 | ============ 3 | 4 | .. include:: ../SafeRLBench/envs/README.rst 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. SafeRLBench documentation master file, created by 2 | sphinx-quickstart on Mon Mar 27 16:08:01 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. include:: ../README.rst 7 | 8 | .. include:: toc.rst 9 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/docs/toc.rst: -------------------------------------------------------------------------------- 1 | Content 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | algorithm 8 | environment 9 | api/srb 10 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matlab issues. 11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=2.7 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (seperate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install extra python2 requirements 32 | RUN pip --no-cache-dir install futures multiprocessing \ 33 | && rm -rf /root/.cache 34 | 35 | # Install SafeOpt 36 | RUN git clone https://github.com/befelix/SafeOpt.git \ 37 | && cd SafeOpt \ 38 | && python setup.py install \ 39 | && rm -rf /SafeOpt 40 | 41 | # Copy the main code 42 | COPY . /code 43 | RUN cd /code && python setup.py develop 44 | 45 | WORKDIR /code 46 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/misc/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | ENV TF_CPP_MIN_LOG_LEVEL=2 4 | 5 | # Install build essentials and clean up 6 | RUN apt-get update --quiet \ 7 | && apt-get install -y --no-install-recommends --quiet build-essential \ 8 | && apt-get clean 9 | 10 | # Fix matlab issues. 
11 | RUN apt-get install -y --quiet libfreetype6-dev pkg-config libpng12-dev \ 12 | && apt-get clean 13 | 14 | # Update conda, install packages, and clean up 15 | RUN conda update conda --yes --quiet \ 16 | && conda install python=3.5 pip numpy scipy nose --yes --quiet \ 17 | && conda clean --yes --all \ 18 | && hash -r 19 | 20 | # Get the requirements files (seperate from the main body) 21 | COPY requirements.txt requirements_dev.txt /code/ 22 | 23 | # Install requirements and clean up 24 | RUN pip --no-cache-dir install -r code/requirements.txt \ 25 | && rm -rf /root/.cache 26 | 27 | # Install dev requirements and clean up 28 | RUN pip --no-cache-dir install -r code/requirements_dev.txt \ 29 | && rm -rf /root/.cache 30 | 31 | # Install SafeOpt 32 | RUN git clone https://github.com/befelix/SafeOpt.git \ 33 | && cd SafeOpt \ 34 | && python setup.py install \ 35 | && rm -rf /SafeOpt 36 | 37 | # Copy the main code 38 | COPY . /code 39 | RUN cd /code && python setup.py develop 40 | 41 | WORKDIR /code 42 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.7 2 | scipy >= 0.19.0 3 | six >= 1.10 4 | futures >= 3.0.5 5 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | gym >= 0.8.0 2 | tensorflow >= 1.0.0 3 | GPy >= 1.6.1 4 | 5 | # Style testing 6 | flake8 >= 3.3.0 7 | pep8 >= 1.7.0 8 | pep8-naming >= 0.4.1 9 | pydocstyle >= 1.1.1 10 | 11 | # Unittesting 12 | nose >= 1.3.7 13 | nose-exclude >= 0.5.0 14 | coverage >= 4.3.4 15 | unittest2 >= 1.1.0 16 | mock >= 2.0.0 17 | 18 | # Documentation 19 | sphinx >= 1.5.3 20 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='SafeRLBench', 5 | version='1.0.1', 6 | author='Nicolas Ochsner', 7 | author_email='ochsnern@student.ethz.ch', 8 | packages=[ 9 | 'SafeRLBench', 10 | 'SafeRLBench.algo', 11 | 'SafeRLBench.envs', 12 | 'SafeRLBench.spaces', 13 | 'SafeRLBench.policy', 14 | ], 15 | description='Safe Reinforcement Learning Benchmark', 16 | keywords='reinforcement-learning benchmark', 17 | url='https://github.com/befelix/Safe-RL-Benchmark', 18 | install_requires=[ 19 | 'numpy >= 1.7', 20 | 'scipy >= 0.19.0', 21 | 'six >= 1.10', 22 | 'futures >= 3.0.5;python_version<"3.2"' 23 | ], 24 | extras_require={ 25 | 'gym': ['gym >= 0.8.0'], 26 | 'safeopt': ['GPy >= 1.6.1', 'safeopt >= 0.1'], 27 | 'neural': ['tensorflow >= 1.0.0'], 28 | }, 29 | dependency_links=[ 30 | 'git+https://github.com/befelix/SafeOpt/tarball/master#egg=safeopt-0.1' 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="SafeRLBench" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink 
base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # tensorflow environment variable 19 | export TF_CPP_MIN_LOG_LEVEL='3' 20 | 21 | # Change to script root 22 | cd $(get_script_dir) 23 | GREEN='\033[0;32m' 24 | NC='\033[0m' 25 | 26 | BOLD=$(tput bold) 27 | NORMAL=$(tput sgr0) 28 | 29 | # Run style tests 30 | echo -e "${GREEN}${BOLD}Running style tests:${NC}" 31 | flake8 $module --exclude test*.py,__init__.py,_quadrocopter --show-source 32 | 33 | # Ignore import errors for __init__ and tests 34 | flake8 $module --filename=__init__.py,test*.py --ignore=F --show-source 35 | 36 | echo -e "${GREEN}${BOLD}Testing docstring conventions:${NC}" 37 | # Test docstring conventions 38 | pydocstyle $module --match='(?!__init__).*\.py' 2>&1 | grep -v "WARNING: __all__" 39 | 40 | echo -e "${GREEN}${BOLD}Running unit tests in current environment.${NC}" 41 | nosetests -v --with-doctest --with-coverage --cover-erase --cover-package=$module $module 2>&1 | grep -v "^Level " 42 | 43 | # Export html 44 | coverage html 45 | -------------------------------------------------------------------------------- /Safe-RL/Safe-RL-Benchmark/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35 3 | 4 | [testenv] 5 | deps = 6 | nose 7 | numpy 8 | theano 9 | mock 10 | unittest2 11 | commands = nosetests 12 | 13 | [flake8] 14 | ignore = E402,W503,D105,D413 15 | exclude = 16 | SafeRLBench/envs/_quadrocopter* 17 | 18 | [pydocstyle] 19 | add_ignore = D203,D105,D413 20 | match_dir = '[^\.\_].*' 21 | 22 | [coverage:run] 23 | omit = 24 | */_quadrocopter* 25 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | Codes for the constrained Linear-Quadratic Regulator (LQR) experiment. 3 | ## Reference 4 | Ming Yu, Zhuoran Yang, Mladen Kolar, and Zhaoran Wang. Convergent Policy Optimization for Safe Reinforcement Learning. In NeurIPS 2019. 
5 | ## Run codes 6 | Run "Safe_RL_LQR_experiment.m" 7 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/iterate_calculate.m: -------------------------------------------------------------------------------- 1 | function X = iterate_calculate( Init, M, N ) 2 | % this function iteratively solve for the following equation for X: 3 | % X = M + N'*X*N 4 | % starting from Init 5 | 6 | X = Init; diff = 1; iter = 0; 7 | while diff > 1e-3 8 | iter = iter + 1; 9 | X_old = X; 10 | X = M + N'*X*N; 11 | diff = norm(X_old - X); 12 | end 13 | 14 | end 15 | 16 | -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Safe_reinforcement_learning/poster.pdf -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadconstr.m: -------------------------------------------------------------------------------- 1 | function [y,yeq,grady,gradyeq] = quadconstr(x,H,k,d) 2 | jj = length(H); % jj is the number of inequality constraints 3 | y = zeros(1,jj); 4 | for i = 1:jj 5 | y(i) = 1/2*x'*H{i}*x + k{i}'*x + d{i}; 6 | end 7 | yeq = []; 8 | 9 | if nargout > 2 10 | grady = zeros(length(x),jj); 11 | for i = 1:jj 12 | grady(:,i) = H{i}*x + k{i}; 13 | end 14 | end 15 | gradyeq = []; -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadhess.m: -------------------------------------------------------------------------------- 1 | function hess = quadhess(x,lambda,Q,H) 2 | hess = Q; 3 | jj = length(H); % jj is the number of inequality constraints 4 | for i = 1:jj 5 | hess = hess + lambda.ineqnonlin(i)*H{i}; 6 | end -------------------------------------------------------------------------------- /Safe-RL/Safe_reinforcement_learning/quadobj.m: -------------------------------------------------------------------------------- 1 | function [y,grady] = quadobj(x,Q,f,c) 2 | y = 1/2*x'*Q*x + f'*x + c; 3 | if nargout > 1 4 | grady = Q*x + f; 5 | end -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/.gitignore: -------------------------------------------------------------------------------- 1 | ## Julia ignores ## 2 | 3 | # Files generated by invoking Julia with --code-coverage 4 | *.jl.cov 5 | *.jl.*.cov 6 | 7 | # Files generated by invoking Julia with --track-allocation 8 | *.jl.mem 9 | 10 | # System-specific files and directories generated by the BinaryProvider and BinDeps packages 11 | # They contain absolute paths specific to the host computer, and so should not be committed 12 | deps/deps.jl 13 | deps/build.log 14 | deps/downloads/ 15 | deps/usr/ 16 | deps/src/ 17 | 18 | # Build artifacts for creating documentation generated by the Documenter package 19 | docs/build/ 20 | docs/site/ 21 | 22 | # File generated by Pkg, the package manager, based on a corresponding Project.toml 23 | # It records a fixed state of all packages used by the project. As such, it should not be 24 | # committed for packages, but should be committed for applications that require a static 25 | # environment. 
26 | 27 | # Manifest.toml 28 | 29 | 30 | ## C ## 31 | 32 | *.o 33 | *.so 34 | 35 | 36 | ## Additional Ignores ## 37 | 38 | # Don't want to commit changes to these 39 | shield_dump.c 40 | 41 | # My sync program keeps pooping in the corners 42 | *.insyncdl 43 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" 3 | CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" 4 | Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" 5 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 6 | Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" 7 | Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" 8 | GridShielding = "d6812381-bd27-4ab8-a35f-a1c7ba1f8c22" 9 | HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" 10 | InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" 11 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 12 | LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" 13 | Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" 14 | MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee" 15 | Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e" 16 | NaturalSort = "c020b1a1-e9b0-503a-9c33-f039bfc54a85" 17 | PProf = "e4faabce-9ead-11e9-39d9-4379958e3056" 18 | Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" 19 | Pluto = "c3e4b0f8-55cb-11ea-2926-15256bba5781" 20 | PlutoLinks = "0ff47ea0-7a50-410d-8455-4348d5de0420" 21 | PlutoSerialization = "89dfed0f-77d6-439b-aaac-839db4b25fb8" 22 | PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" 23 | Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" 24 | ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" 25 | Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 26 | ReachabilityAnalysis = "1e97bd63-91d1-579d-8e8d-501d2b57c93f" 27 | Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" 28 | StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" 29 | StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" 30 | Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" 31 | Unzip = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" 32 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/Shared Code/PlotsDefaults.jl: -------------------------------------------------------------------------------- 1 | # This may be the shortest code-file I've ever authored. 2 | Plots.default(fontfamily="serif-roman") 3 | 4 | halfpage = 300 5 | 6 | squeezed = (halfpage, 150) 7 | regular = (halfpage, 220) 8 | tall = (halfpage, 400) -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/Blueprints/TrainSaveEvaluateSingle.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBGranularityCost/ExtractQueryResults.jl: -------------------------------------------------------------------------------- 1 | struct UppaalQueryFailedException <: Exception 2 | message::AbstractString 3 | end 4 | 5 | function extract_query_results(query_results::AbstractString) 6 | results = [] 7 | open(query_results) do file 8 | for line in eachline(file) 9 | m_mean = match(r"mean=([\d.e-]+)", line) 10 | aborted = occursin(r"EXCEPTION: |is time-locked.|-- Aborted.", line) 11 | 12 | if aborted 13 | throw(UppaalQueryFailedException(line)) 14 | end 15 | 16 | if m_mean === nothing 17 | continue 18 | end 19 | 20 | push!(results, m_mean[1]) 21 | end 22 | end 23 | 24 | results 25 | end -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldRobustness/StatisticalChecking.jl: -------------------------------------------------------------------------------- 1 | function evaluate_safety(mechanics, policy, number_of_runs; 2 | run_duration=120, 3 | min_v_on_impact=1, 4 | unlucky=false) 5 | 6 | safety_violations_observed = 0 7 | unsafe_trace = [] 8 | rand_step = eps() 9 | 10 | for run in 1:number_of_runs 11 | v, p = 0, rand(7:rand_step:10) 12 | # Simulate the ball for run_duration seconds 13 | vs, ps, ts = simulate_sequence(mechanics, v, p, policy, run_duration, 14 | min_v_on_impact=min_v_on_impact, 15 | unlucky=unlucky) 16 | # See if it ends at v=0, p=0 17 | if last(vs) == 0 && last(ps) == 0 18 | safety_violations_observed += 1 19 | end 20 | end 21 | (; safety_violations_observed, number_of_runs) 22 | end 23 | 24 | # It does not choose a random policy. It returns a policy that acts randomly. 25 | function random_policy(hit_chance) 26 | return (v, p) -> 27 | if rand(0:eps():1) <= hit_chance 28 | "hit" 29 | else 30 | "nohit" 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PostShielded.q: -------------------------------------------------------------------------------- 1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy Deterrence1000 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence1000.strategy.json") 5 | 6 | /* formula 2 */ 7 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence1000 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence1000 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:interventions) under Deterrence1000 14 | 15 | /* formula 5 */ 16 | strategy Deterrence100 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence100.strategy.json") 17 | 18 | /* formula 6 */ 19 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence100 20 | 21 | /* formula 7 */ 22 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence100 23 | 24 | /* formula 8 */ 25 | E[<=120;%checks%] (max:interventions) under Deterrence100 26 | 27 | /* formula 9 */ 28 | strategy Deterrence10 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence10.strategy.json") 29 | 30 | /* formula 10 */ 31 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence10 32 | 33 | /* formula 11 */ 34 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence10 35 | 36 | /* formula 12 */ 37 | E[<=120;%checks%] (max:interventions) under Deterrence10 38 | 39 | /* formula 13 */ 40 | strategy Deterrence0 = loadStrategy {} -> {p, v}("%resultsdir%/Deterrence0.strategy.json") 41 | 42 | /* formula 14 */ 43 | E[<=120;%checks%] (max:LearnerPlayer.fired) under Deterrence0 44 | 45 | /* formula 15 */ 46 | E[<=120;%checks%] (max:(number_deaths > 0)) under Deterrence0 47 | 48 | /* formula 16 */ 49 | E[<=120;%checks%] (max:interventions) under Deterrence0 50 | 51 | 52 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:LearnerPlayer.fired) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:(number_deaths > 0)) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 2 */ 4 | E[<=120;%checks%] (max:LearnerPlayer.fired) 5 | 6 | /* formula 3 */ 7 | E[<=120;%checks%] (max:(number_deaths > 0)) 8 | 9 | /* formula 4 */ 10 | E[<=120;%checks%] (max:interventions) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BBShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-BarbaricMethodAccuracy/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeCostEvaluate.q: -------------------------------------------------------------------------------- 1 | 2 | /* formula 1 */ 3 | strategy MinCost = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 4 | 5 | /* formula 2 */ 6 | saveStrategy("%resultsdir%/MinCost.strategy.json", MinCost) 7 | 8 | /* formula 3 */ 9 | E[<=120;%checks%] (max: D/1000) under MinCost 10 | 11 | /* formula 4 */ 12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinCost 13 | 14 | /* formula 5 */ 15 | E[<=120;%checks%] (max: interventions) under MinCost 16 | 17 | 18 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/MinimizeInterventionsEvaluate.q: -------------------------------------------------------------------------------- 1 | 2 | /* formula 1 */ 3 | strategy MinInterventions = minE (interventions) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 4 | 5 | /* formula 2 */ 6 | saveStrategy("%resultsdir%/MinInterventions.strategy.json", MinInterventions) 7 | 8 | /* formula 3 */ 9 | E[<=120;%checks%] (max: D/1000) under MinInterventions 10 | 11 | /* formula 4 */ 12 | E[<=120;%checks%] (max:(rDistance <= 0)) under MinInterventions 13 | 14 | /* formula 5 */ 15 | E[<=120;%checks%] (max: interventions) under MinInterventions 16 | 17 | 18 | -------------------------------------------------------------------------------- 
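The `%resultsdir%` and `%checks%` tokens in the query blueprints above (and below) appear to be placeholders that the experiment scripts substitute before the queries are handed to UPPAAL Stratego; `%checks%00` would then simply become the configured number of checks with two zeros appended. The repository's own substitution code is Julia and is not shown here, so the following Python sketch is purely illustrative — `instantiate_blueprint` and the example paths are hypothetical names.

```python
# Illustrative only: fill in the %resultsdir% and %checks% placeholders of a
# query blueprint and write out a concrete query file.
from pathlib import Path

def instantiate_blueprint(blueprint_path, results_dir, checks, out_path):
    text = Path(blueprint_path).read_text()
    text = text.replace("%resultsdir%", str(results_dir))
    text = text.replace("%checks%", str(checks))
    Path(out_path).write_text(text)

# e.g. instantiate_blueprint("Blueprints/PreShielded.q", "Results/run1", 1000,
#                            "Results/run1/PreShielded.q")
```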
/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/NoStrategyEvaluate.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 1 */ 4 | E[<=120;%checks%] (max: D/1000) 5 | 6 | /* formula 2 */ 7 | E[<=120;%checks%] (max:(rDistance <= 0)) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:interventions) 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Blueprints/TrainSaveEvaluateSingle.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | // HACK: Since this query file is only used for PreShield, haven't implemented a way to count interventions. It will be zero, because I need a number to be printed. 3 | 4 | /* formula 1 */ 5 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 6 | 7 | /* formula 2 */ 8 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 9 | 10 | /* formula 3 */ 11 | E[<=120;%checks%] (max: D/1000) under PreShielded 12 | 13 | /* formula 4 */ 14 | E[<=120;%checks%] (max:(rDistance <= 0)) under PreShielded 15 | 16 | /* formula 5 */ 17 | E[<=120;2] (max: 0) 18 | 19 | 20 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-CCShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[<=120;%checks%] (max:number_deaths > 0) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[<=120;%checks%] (max:interventions) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DCShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 2 */ 4 | E[<=120;%checks%] (max:Monitor.dist + switches*1.0) 5 | 6 | /* formula 3 */ 7 | E[<=120;%checks%] (max:number_deaths > 0) 8 | 9 | /* formula 4 */ 10 | E[<=120;%checks%] (max:interventions) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-DifferenceRigorousBarbaric/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-NoRecovery/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-OPShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PostShielded.q: -------------------------------------------------------------------------------- 1 | //Load a strategy using deterrence in {1000, 100, 10, 0}, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy Deterrence1000 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence1000.strategy.json") 5 | 6 | /* formula 2 */ 7 | E[#<=30;%checks%] (max:total_cost) under Deterrence1000 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%00] (max:t>1) under Deterrence1000 11 | 12 | /* formula 4 */ 13 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence1000 14 | 15 | /* formula 5 */ 16 | strategy Deterrence100 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence100.strategy.json") 17 | 18 | /* formula 6 */ 19 | E[#<=30;%checks%] (max:total_cost) under Deterrence100 20 | 21 | /* formula 7 */ 22 | E[#<=30;%checks%00] (max:t>1) under Deterrence100 23 | 24 | /* formula 8 */ 25 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence100 26 | 27 | /* formula 9 */ 28 | strategy Deterrence10 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence10.strategy.json") 29 | 30 | /* formula 10 */ 31 | E[#<=30;%checks%] (max:total_cost) under Deterrence10 32 | 33 | /* formula 11 */ 34 | E[#<=30;%checks%00] (max:t>1) under Deterrence10 35 | 36 | /* formula 12 */ 37 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence10 38 | 39 | 40 | /* formula 13 */ 41 | strategy Deterrence0 = loadStrategy {} -> {x, t} ("%resultsdir%/Deterrence0.strategy.json") 42 | 43 | /* formula 14 */ 44 | E[#<=30;%checks%] (max:total_cost) under Deterrence0 45 | 46 | /* formula 15 */ 47 | E[#<=30;%checks%00] (max:t>1) under Deterrence0 48 | 49 | /* formula 16 */ 50 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under Deterrence0 51 | 52 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/PreShielded.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%] (max:total_cost) under PreShielded 11 | 12 | /* formula 4 */ 13 | E[#<=30;%checks%00] (max:t>1) under PreShielded 14 | 15 | /* formula 5 */ 16 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Blueprints/ShieldedLayabout.q: -------------------------------------------------------------------------------- 1 | //Evaluate the queries with no strategy applied 2 | 3 | /* formula 1 */ 4 | E[#<=30;%checks%] (max:total_cost) 5 | 6 | /* formula 2 */ 7 | E[#<=30;%checks%00] (max:t>1) 8 | 9 | /* formula 3 */ 10 | E[#<=30;%checks%] (max:100*interventions/(steps || 1)) 11 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/fig-RWShieldingResultsGroup/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then check its safety. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (LearnerPlayer.fired) [<=120] {} -> {p, v}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> number_deaths > 0) under PreShielded 11 | 12 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-BBSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Shields 2 | 3 | Synthesize shields and test their safety against different random agents. 4 | "Shield" is used as shorthand for a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | A random agent is defined by its `hit_chance` such that it will choose randomly between actions `(hit, nohit)` with probabilities `(hit_chance, 1-hit_chance)`. 7 | 8 | Shields are synthesised using either a "barbaric" or "rigorous" reachability method. 9 | The rigorous method makes use of the library `ReachabilityAnalysis.jl` to over-approximate possible outcomes of the system. This gives theoretical guarantees for safety, at the cost of more compute time and a less optimistic shield. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. 
This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "Run Experiment.jl"` from within this folder. 13 | 14 | Some of the files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 15 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (D/1000) [<=120] {} -> {rVelocityEgo, rVelocityFront, rDistance}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> rDistance <= 0) under PreShielded 11 | 12 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-CCSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Cruise Control Shields 2 | 3 | Synthesize shields and test their safety on a random agent. 4 | By shields I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used. 7 | It is the random agent with uniform chance of picking any action. 8 | 9 | Shields are synthesised using the "barbaric" reachability method only. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-CCSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder. 13 | 14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`. 15 | 16 | The files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 17 | -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-DCSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 
2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE(Monitor.dist + switches*1.0) [<=120] {Converter.location} -> {x1, x2}: <> time >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<> number_deaths > 0) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-OPSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then check its safety. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (aov) [<=120] {p} -> {t, v}: <> elapsed >= 120 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[<=120] (<>(number_deaths > 0)) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Blueprints/TrainSaveCheckSafety.q: -------------------------------------------------------------------------------- 1 | // Train a single strategy, save it, then evaluate it. 2 | 3 | /* formula 1 */ 4 | strategy PreShielded = minE (total_cost) [#<=30] {} -> {x, t} : <> x>=1 or t>=1 5 | 6 | /* formula 2 */ 7 | saveStrategy("%resultsdir%/PreShielded.strategy.json", PreShielded) 8 | 9 | /* formula 3 */ 10 | Pr[#<=30] (<> t>1) under PreShielded -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/Example.png -------------------------------------------------------------------------------- /Safe-RL/Shield-Hybrid-Systems/tab-RWSynthesis/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Synthesize and Test Random Walk Shields 2 | 3 | Synthesize shields and test their safety on a random agent. 4 | By shields I mean a nondeterministic strategy that can be used to shield a learning agent or another strategy. 5 | 6 | As opposed to the similar Bouncing Ball experiment, only one random agent will be used. 7 | It is the random agent with uniform chance of picking any action. 8 | 9 | Shields are synthesised using the "barbaric" reachability method only. 10 | The barbaric method makes use of a sampling-based method to under-approximate the possible outcomes of the system. This is a quick-and-dirty solution to the reachability problem, and what is tested here is whether it works in practice. 11 | 12 | Everything is tied together in the file `Run Experiment.jl`. Run as `julia "tab-RWSynthesis/Run Experiment.jl"` from within the ReproducibilityPackage folder. 13 | 14 | It makes use of files `CC Synthesize Set of Shields.jl` and `CC Statistical Checking of Shield.jl` which in turn depend on code found in `Shared Code`. 15 | 16 | The files are Pluto Notebooks, which by their nature are also valid standalone julia scripts. 
17 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | __pycache__/ 3 | *.pkl 4 | **/*.egg-info 5 | .python-version 6 | .idea/ 7 | .vscode/ 8 | .DS_Store 9 | _build/ 10 | data/*ppo* 11 | *.pickle 12 | .ipynb_checkpoints/ 13 | *.ckpt 14 | #*.png 15 | *.pt 16 | */simple_save/* -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.util import deprecation as deprecation 2 | deprecation._PRINT_DEPRECATION_WARNINGS = False 3 | 4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo 5 | from safe_rl.sac.sac import sac -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/pg/trust_region.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from safe_rl.pg.utils import EPS 4 | 5 | 6 | """ 7 | Tensorflow utilities for trust region optimization 8 | """ 9 | 10 | def flat_concat(xs): 11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0) 12 | 13 | def flat_grad(f, params): 14 | return flat_concat(tf.gradients(xs=params, ys=f)) 15 | 16 | def hessian_vector_product(f, params): 17 | # for H = grad**2 f, compute Hx 18 | g = flat_grad(f, params) 19 | x = tf.placeholder(tf.float32, shape=g.shape) 20 | return x, flat_grad(tf.reduce_sum(g*x), params) 21 | 22 | def assign_params_from_flat(x, params): 23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars 24 | splits = tf.split(x, [flat_size(p) for p in params]) 25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)] 26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)]) 27 | 28 | 29 | """ 30 | Conjugate gradient 31 | """ 32 | 33 | def cg(Ax, b, cg_iters=10): 34 | x = np.zeros_like(b) 35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start. 36 | p = r.copy() 37 | r_dot_old = np.dot(r,r) 38 | for _ in range(cg_iters): 39 | z = Ax(p) 40 | alpha = r_dot_old / (np.dot(p, z) + EPS) 41 | x += alpha * p 42 | r -= alpha * z 43 | r_dot_new = np.dot(r,r) 44 | p = r + (r_dot_new / r_dot_old) * p 45 | r_dot_old = r_dot_new 46 | return x -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/pg/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.signal 3 | 4 | EPS = 1e-8 5 | 6 | def combined_shape(length, shape=None): 7 | if shape is None: 8 | return (length,) 9 | return (length, shape) if np.isscalar(shape) else (length, *shape) 10 | 11 | def keys_as_sorted_list(dict): 12 | return sorted(list(dict.keys())) 13 | 14 | def values_as_sorted_list(dict): 15 | return [dict[k] for k in keys_as_sorted_list(dict)] 16 | 17 | def discount_cumsum(x, discount): 18 | """ 19 | magic from rllab for computing discounted cumulative sums of vectors. 
20 | 21 | input: 22 | vector x, 23 | [x0, 24 | x1, 25 | x2] 26 | 27 | output: 28 | [x0 + discount * x1 + discount^2 * x2, 29 | x1 + discount * x2, 30 | x2] 31 | """ 32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 33 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/baseline/safe_rl/sac/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/load_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import joblib 4 | import os 5 | import os.path as osp 6 | import tensorflow as tf 7 | from safe_rl.utils.logx import restore_tf_graph 8 | 9 | def load_policy(fpath, itr='last', deterministic=False): 10 | 11 | # handle which epoch to load from 12 | if itr=='last': 13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11] 14 | itr = '%d'%max(saves) if len(saves) > 0 else '' 15 | else: 16 | itr = '%d'%itr 17 | 18 | # load the things! 19 | sess = tf.Session(graph=tf.Graph()) 20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr)) 21 | 22 | # get the correct op for executing actions 23 | if deterministic and 'mu' in model.keys(): 24 | # 'deterministic' is only a valid option for SAC policies 25 | print('Using deterministic action op.') 26 | action_op = model['mu'] 27 | else: 28 | print('Using default action op.') 29 | action_op = model['pi'] 30 | 31 | # make function for producing an action given a single state 32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0] 33 | 34 | # try to load environment from save 35 | # (sometimes this will fail because the environment could not be pickled) 36 | try: 37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) 38 | env = state['env'] 39 | except: 40 | env = None 41 | 42 | return env, get_action, sess -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/readme.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo). -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/run_utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os.path as osp 3 | 4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data') 5 | 6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True): 7 | 8 | # Make base path 9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else '' 10 | relpath = ''.join([ymd_time, exp_name]) 11 | 12 | if seed is not None: 13 | # Make a seed-specific subfolder in the experiment directory. 
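        # Illustrative example (the timestamp is taken at call time, and 'cpo' is
        # just a placeholder experiment name):
        #   setup_logger_kwargs('cpo', seed=0, datestamp=True) gives
        #     output_dir = <data_dir>/2021-01-01_cpo/2021-01-01_12-00-00-cpo_s0
        #   whereas datestamp=False gives <data_dir>/cpo/cpo_s0.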
14 | if datestamp: 15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S") 16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)]) 17 | else: 18 | subfolder = ''.join([exp_name, '_s', str(seed)]) 19 | relpath = osp.join(relpath, subfolder) 20 | 21 | data_dir = data_dir or DEFAULT_DATA_DIR 22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath), 23 | exp_name=exp_name) 24 | return logger_kwargs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/safe_rl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return (convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/baseline/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 
9 | 10 | setup( 11 | name='safe_rl', 12 | packages=['safe_rl'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib==0.14.0', 16 | 'matplotlib==3.1.1', 17 | 'mpi4py==3.0.2', 18 | 'mujoco_py==2.0.2.7', 19 | 'numpy~=1.17.4', 20 | 'seaborn==0.8.1', 21 | 'tensorflow==1.15.4', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg1/ensemble-cem/ensemble-cem_s10/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./data/config.yml 3 | correct: 0 4 | dir: data/cg1/ 5 | ensemble: 5 6 | episode: 10 7 | epoch: 70 8 | level: 1 9 | load: null 10 | name: ensemble-cem 11 | obs_stack: false 12 | optimizer: cem 13 | render: false 14 | robot: car 15 | save: false 16 | seed: 10 17 | task: goal 18 | test: false 19 | cost_config: 20 | batch: 2000 21 | load: false 22 | load_folder: null 23 | max_ratio: 3 24 | model_param: 25 | boosting_type: gbdt 26 | learning_rate: 0.3 27 | max_depth: 8 28 | n_estimators: 400 29 | n_jobs: 1 30 | num_leaves: 12 31 | safe_buffer_size: 50000 32 | save: false 33 | save_folder: null 34 | unsafe_buffer_size: 10000 35 | dynamic_config: 36 | activation: relu 37 | batch_size: 256 38 | buffer_size: 500000 39 | data_split: 0.8 40 | hidden_sizes: 41 | - 1024 42 | - 1024 43 | - 1024 44 | learning_rate: 0.001 45 | load: false 46 | load_folder: null 47 | n_epochs: 70 48 | save: false 49 | save_folder: null 50 | test_freq: 5 51 | test_ratio: 0.15 52 | exp_name: ensemble-cem 53 | mpc_config: 54 | CCE: 55 | alpha: 0.1 56 | epsilon: 0.01 57 | init_mean: 0 58 | init_var: 1 59 | max_iters: 8 60 | minimal_elites: 5 61 | num_elites: 12 62 | popsize: 500 63 | CEM: 64 | alpha: 0.1 65 | epsilon: 0.01 66 | init_mean: 0 67 | init_var: 1 68 | max_iters: 8 69 | num_elites: 12 70 | popsize: 500 71 | RANDOM: 72 | popsize: 5000 73 | gamma: 0.98 74 | horizon: 8 75 | optimizer: CEM 76 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg1/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/cg1 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 1 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: car 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/cg1/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/cg1/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | 
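The `mpc_config` block in the YAML files above parameterises a sampling-based MPC optimizer (the `CEM`, `RCE`/`CCE` and `RANDOM` entries). As a rough illustration of how fields such as `popsize`, `num_elites`, `max_iters`, `alpha`, `epsilon`, `init_mean`, `init_var` and `horizon` are typically used by a cross-entropy-method planner, here is a minimal Python sketch. `cem_plan` and `evaluate_cost` are hypothetical names; the repository's actual optimizer lives in the safe-mbrl sources and differs in details such as constraint handling (`minimal_elites` presumably bounds how many constraint-satisfying elites must be kept).

```python
# Minimal cross-entropy-method (CEM) sketch for choosing an action sequence.
# Not the repository's implementation: evaluate_cost stands in for rolling the
# sampled sequences through the learned dynamics/cost models.
import numpy as np

def cem_plan(evaluate_cost, act_dim, horizon=8, popsize=500, num_elites=12,
             max_iters=8, alpha=0.1, epsilon=0.01, init_mean=0.0, init_var=1.0):
    mean = np.full((horizon, act_dim), float(init_mean))
    var = np.full((horizon, act_dim), float(init_var))
    for _ in range(max_iters):
        if var.max() < epsilon:  # sampling distribution has collapsed; stop early
            break
        samples = np.random.normal(mean, np.sqrt(var),
                                   size=(popsize, horizon, act_dim))
        costs = evaluate_cost(samples)                     # shape: (popsize,)
        elites = samples[np.argsort(costs)[:num_elites]]   # lowest-cost sequences
        # Smoothly re-fit the sampling distribution to the elites.
        mean = alpha * mean + (1 - alpha) * elites.mean(axis=0)
        var = alpha * var + (1 - alpha) * elites.var(axis=0)
    return mean[0]  # MPC executes only the first action of the planned sequence

# e.g. first_action = cem_plan(lambda seqs: np.random.rand(len(seqs)), act_dim=2)
```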
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/cg2/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/cg2 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 2 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: car 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/cg2/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/cg2/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/TestFigure3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/TestFigure3.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg1-Cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Cost.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg1-Reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg1-Reward.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg2-Cost.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Cost.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/figures/pg2-Reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/data/figures/pg2-Reward.png 
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/pg1/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/pg1 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 1 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: point 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/pg1/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/pg1/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/data/pg2/weights/config.yml: -------------------------------------------------------------------------------- 1 | arguments: 2 | config: ./config.yml 3 | dir: data/pg2 4 | ensemble: 0 5 | episode: 10 6 | epoch: 80 7 | level: 2 8 | load: null 9 | name: rce 10 | optimizer: rce 11 | render: false 12 | robot: point 13 | save: true 14 | seed: 1 15 | test: false 16 | cost_config: 17 | batch: 2000 18 | load: false 19 | load_folder: null 20 | max_ratio: 3 21 | model_param: 22 | boosting_type: gbdt 23 | learning_rate: 0.3 24 | max_depth: 8 25 | n_estimators: 400 26 | n_jobs: 1 27 | num_leaves: 12 28 | safe_buffer_size: 50000 29 | save: true 30 | save_folder: data/pg2/rce/rce_s1 31 | unsafe_buffer_size: 10000 32 | dynamic_config: 33 | activation: relu 34 | batch_size: 256 35 | buffer_size: 500000 36 | data_split: 0.8 37 | hidden_sizes: 38 | - 1024 39 | - 1024 40 | - 1024 41 | learning_rate: 0.001 42 | load: false 43 | load_folder: null 44 | n_ensembles: 4 45 | n_epochs: 70 46 | save: true 47 | save_folder: data/pg2/rce/rce_s1 48 | test_freq: 5 49 | test_ratio: 0.15 50 | exp_name: rce 51 | mpc_config: 52 | CEM: 53 | alpha: 0.1 54 | epsilon: 0.01 55 | init_mean: 0 56 | init_var: 1 57 | max_iters: 8 58 | num_elites: 12 59 | popsize: 500 60 | RANDOM: 61 | popsize: 5000 62 | RCE: 63 | alpha: 0.1 64 | epsilon: 0.01 65 | init_mean: 0 66 | init_var: 1 67 | max_iters: 8 68 | minimal_elites: 5 69 | num_elites: 12 70 | popsize: 500 71 | gamma: 0.98 72 | horizon: 8 73 | optimizer: RCE 74 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated 
documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/build/lib/safety_gym/__init__.py: -------------------------------------------------------------------------------- 1 | import safety_gym.envs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/build/lib/safety_gym/random_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import gym 5 | import safety_gym # noqa 6 | import numpy as np # noqa 7 | 8 | def run_random(env_name): 9 | env = gym.make(env_name) 10 | obs = env.reset() 11 | done = False 12 | ep_ret = 0 13 | ep_cost = 0 14 | while True: 15 | if done: 16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost)) 17 | ep_ret, ep_cost = 0, 0 18 | obs = env.reset() 19 | assert env.observation_space.contains(obs) 20 | act = env.action_space.sample() 21 | assert env.action_space.contains(act) 22 | obs, reward, done, info = env.step(act) 23 | print(obs['magnetometer'], obs['gyro']) 24 | # print('reward', reward) 25 | ep_ret += reward 26 | ep_cost += info.get('cost', 0) 27 | env.render() 28 | 29 | 30 | if __name__ == '__main__': 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0') 34 | args = parser.parse_args() 35 | run_random(args.env) 36 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/dist/safety_gym-0.0.0-py3.6.egg -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/env/safety_gym.png -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/__init__.py: -------------------------------------------------------------------------------- 1 | import safety_gym.envs -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/envs/__init__.py: 
-------------------------------------------------------------------------------- 1 | import safety_gym.envs.suite -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/envs/mujoco.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # This file is just to get around a baselines import hack. 5 | # env_type is set based on the final part of the entry_point module name. 6 | # In the regular gym mujoco envs this is 'mujoco'. 7 | # We want baselines to treat these as mujoco envs, so we redirect from here, 8 | # and ensure the registry entries are pointing at this file as well. 9 | from safety_gym.envs.engine import * # noqa 10 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/random_agent.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import gym 5 | import safety_gym # noqa 6 | import numpy as np # noqa 7 | 8 | def run_random(env_name): 9 | env = gym.make(env_name) 10 | obs = env.reset() 11 | done = False 12 | ep_ret = 0 13 | ep_cost = 0 14 | while True: 15 | if done: 16 | print('Episode Return: %.3f \t Episode Cost: %.3f'%(ep_ret, ep_cost)) 17 | ep_ret, ep_cost = 0, 0 18 | obs = env.reset() 19 | assert env.observation_space.contains(obs) 20 | act = env.action_space.sample() 21 | assert env.action_space.contains(act) 22 | obs, reward, done, info = env.step(act) 23 | print(obs['magnetometer'], obs['gyro']) 24 | # print('reward', reward) 25 | ep_ret += reward 26 | ep_cost += info.get('cost', 0) 27 | env.render() 28 | 29 | 30 | if __name__ == '__main__': 31 | 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('--env', default='Safexp-CarGoal1-v0') 34 | args = parser.parse_args() 35 | run_random(args.env) 36 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/test/test_envs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import gym 5 | import safety_gym.envs # noqa 6 | 7 | 8 | class TestEnvs(unittest.TestCase): 9 | def check_env(self, env_name): 10 | ''' Run a single environment for a single episode ''' 11 | print('running', env_name) 12 | env = gym.make(env_name) 13 | env.reset() 14 | done = False 15 | while not done: 16 | _, _, done, _ = env.step(env.action_space.sample()) 17 | 18 | def test_envs(self): 19 | ''' Run all the bench envs ''' 20 | for env_spec in gym.envs.registry.all(): 21 | if 'Safexp' in env_spec.id: 22 | self.check_env(env_spec.id) 23 | 24 | 25 | 26 | if __name__ == '__main__': 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/test/test_goal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import numpy as np 5 | 6 | from safety_gym.envs.engine import Engine, ResamplingError 7 | 8 | 9 | class TestGoal(unittest.TestCase): 10 | def rollout_env(self, env): 11 | ''' roll an environment until it is done ''' 12 | done = False 13 | while not done: 14 | _, _, done, _ = env.step([1,0]) 15 | 16 | def test_resample(self): 17 | ''' Episode should end with resampling failure ''' 18 | config = { 19 | 'robot_base': 'xmls/point.xml', 20 | 'num_steps': 1001, 21 | 
'placements_extents': [-1, -1, 1, 1], 22 | 'goal_size': 1.414, 23 | 'goal_keepout': 1.414, 24 | 'goal_locations': [(1, 1)], 25 | 'robot_keepout': 1.414, 26 | 'robot_locations': [(-1, -1)], 27 | 'robot_rot': np.sin(np.pi / 4), 28 | 'terminate_resample_failure': True, 29 | '_seed': 0, 30 | } 31 | env = Engine(config) 32 | env.reset() 33 | self.assertEqual(env.steps, 0) 34 | # Move the robot towards the goal 35 | self.rollout_env(env) 36 | # Check that the environment terminated early 37 | self.assertLess(env.steps, 1000) 38 | 39 | # Try again with the raise 40 | config['terminate_resample_failure'] = False 41 | env = Engine(config) 42 | env.reset() 43 | # Move the robot towards the goal, which should cause resampling failure 44 | with self.assertRaises(ResamplingError): 45 | self.rollout_env(env) 46 | 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/safety_gym/xmls/README.md: -------------------------------------------------------------------------------- 1 | # xmls 2 | 3 | These are mujoco XML files which are used as bases for the simulations. 4 | 5 | Some design goals for them: 6 | 7 | - XML should be complete and simulate-able as-is 8 | - Include a floor geom which is a plane 9 | - Include joint sensor for the robot which provide observation 10 | - Include actuators which provide control 11 | - Default positions should all be neutral 12 | - position 0,0,0 should be resting on the floor, not intersecting it 13 | - robot should start at the origin 14 | - Scene should be clear of other objects 15 | - no obstacles or things to manipulate 16 | - only the robot in the scene 17 | 18 | Requirements for the robot 19 | - Position joints should be separate and named `x`, `y`, and `z` 20 | - 0, 0, 0 position should be resting on the floor above the origin at a neutral position 21 | - First 6 sensors should be (in order): 22 | - joint positions for x, y, z (absolute position in the scene) 23 | - joint velocities for x, y, z (absolute velocity in the scene) 24 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/env/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Gym is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 
9 | 10 | setup( 11 | name='safety_gym', 12 | packages=['safety_gym'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib~=0.14.0', 16 | 'mujoco_py==2.0.2.7', 17 | 'numpy~=1.17.4', 18 | 'xmltodict~=0.12.0', 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/.gitignore: -------------------------------------------------------------------------------- 1 | *.*~ 2 | __pycache__/ 3 | *.pkl 4 | data/ 5 | **/*.egg-info 6 | .python-version 7 | .idea/ 8 | .vscode/ 9 | .DS_Store 10 | _build/ 11 | data/ 12 | .ipynb_checkpoints/ 13 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-03-24 10:59:16 5 | @LastEditTime: 2020-05-26 00:19:29 6 | @Description: 7 | ''' 8 | 9 | from mbrl.controllers import MPC as MPC 10 | from mbrl.controllers import SafeMPC 11 | from mbrl.models.model import RegressionModel 12 | from mbrl.models.ensemble import RegressionModelEnsemble 13 | from mbrl.models.constraint_model import CostModel 14 | 15 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from .mpc_controller import MPC 2 | from .safe_mpc_controller import SafeMPC 3 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/mbrl/models/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .cem import CEMOptimizer 2 | from .random import RandomOptimizer 3 | from .rce import RCEOptimizer -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/mbrl/optimizers/optimizer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-03-24 01:02:01 5 | @LastEditTime: 2020-03-24 10:49:27 6 | @Description: 7 | ''' 8 | 9 | from __future__ import absolute_import 10 | from __future__ import print_function 11 | from __future__ import division 12 | 13 | 14 | class Optimizer: 15 | def __init__(self, *args, **kwargs): 16 | pass 17 | 18 | def setup(self, cost_function): 19 | raise NotImplementedError("Must be implemented in subclass.") 20 | 21 | def reset(self): 22 | raise NotImplementedError("Must be implemented in subclass.") 23 | 24 | def obtain_solution(self, *args, **kwargs): 25 | raise NotImplementedError("Must be implemented in subclass.") 26 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg1_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_random.gif 
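mbrl/optimizers/optimizer.py above fixes the interface every planner shares: setup(cost_function), reset(), and obtain_solution(); the package exports CEMOptimizer, RandomOptimizer, and RCEOptimizer as its concrete implementations. As an illustration only, a random-shooting planner consistent with the RANDOM: popsize: 5000 entry in the configs could subclass it as sketched below. This is not the repository's RandomOptimizer; action bounds and shapes are assumptions.

import numpy as np
from mbrl.optimizers.optimizer import Optimizer  # abstract base class shown above

class RandomShootingOptimizer(Optimizer):
    """Illustrative subclass of the Optimizer interface (not the repo's RandomOptimizer)."""

    def __init__(self, sol_dim, popsize=5000, lower_bound=-1.0, upper_bound=1.0):
        super().__init__()
        self.sol_dim = sol_dim
        self.popsize = popsize          # e.g. RANDOM.popsize = 5000 in the configs
        self.lb, self.ub = lower_bound, upper_bound
        self.cost_function = None

    def setup(self, cost_function):
        # The MPC controller registers its rollout-cost callback here.
        self.cost_function = cost_function

    def reset(self):
        pass  # random shooting keeps no state between planning calls

    def obtain_solution(self, *args, **kwargs):
        # Sample candidate action sequences uniformly and return the cheapest one.
        candidates = np.random.uniform(self.lb, self.ub, size=(self.popsize, self.sol_dim))
        costs = self.cost_function(candidates)
        return candidates[np.argmin(costs)]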
-------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg1_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg1_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg2_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/cg2_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/cg2_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_trpo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpo.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg1_trpol.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg1_trpol.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_random.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_random.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_rce.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_rce.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_trpo_10.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpo_10.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/media/pg2_trpol_10.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe-mbrl/media/pg2_trpol_10.gif -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/requirements.txt: -------------------------------------------------------------------------------- 1 | joblib==0.14.1 2 | matplotlib==3.1.3 3 | mpi4py==3.0.3 4 | psutil==5.7.2 5 | PyYAML==5.4 6 | tqdm==4.48.0 7 | seaborn==0.8.1 8 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @Author: Zuxin Liu 3 | @Email: zuxinl@andrew.cmu.edu 4 | @Date: 2020-05-23 16:02:07 5 | @LastEditTime: 2020-05-23 16:02:44 6 | @Description: 7 | ''' 8 | 9 | -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/mpi_pytorch.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import numpy as np 3 | import os 4 | import torch 5 | from mpi4py import MPI 6 | from utils.mpi_tools import broadcast, mpi_avg, num_procs, proc_id 7 | 8 | def setup_pytorch_for_mpi(): 9 | """ 10 | Avoid slowdowns caused by each separate process's PyTorch using 11 | more than its fair share of CPU resources. 12 | """ 13 | #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 14 | if torch.get_num_threads()==1: 15 | return 16 | fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1) 17 | torch.set_num_threads(fair_num_threads) 18 | #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True) 19 | 20 | def mpi_avg_grads(module): 21 | """ Average contents of gradient buffers across MPI processes. """ 22 | if num_procs()==1: 23 | return 24 | for p in module.parameters(): 25 | p_grad_numpy = p.grad.numpy() # numpy view of tensor data 26 | avg_p_grad = mpi_avg(p.grad) 27 | p_grad_numpy[:] = avg_p_grad[:] 28 | 29 | def sync_params(module): 30 | """ Sync all parameters of module across all MPI processes. 
""" 31 | if num_procs()==1: 32 | return 33 | for p in module.parameters(): 34 | p_numpy = p.data.numpy() 35 | broadcast(p_numpy) -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/run_entrypoint.py: -------------------------------------------------------------------------------- 1 | import zlib 2 | import pickle 3 | import base64 4 | 5 | if __name__ == '__main__': 6 | import argparse 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('encoded_thunk') 9 | args = parser.parse_args() 10 | thunk = pickle.loads(zlib.decompress(base64.b64decode(args.encoded_thunk))) 11 | thunk() -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return (convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safe-mbrl/utils/user_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | # Default neural network backend for each algo 5 | # (Must be either 'tf1' or 'pytorch') 6 | DEFAULT_BACKEND = { 7 | 'vpg': 'pytorch', 8 | 'trpo': 'tf1', 9 | 'ppo': 'pytorch', 10 | 'ddpg': 'pytorch', 11 | 'td3': 'pytorch', 12 | 'sac': 'pytorch' 13 | } 14 | 15 | # Where experiment outputs are saved by default: 16 | #DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data') 17 | DEFAULT_DATA_DIR = osp.join(osp.abspath('./'),'data') 18 | 19 | # Whether to automatically insert a date and time stamp into the names of 20 | # save directories: 21 | FORCE_DATESTAMP = False 22 | 23 | # Whether GridSearch provides automatically-generated default shorthands: 24 | DEFAULT_SHORTHAND = True 25 | 26 | # Tells the GridSearch how many seconds to pause for before launching 27 | # experiments. 
28 | WAIT_BEFORE_LAUNCH = 5 -------------------------------------------------------------------------------- /Safe-RL/safeRL/.gitignore: -------------------------------------------------------------------------------- 1 | safe_recovery/output 2 | safe_recovery/logging 3 | safe_recovery/saved_models 4 | safe_recovery/old_policies 5 | 6 | 7 | 8 | 9 | *.o 10 | *.pyc 11 | 12 | # Compiled source # 13 | ################### 14 | *.com 15 | *.class 16 | *.dll 17 | *.exe 18 | *.o 19 | *.so 20 | 21 | # Packages # 22 | ############ 23 | # it's better to unpack these files and commit the raw source 24 | # git has its own built in compression methods 25 | *.7z 26 | *.dmg 27 | *.gz 28 | *.iso 29 | *.jar 30 | *.rar 31 | *.tar 32 | *.zip -------------------------------------------------------------------------------- /Safe-RL/safeRL/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "MADRaS"] 2 | path = MADRaS 3 | url = https://github.com/madras-simulator/MADRaS 4 | [submodule "safe-grid-gym"] 5 | path = safe-grid-gym 6 | url = https://github.com/david-lindner/safe-grid-gym 7 | branch = safe_recovery 8 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [2019] [Harshit Sikchi] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/README.md~: -------------------------------------------------------------------------------- 1 | # HCOPE 2 | High-Confidence Off-Policy Evaluation. 3 | 4 | 5 | Python Implementation of HCOPE lower bound evaluation as given in the paper: 6 | Thomas, Philip S., Georgios Theocharous, and Mohammad Ghavamzadeh. "High-Confidence Off-Policy Evaluation." AAAI. 2015. 
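For reference, the ordinary (simple) importance-sampling estimator listed in the next section reweights each return observed under the behavior policy by the product of per-step action-probability ratios between the evaluation and behavior policies. The following is an illustrative sketch, not code from this repository; the trajectory format and names are assumptions.

import numpy as np

def simple_is_estimate(trajectories, gamma=1.0):
    """Ordinary importance-sampling estimate of the evaluation policy's return.

    Each trajectory is a list of (reward, pi_e_prob, pi_b_prob) tuples, where
    pi_e_prob / pi_b_prob are the evaluation / behavior action probabilities.
    """
    returns = []
    for traj in trajectories:
        ratio = np.prod([pe / pb for _, pe, pb in traj])       # importance weight
        ret = sum(gamma ** t * r for t, (r, _, _) in enumerate(traj))
        returns.append(ratio * ret)
    return np.mean(returns)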
7 | 8 | 9 | ## Importance Sampling 10 | 11 | Implementation of: 12 | * Simple Importance Sampling 13 | * Per-Decision Importance Sampling 14 | * Normalized Per-Decision Importance Sampling (NPDIS) Estimator 15 | * Weighted Importance Sampling (WIS) Estimator 16 | * Weighted Per-Decision Importance Sampling (WPDIS) Estimator 17 | * Consistent Weighted Per-Decision Importance Sampling (CWPDIS) Estimator 18 | 19 | Comparision of different importance sampling estimators: 20 | ![Different Importance sampling estimators](http://url/to/img.png) 21 | 22 | 23 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/citation.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | authors: 4 | - 5 | affiliation: "University of Texas at Austin" 6 | family-names: Sikchi 7 | given-names: Harshit 8 | cff-version: "1.1.0" 9 | license: MIT 10 | message: "If you use this software, please cite it using these metadata." 11 | repository-code: "https://github.com/hari-sikchi/safeRL" 12 | title: safeRL 13 | ... 14 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/importance_sampling/importance_sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/importance_sampling/importance_sampling.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_+_0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_+_0.1.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_minus_0.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_minus_0.1.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_dist_random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_dist_random.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/IS_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/IS_variance.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/Result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Result.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/Theorem.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/Theorem.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safe_actions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions.gif -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safe_actions_instability.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safe_actions_instability.gif -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_layer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_layer.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_optimization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_optimization.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/results/safety_signal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safeRL/results/safety_signal.png -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/optimizers.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES 11 | class Optimizer(object): 12 | def __init__(self, w_policy): 13 | self.w_policy = w_policy.flatten() 14 | self.dim = w_policy.size 15 | self.t = 0 16 | 17 | def update(self, globalg): 18 | self.t += 1 19 | step = self._compute_step(globalg) 20 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5) 21 | return self.w_policy + step, ratio 22 | 23 | def _compute_step(self, globalg): 24 | raise NotImplementedError 25 | 26 | 27 | class SGD(Optimizer): 28 | def __init__(self, pi, stepsize): 29 | Optimizer.__init__(self, pi) 30 | self.stepsize = stepsize 31 | 32 | def _compute_step(self, globalg): 33 | step = -self.stepsize * globalg 34 | return step 35 | 36 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/shared_noise.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/ray-project/ray/tree/master/python/ray/rllib/es 3 | 4 | import ray 5 | import numpy as np 6 | 7 | @ray.remote 8 | def create_shared_noise(): 9 | """ 10 | Create a large array of noise to be shared by all workers. Used 11 | for avoiding the communication of the random perturbations delta. 12 | """ 13 | 14 | seed = 12345 15 | count = 250000000 16 | noise = np.random.RandomState(seed).randn(count).astype(np.float64) 17 | return noise 18 | 19 | 20 | class SharedNoiseTable(object): 21 | def __init__(self, noise, seed = 11): 22 | 23 | self.rg = np.random.RandomState(seed) 24 | self.noise = noise 25 | assert self.noise.dtype == np.float64 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def get_mod(self, i, dim,ratio): 31 | return ratio*self.noise[i:i + dim] 32 | 33 | 34 | def sample_index(self, dim): 35 | return self.rg.randint(0, len(self.noise) - dim + 1) 36 | 37 | def get_delta(self, dim): 38 | idx = self.sample_index(dim) 39 | return idx, self.get(idx, dim) 40 | 41 | 42 | def get_delta_mod(self, dim,ratio): 43 | idx = self.sample_index(dim) 44 | return idx, ratio*self.get(idx, dim) 45 | 46 | -------------------------------------------------------------------------------- /Safe-RL/safeRL/safe_exploration/utils.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | def itergroups(items, group_size): 8 | assert group_size >= 1 9 | group = [] 10 | for x in items: 11 | group.append(x) 12 | if len(group) == group_size: 13 | yield tuple(group) 14 | del group[:] 15 | if group: 16 | yield tuple(group) 17 | 18 | 19 | 20 | def batched_weighted_sum(weights, vecs, batch_size): 21 | total = 0 22 | num_items_summed = 0 23 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size), 24 | itergroups(vecs, batch_size)): 25 | assert len(batch_weights) == len(batch_vecs) <= batch_size 26 | total += np.dot(np.asarray(batch_weights, dtype=np.float64), 27 | np.asarray(batch_vecs, dtype=np.float64)) 28 | num_items_summed += len(batch_weights) 29 | return total, num_items_summed 30 | 31 | def plot_info(param_dict, logdir): 32 | for key, value in param_dict.items(): 33 | x = value[0] 34 | y = value[1] 35 | x_name = value[2] 36 | y_name = value[3] 37 | print(x,y) 38 | plt.plot(x, y) 39 | plt.title(key) 40 | plt.xlabel(x_name) 41 | plt.ylabel(y_name) 42 | plt.savefig((logdir + "/plot_" +key + ".png")) 43 | plt.clf() 44 | 45 | 46 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.dockerignore: -------------------------------------------------------------------------------- 1 | examples 2 | htmlcov 3 | .travis.yml 4 | .gitignore 5 | .git 6 | *.pyc 7 | .ipynb_checkpoints 8 | **/__pycache__ 9 | safe_learning.egg-info 10 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea 3 | .ipynb_checkpoints 4 | htmlcov 5 | .coverage 6 | .cache 7 | safe_learning.egg-info 8 | __pycache__ 9 | docs/safe_learning.* 10 | docs/_build 11 | *.swp 12 | *.DS_Store 13 | .pytest_cache 14 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: required 4 | services: 5 | - docker 6 | 7 | env: 8 | - PYTHON=python2 9 | - PYTHON=python3 10 | 11 | # Setup anaconda 12 | install: 13 | # Disabled since docker pull does not affect cache 14 | # Fixed in Docker 1.13 with --cache-from 15 | # - docker pull befelix/lyapunov-learning-private:${PYTHON} || true 16 | - docker build -f Dockerfile.${PYTHON} -t test-image . 
17 | - docker ps -a 18 | 19 | # Run tests 20 | script: 21 | - docker run test-image scripts/test_code.sh 22 | 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda update conda --yes --quiet \ 11 | && conda install python=3.5 pip numpy scipy pandas --yes --quiet \ 12 | && conda clean --yes --all \ 13 | && hash -r 14 | 15 | # Get the requirements files (seperate from the main body) 16 | COPY requirements.txt requirements_dev.txt /reqs/ 17 | 18 | # Install requirements and clean up 19 | RUN pip --no-cache-dir install -r /reqs/requirements.txt \ 20 | && pip --no-cache-dir install -r /reqs/requirements_dev.txt \ 21 | && pip install jupyter jupyterlab dumb-init \ 22 | && rm -rf /root/.cache \ 23 | && rm -rf /reqs 24 | 25 | # Manually install GPflow and clean up 26 | RUN git clone --depth=1 --branch=0.4.0 https://github.com/GPflow/GPflow.git \ 27 | && cd GPflow \ 28 | && python setup.py install \ 29 | && rm -rf /GPflow 30 | 31 | # Output scrubber for jupyter 32 | ADD scripts/jupyter_output.py / 33 | 34 | RUN jupyter notebook --generate-config \ 35 | && cat /jupyter_output.py >> /root/.jupyter/jupyter_notebook_config.py \ 36 | && rm /jupyter_output.py 37 | 38 | WORKDIR /code 39 | 40 | # Make sure Ctrl+C commands can be forwarded 41 | ENTRYPOINT ["dumb-init", "--"] 42 | 43 | CMD python setup.py develop \ 44 | && jupyter lab --ip="0.0.0.0" --no-browser --allow-root 45 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.python2: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=2.7 --yes --quiet \ 11 | && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . /code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Dockerfile.python3: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda3:4.5.11 2 | 3 | # Install build essentials and clean up 4 | RUN apt-get update --quiet \ 5 | && apt-get install -y --no-install-recommends --quiet build-essential \ 6 | && apt-get clean \ 7 | && rm -rf /var/lib/apt/lists/* 8 | 9 | # Update conda, install packages, and clean up 10 | RUN conda install python=3.5 --yes --quiet \ 11 | # && conda clean --yes --all \ 12 | && hash -r 13 | 14 | # Copy the main code 15 | COPY . 
/code 16 | RUN cd /code \ 17 | && pip install pip==18.1 \ 18 | && pip install numpy==1.14.5 \ 19 | && pip install -e .[test] --process-dependency-links \ 20 | && rm -rf /root/.cache 21 | 22 | WORKDIR /code 23 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Felix Berkenkamp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help 2 | 3 | help: 4 | @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' 5 | 6 | doc: ## Build documentation (docs/_build/html/index.html) 7 | cd docs && $(MAKE) html 8 | 9 | coverage: ## Construct coverage (htmlcov/index.html) 10 | coverage html 11 | 12 | test-local: ## Test the local installation of the code 13 | ./scripts/test_code.sh 14 | 15 | test: docker ## Test the docker images 16 | docker run safe_learning_py2 make test-local 17 | docker run safe_learning_py3 make test-local 18 | 19 | dev: ## Mount current code as volume and run jupyterlab for development 20 | docker build -f Dockerfile.dev -t safe_learning_dev . 21 | docker run -p 8888:8888 -v $(shell pwd):/code safe_learning_dev 22 | 23 | docker: ## Build the docker images 24 | docker build -f Dockerfile.python2 -t safe_learning_py2 . 25 | docker build -f Dockerfile.python3 -t safe_learning_py3 . 26 | 27 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = SafeLearning 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/_templates/template.rst: -------------------------------------------------------------------------------- 1 | {{ name }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | .. auto{{ objtype }}:: {{ objname }} {% if objtype == "class" %} 6 | :members: 7 | :inherited-members: 8 | {% endif %} 9 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/api.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ***************** 3 | 4 | .. automodule:: safe_learning 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to the Safe Learning documentation! 2 | =========================================== 3 | 4 | .. include:: introduction.rst 5 | 6 | .. toctree:: 7 | :caption: Contents 8 | :maxdepth: 3 9 | 10 | api 11 | 12 | Indices and tables 13 | ================== 14 | 15 | * :ref:`genindex` 16 | * :ref:`modindex` 17 | * :ref:`search` 18 | 19 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/introduction.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | TODO -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=SafeLearning 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | numpydoc >= 0.6 3 | sphinx_rtd_theme >= 0.1.8 4 | mock 5 | 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/examples/README.rst: -------------------------------------------------------------------------------- 1 | Example notebooks for the library 2 | ================================= 3 | 4 | Introductions 5 | ------------- 6 | - `1d_region_of_attraction_estimate.ipynb <./1d_region_of_attraction_estimate.ipynb>`_ shows how to estimate and learn the region of attraction for a fixed policy. 
7 | - `basic_dynamic_programming.ipynb <./basic_dynamic_programming.ipynb>`_ does basic dynamic programming with piecewise linear function approximators for the mountain car example. 8 | - `reinforcement_learning_pendulum.ipynb <./reinforcement_learning_pendulum.ipynb>`_ does approximate policy iteration in an actor-critic framework with neural networks for the inverted pendulum. 9 | - `reinforcement_learning_cartpole.ipynb <./reinforcement_learning_cartpole.ipynb>`_ does the same as above for the cart-pole (i.e., the inverted pendulum on a cart). 10 | 11 | Experiments 12 | ----------- 13 | - `1d_example.ipynb <./1d_example.ipynb>`_ contains a 1D example including plots of the sets. 14 | - `inverted_pendulum.ipynb <./inverted_pendulum.ipynb>`_ contains a full neural network example with an inverted pendulum. 15 | - `adaptive_safety_verification.ipynb <./adaptive_safety_verification.ipynb>`_ investigates the benefits of an adaptive discretization in identifying safe sets for the inverted pendulum. 16 | - `lyapunov_function_learning.ipynb <./lyapunov_function_learning.ipynb>`_ demonstrates how a parameterized Lyapunov candidate for the inverted pendulum can be trained with the machine learning approach in [1]_. 17 | 18 | .. [1] S. M. Richards, F. Berkenkamp, A. Krause, 19 | `The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems `_. Conference on Robot Learning (CoRL), 2018. 20 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.0,<1.15 2 | scipy>=1.0.0<=1.2.1 3 | gpflow==0.4.0 4 | matplotlib<=4.0.0 5 | scs==2.0.2 6 | cvxpy>=1,<=1.0.15 7 | tensorflow>=1.6.0,<=1.12.0 8 | future<=0.18.0 9 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | mock 2 | flake8>=3.0,<=3.5.0 3 | pytest==4.6.9 4 | pytest-cov==2.8.1 5 | pydocstyle>=2.0,<2.1 6 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/safe_learning/configuration.py: -------------------------------------------------------------------------------- 1 | """General configuration class for dtypes.""" 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import tensorflow as tf 6 | 7 | 8 | class Configuration(object): 9 | """Configuration class.""" 10 | 11 | def __init__(self): 12 | """Initialization.""" 13 | super(Configuration, self).__init__() 14 | 15 | # Dtype for computations 16 | self.dtype = tf.float64 17 | 18 | # Batch size for stability verification 19 | self.gp_batch_size = 10000 20 | 21 | @property 22 | def np_dtype(self): 23 | """Return the numpy dtype.""" 24 | return self.dtype.as_numpy_dtype 25 | 26 | def __repr__(self): 27 | """Print the parameters.""" 28 | params = ['Configuration parameters:', ''] 29 | for param, value in self.__dict__.items(): 30 | params.append('{}: {}'.format(param, value.__repr__())) 31 | 32 | return '\n'.join(params) 33 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/scripts/jupyter_output.py: -------------------------------------------------------------------------------- 1 | def scrub_output_pre_save(model, **kwargs): 2 | """scrub output before saving notebooks""" 3 | # only run on notebooks 4 | if 
model['type'] != 'notebook': 5 | return 6 | # only run on nbformat v4 7 | if model['content']['nbformat'] != 4: 8 | return 9 | 10 | for cell in model['content']['cells']: 11 | if cell['cell_type'] != 'code': 12 | continue 13 | cell['outputs'] = [] 14 | cell['execution_count'] = None 15 | 16 | c.FileContentsManager.pre_save_hook = scrub_output_pre_save 17 | -------------------------------------------------------------------------------- /Safe-RL/safe_learning/scripts/test_code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | module="safe_learning" 4 | 5 | get_script_dir () { 6 | SOURCE="${BASH_SOURCE[0]}" 7 | # While $SOURCE is a symlink, resolve it 8 | while [ -h "$SOURCE" ]; do 9 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 10 | SOURCE="$( readlink "$SOURCE" )" 11 | # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory 12 | [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" 13 | done 14 | DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" 15 | echo "$DIR" 16 | } 17 | 18 | # Change to script root 19 | cd $(get_script_dir)/.. 20 | GREEN='\033[0;32m' 21 | NC='\033[0m' 22 | 23 | # Run style tests 24 | echo -e "${GREEN}Running style tests.${NC}" 25 | flake8 $module --exclude test*.py,__init__.py --ignore=E402,E731,W503 --show-source || { exit 1; } 26 | 27 | # Ignore import errors for __init__ and tests 28 | flake8 $module --filename=__init__.py,test*.py --ignore=F,E402,W503 --show-source || { exit 1; } 29 | 30 | echo -e "${GREEN}Testing docstring conventions.${NC}" 31 | # Test docstring conventions 32 | pydocstyle $module --convention=numpy || { exit 1; } 33 | 34 | # Run unit tests 35 | echo -e "${GREEN}Running unit tests.${NC}" 36 | pytest --doctest-modules --cov --cov-fail-under=80 $module || { exit 1; } 37 | 38 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | __pycache__/ 4 | .vscode/ 5 | result/ 6 | old/ 7 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/GPSG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/GPSG.png -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Akifumi Wachi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safe_near_optimal_mdp/data/simple/random_settings.npz -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/simple_make_rand_settings.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | 3 | import GPy 4 | import numpy as np 5 | import arguments 6 | 7 | from safemdp.grid_world import (draw_gp_sample, compute_S_hat0) 8 | 9 | 10 | args = arguments.safemdp_argparse() 11 | 12 | # Define world 13 | world_shape = args.world_shape 14 | step_size = args.step_size 15 | 16 | # Define GP for safety 17 | noise_safety = args.noise_safety 18 | safety_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.), 19 | variance=1., ARD=True) 20 | safety_lik = GPy.likelihoods.Gaussian(variance=noise_safety ** 2) 21 | safety_lik.constrain_bounded(1e-6, 10000.) 22 | 23 | # Define GP for reward 24 | noise_reward = args.noise_reward 25 | reward_kernel = GPy.kern.RBF(input_dim=2, lengthscale=(2., 2.), 26 | variance=1., ARD=True) 27 | reward_lik = GPy.likelihoods.Gaussian(variance=noise_reward ** 2) 28 | reward_lik.constrain_bounded(1e-6, 10000.) 29 | 30 | # Safety and Reward functions 31 | safety, _ = draw_gp_sample(safety_kernel, world_shape, step_size) 32 | reward, _ = draw_gp_sample(reward_kernel, world_shape, step_size) 33 | 34 | # Set the minimum value for reward as zero 35 | reward -= min(reward) 36 | 37 | # Safety threshold, Lipschitz constant, scaling factors for confidence interval 38 | h = args.h 39 | 40 | # Initialize safe sets 41 | S0 = np.zeros((np.prod(world_shape), 5), dtype=bool) 42 | S0[:, 0] = True 43 | S_hat0 = compute_S_hat0(np.nan, world_shape, 4, safety, step_size, h) 44 | start_pos = np.random.choice(np.where(S_hat0)[0]) 45 | 46 | # Save the problem settings as a npz file 47 | np.savez('data/simple/random_settings_new', safety=safety, reward=reward, 48 | start_pos=start_pos) 49 | -------------------------------------------------------------------------------- /Safe-RL/safe_near_optimal_mdp/utils/reward_utilities.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | __all__ = ['RewardObj'] 5 | 6 | 7 | class RewardObj(object): 8 | """Reward Object in MDPs. 9 | 10 | Parameters 11 | ---------- 12 | gp_r: GPy.core.GPRegression 13 | A Gaussian process model that can be used to determine the reward. 14 | beta_r: float 15 | The confidence interval used by the GP model.
16 | """ 17 | def __init__(self, gp_r, beta_r): 18 | super(RewardObj, self).__init__() 19 | 20 | # Scalar for gp confidence intervals 21 | self.beta = beta_r 22 | # GP model 23 | self.gp = gp_r 24 | 25 | def add_gp_observations(self, x_new, y_new): 26 | """Add observations to the gp.""" 27 | # Update GP with observations 28 | self.gp.set_XY(np.vstack((self.gp.X, x_new)), 29 | np.vstack((self.gp.Y, y_new))) 30 | -------------------------------------------------------------------------------- /Safe-RL/safe_rl_papers/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Chi Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/__init__.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.util import deprecation as deprecation 2 | deprecation._PRINT_DEPRECATION_WARNINGS = False 3 | 4 | from safe_rl.pg.algos import ppo, ppo_lagrangian, trpo, trpo_lagrangian, cpo 5 | from safe_rl.sac.sac import sac -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/pg/trust_region.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from safe_rl.pg.utils import EPS 4 | 5 | 6 | """ 7 | Tensorflow utilities for trust region optimization 8 | """ 9 | 10 | def flat_concat(xs): 11 | return tf.concat([tf.reshape(x,(-1,)) for x in xs], axis=0) 12 | 13 | def flat_grad(f, params): 14 | return flat_concat(tf.gradients(xs=params, ys=f)) 15 | 16 | def hessian_vector_product(f, params): 17 | # for H = grad**2 f, compute Hx 18 | g = flat_grad(f, params) 19 | x = tf.placeholder(tf.float32, shape=g.shape) 20 | return x, flat_grad(tf.reduce_sum(g*x), params) 21 | 22 | def assign_params_from_flat(x, params): 23 | flat_size = lambda p : int(np.prod(p.shape.as_list())) # the 'int' is important for scalars 24 | splits = tf.split(x, [flat_size(p) for p in params]) 25 | new_params = [tf.reshape(p_new, p.shape) for p, p_new in zip(params, splits)] 26 | return tf.group([tf.assign(p, p_new) for p, p_new in zip(params, new_params)]) 27 | 28 | 29 | """ 30 | Conjugate gradient 31 | """ 32 | 33 | def cg(Ax, b, cg_iters=10): 34 | x = np.zeros_like(b) 35 | r = b.copy() # Note: should be 'b - Ax(x)', but for x=0, Ax(x)=0. Change if doing warm start. 36 | p = r.copy() 37 | r_dot_old = np.dot(r,r) 38 | for _ in range(cg_iters): 39 | z = Ax(p) 40 | alpha = r_dot_old / (np.dot(p, z) + EPS) 41 | x += alpha * p 42 | r -= alpha * z 43 | r_dot_new = np.dot(r,r) 44 | p = r + (r_dot_new / r_dot_old) * p 45 | r_dot_old = r_dot_new 46 | return x -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/pg/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.signal 3 | 4 | EPS = 1e-8 5 | 6 | def combined_shape(length, shape=None): 7 | if shape is None: 8 | return (length,) 9 | return (length, shape) if np.isscalar(shape) else (length, *shape) 10 | 11 | def keys_as_sorted_list(dict): 12 | return sorted(list(dict.keys())) 13 | 14 | def values_as_sorted_list(dict): 15 | return [dict[k] for k in keys_as_sorted_list(dict)] 16 | 17 | def discount_cumsum(x, discount): 18 | """ 19 | magic from rllab for computing discounted cumulative sums of vectors. 
20 | 21 | input: 22 | vector x, 23 | [x0, 24 | x1, 25 | x2] 26 | 27 | output: 28 | [x0 + discount * x1 + discount^2 * x2, 29 | x1 + discount * x2, 30 | x2] 31 | """ 32 | return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1] 33 | -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/safety-starter-agents/safe_rl/sac/__init__.py -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/load_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import joblib 4 | import os 5 | import os.path as osp 6 | import tensorflow as tf 7 | from safe_rl.utils.logx import restore_tf_graph 8 | 9 | def load_policy(fpath, itr='last', deterministic=False): 10 | 11 | # handle which epoch to load from 12 | if itr=='last': 13 | saves = [int(x[11:]) for x in os.listdir(fpath) if 'simple_save' in x and len(x)>11] 14 | itr = '%d'%max(saves) if len(saves) > 0 else '' 15 | else: 16 | itr = '%d'%itr 17 | 18 | # load the things! 19 | sess = tf.Session(graph=tf.Graph()) 20 | model = restore_tf_graph(sess, osp.join(fpath, 'simple_save'+itr)) 21 | 22 | # get the correct op for executing actions 23 | if deterministic and 'mu' in model.keys(): 24 | # 'deterministic' is only a valid option for SAC policies 25 | print('Using deterministic action op.') 26 | action_op = model['mu'] 27 | else: 28 | print('Using default action op.') 29 | action_op = model['pi'] 30 | 31 | # make function for producing an action given a single state 32 | get_action = lambda x : sess.run(action_op, feed_dict={model['x']: x[None,:]})[0] 33 | 34 | # try to load environment from save 35 | # (sometimes this will fail because the environment could not be pickled) 36 | try: 37 | state = joblib.load(osp.join(fpath, 'vars'+itr+'.pkl')) 38 | env = state['env'] 39 | except: 40 | env = None 41 | 42 | return env, get_action, sess -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/readme.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | The various utilities here are copied over from [Spinning Up in Deep RL](https://github.com/openai/spinningup/tree/master/spinup/utils). We prefer to copy/paste here, instead of import, to minimize installation hassle (you don't have to install Spinning Up to use this repo). -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/run_utils.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os.path as osp 3 | 4 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(osp.dirname(__file__)))),'data') 5 | 6 | def setup_logger_kwargs(exp_name, seed=None, data_dir=None, datestamp=True): 7 | 8 | # Make base path 9 | ymd_time = time.strftime("%Y-%m-%d_") if datestamp else '' 10 | relpath = ''.join([ymd_time, exp_name]) 11 | 12 | if seed is not None: 13 | # Make a seed-specific subfolder in the experiment directory. 
14 | if datestamp: 15 | hms_time = time.strftime("%Y-%m-%d_%H-%M-%S") 16 | subfolder = ''.join([hms_time, '-', exp_name, '_s', str(seed)]) 17 | else: 18 | subfolder = ''.join([exp_name, '_s', str(seed)]) 19 | relpath = osp.join(relpath, subfolder) 20 | 21 | data_dir = data_dir or DEFAULT_DATA_DIR 22 | logger_kwargs = dict(output_dir=osp.join(data_dir, relpath), 23 | exp_name=exp_name) 24 | return logger_kwargs -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/safe_rl/utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def convert_json(obj): 4 | """ Convert obj to a version which can be serialized with JSON. """ 5 | if is_json_serializable(obj): 6 | return obj 7 | else: 8 | if isinstance(obj, dict): 9 | return {convert_json(k): convert_json(v) 10 | for k,v in obj.items()} 11 | 12 | elif isinstance(obj, tuple): 13 | return tuple(convert_json(x) for x in obj) 14 | 15 | elif isinstance(obj, list): 16 | return [convert_json(x) for x in obj] 17 | 18 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 19 | return convert_json(obj.__name__) 20 | 21 | elif hasattr(obj,'__dict__') and obj.__dict__: 22 | obj_dict = {convert_json(k): convert_json(v) 23 | for k,v in obj.__dict__.items()} 24 | return {str(obj): obj_dict} 25 | 26 | return str(obj) 27 | 28 | def is_json_serializable(v): 29 | try: 30 | json.dumps(v) 31 | return True 32 | except: 33 | return False -------------------------------------------------------------------------------- /Safe-RL/safety-starter-agents/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import sys 5 | 6 | assert sys.version_info.major == 3 and sys.version_info.minor >= 6, \ 7 | "Safety Starter Agents is designed to work with Python 3.6 and greater. " \ 8 | + "Please install it before proceeding." 9 | 10 | setup( 11 | name='safe_rl', 12 | packages=['safe_rl'], 13 | install_requires=[ 14 | 'gym~=0.15.3', 15 | 'joblib==0.14.0', 16 | 'matplotlib==3.1.1', 17 | 'mpi4py==3.0.2', 18 | 'mujoco_py==2.0.2.7', 19 | 'numpy~=1.17.4', 20 | 'seaborn==0.8.1', 21 | 'tensorflow==1.13.1', 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/README.md: -------------------------------------------------------------------------------- 1 | # vertex-net 2 | This repository contains source code of the paper: 3 | 4 | Liyuan Zheng, Yuanyuan Shi, Lillian J.
Ratliff, and Baosen Zhang, "Safe Reinforcement Learning of Control-Affine Systems with Vertex Networks", 5 | [[ArXiv]](https://arxiv.org/abs/2003.09488) 6 | -------------------------------------------------------------------------------- /Safe-RL/vertex-net/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/algos/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/envs/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/nets/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/policy_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | use_cuda = torch.cuda.is_available() 6 | device = torch.device("cuda" if use_cuda else "cpu") 7 | 8 | 9 | class PolicyNetwork(nn.Module): 10 | def __init__(self, env, obs_dim, action_dim, hidden_dim, init_w=3e-3): 11 | super(PolicyNetwork, self).__init__() 12 | 13 | self.env = env 14 | 15 | self.linear1 = nn.Linear(obs_dim, hidden_dim) 16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 17 | self.linear3 = nn.Linear(hidden_dim, action_dim) 18 | 19 | self.linear3.weight.data.uniform_(-init_w, init_w) 20 | self.linear3.bias.data.uniform_(-init_w, init_w) 21 | 22 | def forward(self, state): 23 | x = F.relu(self.linear1(state)) 24 | x = F.relu(self.linear2(x)) 25 | x = self.env.max_action * torch.tanh(self.linear3(x)) 26 | return x 27 | 28 | def get_action(self, state): 29 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 30 | action = self.forward(state) 31 | return action.detach().cpu().numpy()[0] -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/value_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ValueNetwork(nn.Module): 7 | def __init__(self, obs_dim, action_dim, hidden_dim, init_w=3e-3): 8 | super(ValueNetwork, self).__init__() 9 | 10 | self.linear1 = nn.Linear(obs_dim + action_dim, hidden_dim) 11 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 12 | self.linear3 = nn.Linear(hidden_dim, 1) 13 | 14 | self.linear3.weight.data.uniform_(-init_w, init_w) 15 | self.linear3.bias.data.uniform_(-init_w, init_w) 16 | 17 | def forward(self, state, action): 
18 | x = torch.cat((state, action), dim=1) 19 | x = F.relu(self.linear1(x)) 20 | x = F.relu(self.linear2(x)) 21 | x = self.linear3(x) 22 | return x -------------------------------------------------------------------------------- /Safe-RL/vertex-net/nets/vertex_policy_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | use_cuda = torch.cuda.is_available() 6 | device = torch.device("cuda" if use_cuda else "cpu") 7 | 8 | 9 | class VertexPolicyNetwork(nn.Module): 10 | def __init__(self, env, obs_dim, num_vertex, hidden_dim, init_w=3e-3): 11 | super(VertexPolicyNetwork, self).__init__() 12 | 13 | self.env = env 14 | 15 | self.linear1 = nn.Linear(obs_dim, hidden_dim) 16 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 17 | self.linear3 = nn.Linear(hidden_dim, num_vertex) 18 | 19 | self.linear3.weight.data.uniform_(-init_w, init_w) 20 | self.linear3.bias.data.uniform_(-init_w, init_w) 21 | 22 | def forward(self, state): 23 | x = F.relu(self.linear1(state)) 24 | x = F.relu(self.linear2(x)) 25 | x = F.softmax(self.linear3(x), dim=1) 26 | action_vertex = self.env.get_action_vertex(state.numpy()) 27 | action_vertex = torch.FloatTensor(action_vertex).to(device) 28 | x = torch.bmm(x.unsqueeze(1), action_vertex).squeeze(1) 29 | # x = torch.sum(x * action_vertex, dim=1).unsqueeze(1) 30 | return x 31 | 32 | def get_action(self, state): 33 | state = torch.FloatTensor(state).unsqueeze(0).to(device) 34 | action = self.forward(state) 35 | return action.detach().cpu().numpy()[0] -------------------------------------------------------------------------------- /Safe-RL/vertex-net/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chauncygu/Safe-Reinforcement-Learning-Baselines/d0f51aaab18336343d8bb933de3e119e14e2c755/Safe-RL/vertex-net/utils/__init__.py -------------------------------------------------------------------------------- /Safe-RL/vertex-net/utils/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | class ReplayBuffer: 6 | def __init__(self, capacity): 7 | self.capacity = capacity 8 | self.buffer = [] 9 | self.position = 0 10 | 11 | def push(self, state, action, reward, next_state, done): 12 | if len(self.buffer) < self.capacity: 13 | self.buffer.append(None) 14 | self.buffer[self.position] = (state, action, reward, next_state, done) 15 | self.position = (self.position + 1) % self.capacity 16 | 17 | def sample(self, batch_size): 18 | batch = random.sample(self.buffer, batch_size) 19 | state, action, reward, next_state, done = map(np.stack, zip(*batch)) 20 | return state, action, reward, next_state, done 21 | 22 | def __len__(self): 23 | return len(self.buffer) --------------------------------------------------------------------------------
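The VertexPolicyNetwork in vertex-net/nets/vertex_policy_net.py delegates the geometry of the safe action set to the environment: get_action_vertex returns the vertices of the (state-dependent) action polytope, and the network outputs a convex combination of those vertices, so the action stays inside the polytope by construction. The following is a minimal, illustrative sketch of that contract; the ToyEnv class, its fixed 1-D interval action set, the dimensions, and the import path are assumptions for demonstration only, and it assumes CPU execution since forward calls state.numpy().

import numpy as np
import torch

from nets.vertex_policy_net import VertexPolicyNetwork  # assumed import path inside vertex-net


class ToyEnv:
    """Illustrative stand-in for the control-affine environments used with these networks."""

    max_action = 1.0  # used by the plain PolicyNetwork; included to show the full env interface

    def get_action_vertex(self, state):
        # Vertices of the safe action set for each state in the batch.
        # Shape (batch, num_vertex, action_dim); here a fixed interval [-1, 1].
        batch = state.shape[0]
        return np.tile(np.array([[[-1.0], [1.0]]]), (batch, 1, 1))


env = ToyEnv()
policy = VertexPolicyNetwork(env, obs_dim=3, num_vertex=2, hidden_dim=64)

state = np.zeros(3, dtype=np.float32)
action = policy.get_action(state)  # convex combination of the two vertices, hence inside [-1, 1]
print(action.shape)  # (1,)

A state-dependent safe set only requires changing get_action_vertex; the policy network itself is unchanged, which is the point of the vertex parameterization.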