├── .gitignore ├── README.md ├── algorithms ├── cmbpo.py ├── rl_algorithm.py └── utils.py ├── buffers ├── cpobuffer.py ├── modelbuffer.py └── utils.py ├── cmbpo.yml ├── configs ├── baseconfig │ ├── __init__.py │ ├── base.py │ ├── main.py │ └── utils.py ├── cmbpo_antsafe.py ├── cmbpo_hcs.py ├── cmbpo_hs.py ├── cpo_hcs.py └── trpo_hcs.py ├── envs ├── __init__.py ├── mujoco_safety_gym │ ├── __init__.py │ └── envs │ │ ├── __init__.py │ │ ├── ant.py │ │ ├── ant_viz.py │ │ ├── assets │ │ ├── ant.xml │ │ ├── ant_viz.xml │ │ ├── fetch │ │ │ ├── pick_and_place.xml │ │ │ ├── push.xml │ │ │ ├── reach.xml │ │ │ ├── robot.xml │ │ │ ├── shared.xml │ │ │ └── slide.xml │ │ ├── half_cheetah.xml │ │ ├── hopper.xml │ │ ├── humanoid.xml │ │ └── textures │ │ │ ├── block.png │ │ │ └── block_hidden.png │ │ ├── fetch │ │ ├── pick_and_place.py │ │ ├── push.py │ │ ├── reach.py │ │ └── slide.py │ │ ├── fetch_env.py │ │ ├── half_cheetah.py │ │ ├── hopper.py │ │ ├── humanoid.py │ │ ├── mujoco_env.py │ │ └── robot_env.py ├── utils.py └── wrappers │ ├── __init__.py │ └── normalize_action.py ├── models ├── base_model.py ├── fake_env.py ├── pens │ ├── __init__.py │ ├── fc.py │ ├── logger.py │ ├── pe.py │ ├── pe_factory.py │ └── utils.py └── statics.py ├── network └── ac_network.py ├── policies ├── base_policy.py ├── cpo_policy.py └── utils.py ├── requirements.txt ├── samplers ├── __init__.py ├── base_sampler.py ├── cpo_sampler.py ├── model_sampler.py ├── simple_sampler.py └── utils.py ├── scripts ├── console_scripts.py └── run.py ├── setup.py └── utilities ├── instrument.py ├── logging.py ├── logx.py ├── mpi_tf.py ├── mpi_tools.py ├── serialization_utils.py ├── trust_region.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/README.md -------------------------------------------------------------------------------- /algorithms/cmbpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/algorithms/cmbpo.py -------------------------------------------------------------------------------- /algorithms/rl_algorithm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/algorithms/rl_algorithm.py -------------------------------------------------------------------------------- /algorithms/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/algorithms/utils.py -------------------------------------------------------------------------------- /buffers/cpobuffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/buffers/cpobuffer.py -------------------------------------------------------------------------------- /buffers/modelbuffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/buffers/modelbuffer.py -------------------------------------------------------------------------------- /buffers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/buffers/utils.py -------------------------------------------------------------------------------- /cmbpo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/cmbpo.yml -------------------------------------------------------------------------------- /configs/baseconfig/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/baseconfig/__init__.py -------------------------------------------------------------------------------- /configs/baseconfig/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/baseconfig/base.py -------------------------------------------------------------------------------- /configs/baseconfig/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/baseconfig/main.py -------------------------------------------------------------------------------- /configs/baseconfig/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/baseconfig/utils.py -------------------------------------------------------------------------------- /configs/cmbpo_antsafe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/cmbpo_antsafe.py -------------------------------------------------------------------------------- /configs/cmbpo_hcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/cmbpo_hcs.py -------------------------------------------------------------------------------- /configs/cmbpo_hs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/cmbpo_hs.py -------------------------------------------------------------------------------- /configs/cpo_hcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/cpo_hcs.py -------------------------------------------------------------------------------- /configs/trpo_hcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/configs/trpo_hcs.py -------------------------------------------------------------------------------- /envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/__init__.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/__init__.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/ant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/ant.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/ant_viz.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/ant_viz.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/ant.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/ant.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/ant_viz.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/ant_viz.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/pick_and_place.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/pick_and_place.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/push.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/push.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/reach.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/reach.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/robot.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/robot.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/shared.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/shared.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/fetch/slide.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/fetch/slide.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/half_cheetah.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/half_cheetah.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/hopper.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/hopper.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/humanoid.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/humanoid.xml -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/textures/block.png -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/fetch/pick_and_place.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/fetch/pick_and_place.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/fetch/push.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/fetch/push.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/fetch/reach.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/fetch/reach.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/fetch/slide.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/fetch/slide.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/fetch_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/fetch_env.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/half_cheetah.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/half_cheetah.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/hopper.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/humanoid.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/humanoid.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/mujoco_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/mujoco_env.py -------------------------------------------------------------------------------- /envs/mujoco_safety_gym/envs/robot_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/mujoco_safety_gym/envs/robot_env.py -------------------------------------------------------------------------------- /envs/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/utils.py -------------------------------------------------------------------------------- /envs/wrappers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/wrappers/__init__.py -------------------------------------------------------------------------------- /envs/wrappers/normalize_action.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/envs/wrappers/normalize_action.py -------------------------------------------------------------------------------- /models/base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/base_model.py -------------------------------------------------------------------------------- /models/fake_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/fake_env.py -------------------------------------------------------------------------------- /models/pens/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/pens/fc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/pens/fc.py -------------------------------------------------------------------------------- /models/pens/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/pens/logger.py -------------------------------------------------------------------------------- /models/pens/pe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/pens/pe.py -------------------------------------------------------------------------------- /models/pens/pe_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/pens/pe_factory.py -------------------------------------------------------------------------------- /models/pens/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/pens/utils.py -------------------------------------------------------------------------------- /models/statics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/models/statics.py -------------------------------------------------------------------------------- /network/ac_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/network/ac_network.py -------------------------------------------------------------------------------- /policies/base_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/policies/base_policy.py -------------------------------------------------------------------------------- /policies/cpo_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/policies/cpo_policy.py -------------------------------------------------------------------------------- /policies/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/policies/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/requirements.txt -------------------------------------------------------------------------------- /samplers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /samplers/base_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/samplers/base_sampler.py -------------------------------------------------------------------------------- /samplers/cpo_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/samplers/cpo_sampler.py -------------------------------------------------------------------------------- /samplers/model_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/samplers/model_sampler.py -------------------------------------------------------------------------------- /samplers/simple_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/samplers/simple_sampler.py -------------------------------------------------------------------------------- /samplers/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/samplers/utils.py -------------------------------------------------------------------------------- /scripts/console_scripts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/scripts/console_scripts.py -------------------------------------------------------------------------------- /scripts/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/scripts/run.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/setup.py -------------------------------------------------------------------------------- /utilities/instrument.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/instrument.py -------------------------------------------------------------------------------- /utilities/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/logging.py -------------------------------------------------------------------------------- /utilities/logx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/logx.py -------------------------------------------------------------------------------- /utilities/mpi_tf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/mpi_tf.py -------------------------------------------------------------------------------- /utilities/mpi_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/mpi_tools.py -------------------------------------------------------------------------------- /utilities/serialization_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/serialization_utils.py -------------------------------------------------------------------------------- /utilities/trust_region.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/trust_region.py -------------------------------------------------------------------------------- /utilities/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anyboby/Constrained-Model-Based-Policy-Optimization/HEAD/utilities/utils.py --------------------------------------------------------------------------------