├── .gitignore
├── README.md
├── data_fusion_discrete
└── maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32
│ └── 2019_05_14_02_33_17_0
│ └── itr_2800.pkl
├── inverse_rl
├── __init__.py
├── __pycache__
│ └── __init__.cpython-35.pyc
├── algos
│ ├── __pycache__
│ │ ├── batch_polopt.cpython-35.pyc
│ │ ├── irl_batch_polopt.cpython-35.pyc
│ │ ├── irl_npo.cpython-35.pyc
│ │ ├── irl_trpo.cpython-35.pyc
│ │ ├── meta_irl_batch_polopt.cpython-35.pyc
│ │ ├── meta_irl_npo.cpython-35.pyc
│ │ ├── meta_irl_trpo.cpython-35.pyc
│ │ ├── npo.cpython-35.pyc
│ │ ├── penalty_lbfgs_optimizer.cpython-35.pyc
│ │ └── trpo.cpython-35.pyc
│ ├── batch_polopt.py
│ ├── irl_batch_polopt.py
│ ├── irl_npo.py
│ ├── irl_trpo.py
│ ├── meta_irl_batch_polopt.py
│ ├── meta_irl_npo.py
│ ├── meta_irl_trpo.py
│ ├── npo.py
│ ├── penalty_lbfgs_optimizer.py
│ └── trpo.py
├── envs
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── env_utils.cpython-35.pyc
│ │ └── point_maze_env.cpython-35.pyc
│ ├── assets
│ │ └── twod_maze.xml
│ ├── dynamic_mjc
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── mjc_models.cpython-35.pyc
│ │ │ └── model_builder.cpython-35.pyc
│ │ ├── mjc_models.py
│ │ └── model_builder.py
│ ├── env_utils.py
│ ├── point_maze_env.py
│ └── utils.py
├── models
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ ├── airl_state.cpython-35.pyc
│ │ ├── architectures.cpython-35.pyc
│ │ ├── fusion_manager.cpython-35.pyc
│ │ ├── imitation_learning.cpython-35.pyc
│ │ ├── old_imitation_learning.cpython-35.pyc
│ │ ├── pretrain.cpython-35.pyc
│ │ └── tf_util.cpython-35.pyc
│ ├── airl_state.py
│ ├── architectures.py
│ ├── fusion_manager.py
│ ├── imitation_learning.py
│ ├── info_airl_state_test.py
│ ├── info_airl_state_train.py
│ ├── old_imitation_learning.py
│ ├── pretrain.py
│ └── tf_util.py
└── utils
│ ├── __init__.py
│ ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   ├── general.cpython-35.pyc
│   │   ├── hyper_sweep.cpython-35.pyc
│   │   ├── hyperparametrized.cpython-35.pyc
│   │   ├── log_utils.cpython-35.pyc
│   │   └── math_utils.cpython-35.pyc
│ ├── general.py
│ ├── hyper_sweep.py
│ ├── hyperparametrized.py
│ ├── log_utils.py
│ └── math_utils.py
├── rllab
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── circle.yml
├── contrib
│ ├── __init__.py
│ ├── alexbeloi
│ │ ├── __init__.py
│ │ ├── examples
│ │ │ ├── __init__.py
│ │ │ ├── trpois_cartpole.py
│ │ │ └── vpgis_cartpole.py
│ │ └── is_sampler.py
│ ├── bichengcao
│ │ ├── __init__.py
│ │ └── examples
│ │ │ ├── __init__.py
│ │ │ ├── trpo_gym_Acrobot-v1.py
│ │ │ ├── trpo_gym_CartPole-v0.py
│ │ │ ├── trpo_gym_CartPole-v1.py
│ │ │ ├── trpo_gym_MountainCar-v0.py
│ │ │ └── trpo_gym_Pendulum-v0.py
│ └── rllab_hyperopt
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── example
│ │ ├── __init__.py
│ │ ├── main.py
│ │ ├── score.py
│ │ └── task.py
│ │ └── visualize_hyperopt_results.ipynb
├── docker
│ ├── Dockerfile
│ ├── gpu_Dockerfile
│ ├── gpu_tf_Dockerfile
│ └── tester_Dockerfile
├── docs
│ ├── Makefile
│ ├── conf.py
│ ├── index.rst
│ └── user
│ │ ├── cluster.rst
│ │ ├── cluster_1.png
│ │ ├── cluster_2.png
│ │ ├── cluster_3.png
│ │ ├── experiments.rst
│ │ ├── gym_integration.rst
│ │ ├── implement_algo_advanced.rst
│ │ ├── implement_algo_basic.rst
│ │ ├── implement_env.rst
│ │ └── installation.rst
├── environment.yml
├── examples
│ ├── __init__.py
│ ├── cluster_demo.py
│ ├── cluster_gym_mujoco_demo.py
│ ├── ddpg_cartpole.py
│ ├── nop_cartpole.py
│ ├── point_env.py
│ ├── trpo_cartpole.py
│ ├── trpo_cartpole_pickled.py
│ ├── trpo_cartpole_recurrent.py
│ ├── trpo_gym_cartpole.py
│ ├── trpo_gym_pendulum.py
│ ├── trpo_gym_tf_cartpole.py
│ ├── trpo_point.py
│ ├── trpo_swimmer.py
│ ├── vpg_1.py
│ └── vpg_2.py
├── rllab
│ ├── __init__.py
│ ├── algos
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── batch_polopt.py
│ │ ├── cem.py
│ │ ├── cma_es.py
│ │ ├── cma_es_lib.py
│ │ ├── ddpg.py
│ │ ├── erwr.py
│ │ ├── nop.py
│ │ ├── npo.py
│ │ ├── ppo.py
│ │ ├── reps.py
│ │ ├── tnpg.py
│ │ ├── trpo.py
│ │ ├── util.py
│ │ └── vpg.py
│ ├── baselines
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── gaussian_conv_baseline.py
│ │ ├── gaussian_mlp_baseline.py
│ │ ├── linear_feature_baseline.py
│ │ └── zero_baseline.py
│ ├── config.py
│ ├── config_personal_template.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── lasagne_helpers.py
│ │ ├── lasagne_layers.py
│ │ ├── lasagne_powered.py
│ │ ├── network.py
│ │ ├── parameterized.py
│ │ └── serializable.py
│ ├── distributions
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── bernoulli.py
│ │ ├── categorical.py
│ │ ├── delta.py
│ │ ├── diagonal_gaussian.py
│ │ ├── recurrent_categorical.py
│ │ └── recurrent_diagonal_gaussian.py
│ ├── envs
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── box2d
│ │ │ ├── __init__.py
│ │ │ ├── box2d_env.py
│ │ │ ├── box2d_viewer.py
│ │ │ ├── car_parking_env.py
│ │ │ ├── cartpole_env.py
│ │ │ ├── cartpole_swingup_env.py
│ │ │ ├── double_pendulum_env.py
│ │ │ ├── models
│ │ │ │ ├── car_parking.xml
│ │ │ │ ├── car_parking.xml.rb
│ │ │ │ ├── cartpole.xml.mako
│ │ │ │ ├── double_pendulum.xml.mako
│ │ │ │ └── mountain_car.xml.mako
│ │ │ ├── mountain_car_env.py
│ │ │ └── parser
│ │ │ │ ├── __init__.py
│ │ │ │ ├── xml_attr_types.py
│ │ │ │ ├── xml_box2d.py
│ │ │ │ └── xml_types.py
│ │ ├── env_spec.py
│ │ ├── grid_world_env.py
│ │ ├── gym_env.py
│ │ ├── identification_env.py
│ │ ├── mujoco
│ │ │ ├── __init__.py
│ │ │ ├── ant_env.py
│ │ │ ├── gather
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant_gather_env.py
│ │ │ │ ├── embedded_viewer.py
│ │ │ │ ├── gather_env.py
│ │ │ │ ├── point_gather_env.py
│ │ │ │ └── swimmer_gather_env.py
│ │ │ ├── half_cheetah_env.py
│ │ │ ├── hill
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant_hill_env.py
│ │ │ │ ├── half_cheetah_hill_env.py
│ │ │ │ ├── hill_env.py
│ │ │ │ ├── hopper_hill_env.py
│ │ │ │ ├── swimmer3d_hill_env.py
│ │ │ │ ├── terrain.py
│ │ │ │ └── walker2d_hill_env.py
│ │ │ ├── hopper_env.py
│ │ │ ├── humanoid_env.py
│ │ │ ├── inverted_double_pendulum_env.py
│ │ │ ├── maze
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ant_maze_env.py
│ │ │ │ ├── maze_env.py
│ │ │ │ ├── maze_env_utils.py
│ │ │ │ ├── point_maze_env.py
│ │ │ │ └── swimmer_maze_env.py
│ │ │ ├── mujoco_env.py
│ │ │ ├── point_env.py
│ │ │ ├── simple_humanoid_env.py
│ │ │ ├── swimmer3d_env.py
│ │ │ ├── swimmer_env.py
│ │ │ └── walker2d_env.py
│ │ ├── noisy_env.py
│ │ ├── normalized_env.py
│ │ ├── occlusion_env.py
│ │ ├── proxy_env.py
│ │ └── sliding_mem_env.py
│ ├── exploration_strategies
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── gaussian_strategy.py
│ │ └── ou_strategy.py
│ ├── misc
│ │ ├── __init__.py
│ │ ├── autoargs.py
│ │ ├── console.py
│ │ ├── ext.py
│ │ ├── instrument.py
│ │ ├── krylov.py
│ │ ├── logger.py
│ │ ├── mako_utils.py
│ │ ├── meta.py
│ │ ├── nb_utils.py
│ │ ├── overrides.py
│ │ ├── resolve.py
│ │ ├── special.py
│ │ ├── tabulate.py
│ │ ├── tensor_utils.py
│ │ └── viewer2d.py
│ ├── mujoco_py
│ │ ├── .rvmrc
│ │ ├── Gemfile
│ │ ├── Gemfile.lock
│ │ ├── __init__.py
│ │ ├── codegen.rb
│ │ ├── gen_binding.sh
│ │ ├── glfw.py
│ │ ├── mjconstants.py
│ │ ├── mjcore.py
│ │ ├── mjextra.py
│ │ ├── mjlib.py
│ │ ├── mjtypes.py
│ │ ├── mjviewer.py
│ │ └── util.py
│ ├── optimizers
│ │ ├── __init__.py
│ │ ├── conjugate_gradient_optimizer.py
│ │ ├── first_order_optimizer.py
│ │ ├── hessian_free_optimizer.py
│ │ ├── hf.py
│ │ ├── lbfgs_optimizer.py
│ │ ├── minibatch_dataset.py
│ │ └── penalty_lbfgs_optimizer.py
│ ├── plotter
│ │ ├── __init__.py
│ │ └── plotter.py
│ ├── policies
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── categorical_conv_policy.py
│ │ ├── categorical_gru_policy.py
│ │ ├── categorical_mlp_policy.py
│ │ ├── deterministic_mlp_policy.py
│ │ ├── gaussian_gru_policy.py
│ │ ├── gaussian_mlp_policy.py
│ │ └── uniform_control_policy.py
│ ├── q_functions
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── continuous_mlp_q_function.py
│ ├── regressors
│ │ ├── __init__.py
│ │ ├── categorical_mlp_regressor.py
│ │ ├── gaussian_conv_regressor.py
│ │ ├── gaussian_mlp_regressor.py
│ │ └── product_regressor.py
│ ├── sampler
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── parallel_sampler.py
│ │ ├── stateful_pool.py
│ │ └── utils.py
│ ├── spaces
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── box.py
│ │ ├── discrete.py
│ │ └── product.py
│ └── viskit
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── frontend.py
│ │ ├── static
│ │ ├── css
│ │ │ ├── bootstrap.min.css
│ │ │ └── dropdowns-enhancement.css
│ │ └── js
│ │ │ ├── bootstrap.min.js
│ │ │ ├── dropdowns-enhancement.js
│ │ │ ├── jquery-1.10.2.min.js
│ │ │ ├── jquery.loadTemplate-1.5.6.js
│ │ │ └── plotly-latest.min.js
│ │ └── templates
│ │ └── main.html
├── sandbox
│ ├── __init__.py
│ └── rocky
│ │ ├── __init__.py
│ │ └── tf
│ │ ├── __init__.py
│ │ ├── algos
│ │ ├── __init__.py
│ │ ├── batch_polopt.py
│ │ ├── npg.py
│ │ ├── npo.py
│ │ ├── trpo.py
│ │ └── vpg.py
│ │ ├── core
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── layers_powered.py
│ │ ├── network.py
│ │ └── parameterized.py
│ │ ├── distributions
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── bernoulli.py
│ │ ├── categorical.py
│ │ ├── diagonal_gaussian.py
│ │ ├── recurrent_categorical.py
│ │ └── recurrent_diagonal_gaussian.py
│ │ ├── envs
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── parallel_vec_env_executor.py
│ │ └── vec_env_executor.py
│ │ ├── launchers
│ │ ├── __init__.py
│ │ ├── trpo_cartpole.py
│ │ ├── trpo_cartpole_recurrent.py
│ │ └── vpg_cartpole.py
│ │ ├── misc
│ │ ├── __init__.py
│ │ └── tensor_utils.py
│ │ ├── optimizers
│ │ ├── __init__.py
│ │ ├── conjugate_gradient_optimizer.py
│ │ ├── first_order_optimizer.py
│ │ ├── lbfgs_optimizer.py
│ │ └── penalty_lbfgs_optimizer.py
│ │ ├── policies
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── categorical_conv_policy.py
│ │ ├── categorical_gru_policy.py
│ │ ├── categorical_lstm_policy.py
│ │ ├── categorical_mlp_policy.py
│ │ ├── deterministic_mlp_policy.py
│ │ ├── gaussian_gru_policy.py
│ │ ├── gaussian_lstm_policy.py
│ │ ├── gaussian_mlp_policy.py
│ │ ├── latent_gaussian_mlp_policy.py
│ │ └── uniform_control_policy.py
│ │ ├── q_functions
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── continuous_mlp_q_function.py
│ │ ├── regressors
│ │ ├── __init__.py
│ │ ├── bernoulli_mlp_regressor.py
│ │ ├── categorical_mlp_regressor.py
│ │ ├── deterministic_mlp_regressor.py
│ │ └── gaussian_mlp_regressor.py
│ │ ├── samplers
│ │ ├── __init__.py
│ │ ├── batch_sampler.py
│ │ └── vectorized_sampler.py
│ │ └── spaces
│ │ ├── __init__.py
│ │ ├── box.py
│ │ ├── discrete.py
│ │ └── product.py
├── scripts
│ ├── __init__.py
│ ├── resume_training.py
│ ├── run_experiment_lite.py
│ ├── setup_ec2_for_rllab.py
│ ├── setup_linux.sh
│ ├── setup_mujoco.sh
│ ├── setup_osx.sh
│ ├── sim_env.py
│ ├── sim_policy.py
│ ├── submit_gym.py
│ └── sync_s3.py
├── setup.py
├── tests
│ ├── __init__.py
│ ├── algos
│ │ ├── __init__.py
│ │ └── test_trpo.py
│ ├── envs
│ │ ├── __init__.py
│ │ ├── test_envs.py
│ │ └── test_maze_env.py
│ ├── regression_tests
│ │ ├── __init__.py
│ │ └── test_issue_3.py
│ ├── test_algos.py
│ ├── test_baselines.py
│ ├── test_instrument.py
│ ├── test_networks.py
│ ├── test_sampler.py
│ ├── test_serializable.py
│ ├── test_spaces.py
│ └── test_stateful_pool.py
└── vendor
│ └── mujoco_models
│ ├── ant.xml
│ ├── green_ball.xml
│ ├── half_cheetah.xml
│ ├── hill_ant_env.xml.mako
│ ├── hill_half_cheetah_env.xml.mako
│ ├── hill_hopper_env.xml.mako
│ ├── hill_swimmer3d_env.xml.mako
│ ├── hill_walker2d_env.xml.mako
│ ├── hopper.xml
│ ├── humanoid.xml
│ ├── inverted_double_pendulum.xml
│ ├── inverted_double_pendulum.xml.mako
│ ├── point.xml
│ ├── red_ball.xml
│ ├── simple_humanoid.xml
│ ├── swimmer.xml
│ ├── swimmer3d.xml
│ ├── utils.mako
│ └── walker2d.xml
└── scripts
├── maze_data_collect.py
├── maze_visualize_reward.py
├── maze_wall_meta_irl.py
└── maze_wall_meta_irl_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Meta-Inverse Reinforcement Learning with Probabilistic Context Variables
2 | Lantao Yu*, Tianhe Yu*, Chelsea Finn, Stefano Ermon.
3 | The 33rd Conference on Neural Information Processing Systems (NeurIPS 2019).
4 | [[Paper]](https://arxiv.org/pdf/1909.09314.pdf) [[Website]](https://sites.google.com/view/pemirl)
5 |
6 | ### Usage
7 | Requirement: The rllab package used in this project is provided [here](https://github.com/ermongroup/MetaIRL/tree/master/rllab).
8 |
9 | To get expert trajectories for downstream tasks:
10 | ```
11 | python scripts/maze_data_collect.py
12 | ```
13 |
14 | After getting expert trajectories, run Meta-Inverse RL to learn context-dependent reward functions:
15 | ```
16 | python scripts/maze_wall_meta_irl.py
17 | ```
18 | We provide a pretrained IRL model [here](https://github.com/ermongroup/MetaIRL/tree/master/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0), which is loaded by the following scripts by default.
19 |
20 | To visualize the context-dependent reward function (Figure 2 in the paper):
21 | ```
22 | python scripts/maze_visualize_reward.py
23 | ```
24 |
25 | To use the context-dependent reward function to train a new policy under new dynamics:
26 | ```
27 | python scripts/maze_wall_meta_irl_test.py
28 | ```
--------------------------------------------------------------------------------
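The visualization and test scripts load this checkpoint internally. As a hedged illustration only (not taken from those scripts), the sketch below reads the bundled pickle with `load_prior_params` from `inverse_rl/models/tf_util.py`, which uses joblib under the hood. The `'irl_params'` key is that function's default; assuming it is the key stored in this particular checkpoint is part of the sketch.

```python
# Illustrative sketch: inspect the bundled IRL checkpoint the same way
# inverse_rl.models.tf_util.load_prior_params does (joblib + a TF session).
# Whether this pickle actually contains an 'irl_params' entry is an assumption.
from inverse_rl.models.tf_util import load_prior_params

CKPT = ("data_fusion_discrete/"
        "maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_"
        "preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/"
        "2019_05_14_02_33_17_0/itr_2800.pkl")

irl_params = load_prior_params(CKPT, key='irl_params')  # reward-network parameters
```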
/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl
--------------------------------------------------------------------------------
/inverse_rl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__init__.py
--------------------------------------------------------------------------------
/inverse_rl/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/npo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/npo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/__pycache__/trpo.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/trpo.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/algos/irl_trpo.py:
--------------------------------------------------------------------------------
1 | from inverse_rl.algos.irl_npo import IRLNPO
2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
3 |
4 |
5 | class IRLTRPO(IRLNPO):
6 | """
7 | Trust Region Policy Optimization
8 | """
9 |
10 | def __init__(
11 | self,
12 | optimizer=None,
13 | optimizer_args=None,
14 | **kwargs):
15 | if optimizer is None:
16 | if optimizer_args is None:
17 | optimizer_args = dict()
18 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
19 | super(IRLTRPO, self).__init__(optimizer=optimizer, **kwargs)
20 |
--------------------------------------------------------------------------------
/inverse_rl/algos/meta_irl_trpo.py:
--------------------------------------------------------------------------------
1 | from inverse_rl.algos.meta_irl_npo import MetaIRLNPO
2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
3 |
4 |
5 | class MetaIRLTRPO(MetaIRLNPO):
6 | """
7 | Trust Region Policy Optimization
8 | """
9 |
10 | def __init__(
11 | self,
12 | optimizer=None,
13 | optimizer_args=None,
14 | **kwargs):
15 | if optimizer is None:
16 | if optimizer_args is None:
17 | optimizer_args = dict()
18 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
19 | super(MetaIRLTRPO, self).__init__(optimizer=optimizer, **kwargs)
20 |
--------------------------------------------------------------------------------
/inverse_rl/algos/trpo.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from inverse_rl.algos.npo import NPO
4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
5 |
6 |
7 | class TRPO(NPO):
8 | """
9 | Trust Region Policy Optimization
10 | """
11 |
12 | def __init__(
13 | self,
14 | optimizer=None,
15 | optimizer_args=None,
16 | **kwargs):
17 | if optimizer is None:
18 | if optimizer_args is None:
19 | optimizer_args = dict()
20 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs)
22 |
--------------------------------------------------------------------------------
/inverse_rl/envs/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from gym.envs import register
4 |
5 | LOGGER = logging.getLogger(__name__)
6 |
7 | _REGISTERED = False
8 | def register_custom_envs():
9 | global _REGISTERED
10 | if _REGISTERED:
11 | return
12 | _REGISTERED = True
13 |
14 | LOGGER.info("Registering custom gym environments")
15 | register(id='PointMazeRight-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
16 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': True})
17 | register(id='PointMazeLeft-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
18 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': True})
19 | register(id='PointMazeRightCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
20 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': False})
21 | register(id='PointMazeLeftCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
22 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': False})
23 |
24 |
--------------------------------------------------------------------------------
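The module above only defines `register_custom_envs()`; it does not register the environments at import time. A minimal usage sketch (assuming the old-style gym API where `step()` returns a 4-tuple, and a working MuJoCo setup):

```python
import gym

from inverse_rl.envs import register_custom_envs

register_custom_envs()               # guarded by _REGISTERED, safe to call twice
env = gym.make('PointMazeLeft-v0')   # registered above with direction=0, discrete=True

obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```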
/inverse_rl/envs/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/assets/twod_maze.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/assets/twod_maze.xml
--------------------------------------------------------------------------------
/inverse_rl/envs/dynamic_mjc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__init__.py
--------------------------------------------------------------------------------
/inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/envs/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def flat_to_one_hot(val, ndim):
4 | """
5 |
6 | >>> flat_to_one_hot(2, ndim=4)
7 | array([ 0., 0., 1., 0.])
8 | >>> flat_to_one_hot(4, ndim=5)
9 | array([ 0., 0., 0., 0., 1.])
10 | >>> flat_to_one_hot(np.array([2, 4, 3]), ndim=5)
11 | array([[ 0., 0., 1., 0., 0.],
12 | [ 0., 0., 0., 0., 1.],
13 | [ 0., 0., 0., 1., 0.]])
14 | """
15 | shape =np.array(val).shape
16 | v = np.zeros(shape + (ndim,))
17 | if len(shape) == 1:
18 | v[np.arange(shape[0]), val] = 1.0
19 | else:
20 | v[val] = 1.0
21 | return v
22 |
23 | def one_hot_to_flat(val):
24 | """
25 | >>> one_hot_to_flat(np.array([0,0,0,0,1]))
26 | 4
27 | >>> one_hot_to_flat(np.array([0,0,1,0]))
28 | 2
29 | >>> one_hot_to_flat(np.array([[0,0,1,0], [1,0,0,0], [0,1,0,0]]))
30 | array([2, 0, 1])
31 | """
32 | idxs = np.array(np.where(val == 1.0))[-1]
33 | if len(val.shape) == 1:
34 | return int(idxs)
35 | return idxs
--------------------------------------------------------------------------------
/inverse_rl/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__init__.py
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/airl_state.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/airl_state.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/architectures.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/architectures.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/pretrain.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/pretrain.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/__pycache__/tf_util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/tf_util.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/models/architectures.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from inverse_rl.models.tf_util import relu_layer, linear
3 |
4 |
5 | def make_relu_net(layers=2, dout=1, d_hidden=32):
6 | def relu_net(x, last_layer_bias=True):
7 | out = x
8 | for i in range(layers):
9 | out = relu_layer(out, dout=d_hidden, name='l%d'%i)
10 | out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias)
11 | return out
12 | return relu_net
13 |
14 |
15 | def relu_net(x, layers=2, dout=1, d_hidden=32):
16 | out = x
17 | for i in range(layers):
18 | out = relu_layer(out, dout=d_hidden, name='l%d'%i)
19 | out = linear(out, dout=dout, name='lfinal')
20 | return out
21 |
22 |
23 | def linear_net(x, dout=1):
24 | out = x
25 | out = linear(out, dout=dout, name='lfinal')
26 | return out
27 |
28 |
29 | def feedforward_energy(obs_act, ff_arch=relu_net):
30 | # for trajectories, using feedforward nets rather than RNNs
31 | dimOU = int(obs_act.get_shape()[2])
32 | orig_shape = tf.shape(obs_act)
33 |
34 | obs_act = tf.reshape(obs_act, [-1, dimOU])
35 | outputs = ff_arch(obs_act)
36 | dOut = int(outputs.get_shape()[-1])
37 |
38 | new_shape = tf.stack([orig_shape[0],orig_shape[1], dOut])
39 | outputs = tf.reshape(outputs, new_shape)
40 | return outputs
41 |
42 |
43 | def rnn_trajectory_energy(obs_act):
44 | """
45 | Operates on trajectories
46 | """
47 | # for trajectories
48 | dimOU = int(obs_act.get_shape()[2])
49 |
50 | cell = tf.contrib.rnn.GRUCell(num_units=dimOU)
51 | cell_out = tf.contrib.rnn.OutputProjectionWrapper(cell, 1)
52 | outputs, hidden = tf.nn.dynamic_rnn(cell_out, obs_act, time_major=False, dtype=tf.float32)
53 | return outputs
54 |
55 |
--------------------------------------------------------------------------------
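For orientation, a small TF1-style sketch of how these pieces compose (the shapes are illustrative assumptions, not taken from the training scripts): `relu_net` maps flat (obs, action) features to a scalar, and `feedforward_energy` applies the same network independently at every timestep of a trajectory tensor.

```python
import tensorflow as tf

from inverse_rl.models.architectures import relu_net, feedforward_energy

# Trajectory batch: [n_trajs, horizon, obs_dim + act_dim]; 6 is an arbitrary example.
obs_act = tf.placeholder(tf.float32, shape=[None, None, 6])

with tf.variable_scope('energy'):
    per_step_energy = feedforward_energy(obs_act, ff_arch=relu_net)  # [n_trajs, horizon, 1]
```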
/inverse_rl/models/tf_util.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | REG_VARS = 'reg_vars'
5 |
6 | def linear(X, dout, name, bias=True):
7 | with tf.variable_scope(name):
8 | dX = int(X.get_shape()[-1])
9 | W = tf.get_variable('W', shape=(dX, dout))
10 | tf.add_to_collection(REG_VARS, W)
11 | if bias:
12 | b = tf.get_variable('b', initializer=tf.constant(np.zeros(dout).astype(np.float32)))
13 | else:
14 | b = 0
15 | return tf.matmul(X, W)+b
16 |
17 | def discounted_reduce_sum(X, discount, axis=-1):
18 | if discount != 1.0:
19 | disc = tf.cumprod(discount*tf.ones_like(X), axis=axis)
20 | else:
21 | disc = 1.0
22 | return tf.reduce_sum(X*disc, axis=axis)
23 |
24 | def assert_shape(tens, shape):
25 | assert tens.get_shape().is_compatible_with(shape)
26 |
27 | def relu_layer(X, dout, name):
28 | return tf.nn.relu(linear(X, dout, name))
29 |
30 | def softplus_layer(X, dout, name):
31 | return tf.nn.softplus(linear(X, dout, name))
32 |
33 | def tanh_layer(X, dout, name):
34 | return tf.nn.tanh(linear(X, dout, name))
35 |
36 | def get_session_config():
37 | session_config = tf.ConfigProto()
38 | session_config.gpu_options.allow_growth = True
39 | #session_config.gpu_options.per_process_gpu_memory_fraction = 0.2
40 | return session_config
41 |
42 |
43 | def load_prior_params(pkl_fname, key='irl_params'):
44 | import joblib
45 | with tf.Session(config=get_session_config()):
46 | params = joblib.load(pkl_fname)
47 |
48 | tf.reset_default_graph()
49 | #joblib.dump(params, file_name, compress=3)
50 | params = params[key]
51 | #print(params)
52 | assert params is not None
53 | return params
54 |
--------------------------------------------------------------------------------
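One detail worth noting in `discounted_reduce_sum`: `tf.cumprod` starts at `discount**1`, so every element is discounted at least once (the first element does not carry `discount**0`). A quick TF1-style check of that behavior, with arbitrary example numbers:

```python
import numpy as np
import tensorflow as tf

from inverse_rl.models.tf_util import discounted_reduce_sum

rewards = tf.constant([[1.0, 1.0, 1.0]])
total = discounted_reduce_sum(rewards, discount=0.9, axis=-1)

with tf.Session() as sess:
    print(sess.run(total))                   # ~[2.439] == 0.9 + 0.81 + 0.729
    print(np.sum(0.9 ** np.arange(1, 4)))    # same sum computed in NumPy
```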
/inverse_rl/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from inverse_rl.utils.general import *
2 |
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/general.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/general.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc
--------------------------------------------------------------------------------
/inverse_rl/utils/general.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import time
3 |
4 | def flatten_list(lol):
5 | return [ a for b in lol for a in b ]
6 |
7 | class TrainingIterator(object):
8 | def __init__(self, itrs, heartbeat=float('inf')):
9 | self.itrs = itrs
10 | self.heartbeat_time = heartbeat
11 | self.__vals = {}
12 |
13 | def random_idx(self, N, size):
14 | return np.random.randint(0, N, size=size)
15 |
16 | @property
17 | def itr(self):
18 | return self.__itr
19 |
20 | @property
21 | def heartbeat(self):
22 | return self.__heartbeat
23 |
24 | @property
25 | def elapsed(self):
26 | assert self.heartbeat, 'elapsed is only valid when heartbeat=True'
27 | return self.__elapsed
28 |
29 | def itr_message(self):
30 | return '==> Itr %d/%d (elapsed:%.2f)' % (self.itr+1, self.itrs, self.elapsed)
31 |
32 | def record(self, key, value):
33 | if key in self.__vals:
34 | self.__vals[key].append(value)
35 | else:
36 | self.__vals[key] = [value]
37 |
38 | def pop(self, key):
39 | vals = self.__vals.get(key, [])
40 | del self.__vals[key]
41 | return vals
42 |
43 | def pop_mean(self, key):
44 | return np.mean(self.pop(key))
45 |
46 | def __iter__(self):
47 | prev_time = time.time()
48 | self.__heartbeat = False
49 | for i in range(self.itrs):
50 | self.__itr = i
51 | cur_time = time.time()
52 | if (cur_time-prev_time) > self.heartbeat_time or i==(self.itrs-1):
53 | self.__heartbeat = True
54 | self.__elapsed = cur_time-prev_time
55 | prev_time = cur_time
56 | yield self
57 | self.__heartbeat = False
--------------------------------------------------------------------------------
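A short usage sketch for `TrainingIterator` (the 5-second heartbeat and the dummy loss are arbitrary): the `heartbeat` flag flips on roughly every `heartbeat` seconds and on the final iteration, which is when `itr_message()` and the recorded statistics are meant to be read.

```python
from inverse_rl.utils.general import TrainingIterator

for it in TrainingIterator(1000, heartbeat=5.0):
    loss = 0.0                    # stand-in for a real training step
    it.record('loss', loss)
    if it.heartbeat:              # true about every 5 s, and on the last iteration
        print(it.itr_message())
        print('mean loss since last heartbeat:', it.pop_mean('loss'))
```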
/inverse_rl/utils/hyperparametrized.py:
--------------------------------------------------------------------------------
1 | CLSNAME = '__clsname__'
2 | _HYPER_ = '__hyper__'
3 | _HYPERNAME_ = '__hyper_clsname__'
4 |
5 |
6 | def extract_hyperparams(obj):
7 | if any([isinstance(obj, type_) for type_ in (int, float, str)]):
8 | return obj
9 | elif isinstance(type(obj), Hyperparametrized):
10 | hypers = getattr(obj, _HYPER_)
11 | hypers[CLSNAME] = getattr(obj, _HYPERNAME_)
12 | for attr in hypers:
13 | hypers[attr] = extract_hyperparams(hypers[attr])
14 | return hypers
15 | return type(obj).__name__
16 |
17 | class Hyperparametrized(type):
18 | def __new__(self, clsname, bases, clsdict):
19 | old_init = clsdict.get('__init__', bases[0].__init__)
20 | def init_wrapper(inst, *args, **kwargs):
21 | hyper = getattr(inst, _HYPER_, {})
22 | hyper.update(kwargs)
23 | setattr(inst, _HYPER_, hyper)
24 |
25 | if getattr(inst, _HYPERNAME_, None) is None:
26 | setattr(inst, _HYPERNAME_, clsname)
27 | return old_init(inst, *args, **kwargs)
28 | clsdict['__init__'] = init_wrapper
29 |
30 | cls = super(Hyperparametrized, self).__new__(self, clsname, bases, clsdict)
31 | return cls
32 |
33 |
34 | class HyperparamWrapper(object, metaclass=Hyperparametrized):
35 | def __init__(self, **hyper_kwargs):
36 | pass
37 |
38 | if __name__ == "__main__":
39 | class Algo1(object, metaclass=Hyperparametrized):
40 | def __init__(self, hyper1=1.0, hyper2=2.0, model1=None):
41 | pass
42 |
43 |
44 | class Algo2(Algo1):
45 | def __init__(self, hyper3=5.0, **kwargs):
46 | super(Algo2, self).__init__(**kwargs)
47 |
48 |
49 | class Model1(object, metaclass=Hyperparametrized):
50 | def __init__(self, hyper1=None):
51 | pass
52 |
53 |
54 | def get_params_json(**kwargs):
55 | hyper_dict = extract_hyperparams(HyperparamWrapper(**kwargs))
56 | del hyper_dict[CLSNAME]
57 | return hyper_dict
58 |
59 | m1 = Model1(hyper1='Test')
60 | a1 = Algo2(hyper1=1.0, hyper2=5.0, hyper3=10.0, model1=m1)
61 |
62 | print( isinstance(type(a1), Hyperparametrized))
63 | print(get_params_json(a1=a1))
64 |
--------------------------------------------------------------------------------
/inverse_rl/utils/math_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import scipy.stats
4 |
5 | def rle(inarray):
 6 |     """ Run-length encoding. Partial credit to the R rle function.
 7 |         Handles arrays of multiple datatypes, including non-NumPy input.
8 | returns: tuple (runlengths, startpositions, values) """
9 | ia = np.array(inarray) # force numpy
10 | n = len(ia)
11 | if n == 0:
12 | return (None, None, None)
13 | else:
14 | y = np.array(ia[1:] != ia[:-1]) # pairwise unequal (string safe)
15 | i = np.append(np.where(y), n - 1) # must include last element posi
16 | z = np.diff(np.append(-1, i)) # run lengths
17 | p = np.cumsum(np.append(0, z))[:-1] # positions
18 | return(z, p, ia[i])
19 |
20 | def split_list_by_lengths(values, lengths):
21 | """
22 |
23 | >>> split_list_by_lengths([0,0,0,1,1,1,2,2,2], [2,2,5])
24 | [[0, 0], [0, 1], [1, 1, 2, 2, 2]]
25 | """
26 | assert np.sum(lengths) == len(values)
27 | idxs = np.cumsum(lengths)
28 | idxs = np.insert(idxs, 0, 0)
29 | return [ values[idxs[i]:idxs[i+1] ] for i in range(len(idxs)-1)]
30 |
31 | def clip_sing(X, clip_val=1):
32 | U, E, V = np.linalg.svd(X, full_matrices=False)
33 | E = np.clip(E, -clip_val, clip_val)
34 | return U.dot(np.diag(E)).dot(V)
35 |
36 | def gauss_log_pdf(params, x):
37 | mean, log_diag_std = params
38 | N, d = mean.shape
39 | cov = np.square(np.exp(log_diag_std))
40 | diff = x-mean
41 | exp_term = -0.5 * np.sum(np.square(diff)/cov, axis=1)
42 | norm_term = -0.5*d*np.log(2*np.pi)
43 | var_term = -0.5 * np.sum(np.log(cov), axis=1)
44 | log_probs = norm_term + var_term + exp_term
45 | return log_probs #sp.stats.multivariate_normal.logpdf(x, mean=mean, cov=cov)
46 |
47 | def categorical_log_pdf(params, x, one_hot=True):
48 | if not one_hot:
49 | raise NotImplementedError()
50 | probs = params[0]
51 | return np.log(np.max(probs * x, axis=1))
52 |
53 |
--------------------------------------------------------------------------------
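`rle` has no doctest, so here is a small worked example of its return convention, `(runlengths, startpositions, values)`:

```python
import numpy as np

from inverse_rl.utils.math_utils import rle

lengths, starts, values = rle(np.array([1, 1, 2, 2, 2, 3]))
print(lengths)   # [2 3 1]
print(starts)    # [0 2 5]
print(values)    # [1 2 3]
```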
/rllab/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 | *.pyc
3 | *-checkpoint.ipynb
4 | .DS_Store
5 | *.h5
6 | *.log
7 | *.npz
8 | secrets.py
9 | *.avi
10 | *.mp4
11 | build
12 | build_linux
13 | .idea
14 | .sublime-project
15 | run_experiment.sh
16 | scratch-notebooks
17 | launch_scripts
18 | *.sh.e*
19 | *.sh.o*
20 | MUJOCO_LOG.TXT
21 | vendor/mujoco
22 | .project
23 | .pydevproject
24 | *.pdf
25 | .env
26 | snippets
27 | private
28 | lua
29 | iterate.dat
30 | .env
31 | src/
32 | .settings
33 | .pods
34 | docs/_build
35 | blackbox.zip
36 | blackbox
37 | rllab/config_personal.py
38 | *.swp
39 |
--------------------------------------------------------------------------------
/rllab/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 rllab contributors
4 |
5 | rllab uses a shared copyright model: each contributor holds copyright over
6 | their contributions to rllab. The project versioning records all such
7 | contribution and copyright details.
8 | By contributing to the rllab repository through pull-request, comment,
9 | or otherwise, the contributor releases their content to the license and
10 | copyright terms herein.
11 |
12 | Permission is hereby granted, free of charge, to any person obtaining a copy
13 | of this software and associated documentation files (the "Software"), to deal
14 | in the Software without restriction, including without limitation the rights
15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 | copies of the Software, and to permit persons to whom the Software is
17 | furnished to do so, subject to the following conditions:
18 |
19 | The above copyright notice and this permission notice shall be included in all
20 | copies or substantial portions of the Software.
21 |
22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 | SOFTWARE.
29 |
--------------------------------------------------------------------------------
/rllab/circle.yml:
--------------------------------------------------------------------------------
1 | machine:
2 | services:
3 | - docker
4 |
5 | dependencies:
6 | cache_directories:
7 | - "~/docker"
8 | override:
9 | - docker info
10 | - if [[ -e ~/docker/image.tar ]]; then docker load -i ~/docker/image.tar; fi
11 | - docker build -t tester -f docker/tester_Dockerfile .
12 | - mkdir -p ~/docker; docker save tester > ~/docker/image.tar
13 |
14 | test:
15 | override:
16 | - docker run tester /bin/bash -li -c "CIRCLECI=true nose2"
17 |
--------------------------------------------------------------------------------
/rllab/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/alexbeloi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/alexbeloi/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/examples/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/alexbeloi/examples/trpois_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.algos.tnpg import TNPG
3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
4 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
5 | from rllab.envs.normalized_env import normalize
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 | from contrib.alexbeloi.is_sampler import ISSampler
8 |
9 | """
10 | Example using TRPO with ISSampler, iterations alternate between live and
11 | importance sampled iterations.
12 | """
13 |
14 | env = normalize(CartpoleEnv())
15 |
16 | policy = GaussianMLPPolicy(
17 | env_spec=env.spec,
18 | # The neural network policy should have two hidden layers, each with 32 hidden units.
19 | hidden_sizes=(32, 32)
20 | )
21 |
22 | baseline = LinearFeatureBaseline(env_spec=env.spec)
23 |
24 | optimizer_args = dict(
25 | # debug_nan=True,
26 | # reg_coeff=0.1,
27 | # cg_iters=2
28 | )
29 |
30 | algo = TRPO(
31 | env=env,
32 | policy=policy,
33 | baseline=baseline,
34 | batch_size=4000,
35 | max_path_length=100,
36 | n_itr=200,
37 | discount=0.99,
38 | step_size=0.01,
39 | sampler_cls=ISSampler,
40 | sampler_args=dict(n_backtrack=1),
41 | optimizer_args=optimizer_args
42 | )
43 | algo.train()
44 |
--------------------------------------------------------------------------------
/rllab/contrib/alexbeloi/examples/vpgis_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.vpg import VPG
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 | from contrib.alexbeloi.is_sampler import ISSampler
7 |
8 | """
9 | Example using VPG with ISSampler, iterations alternate between live and
10 | importance sampled iterations.
11 | """
12 |
13 | env = normalize(CartpoleEnv())
14 |
15 | policy = GaussianMLPPolicy(
16 | env_spec=env.spec,
17 | # The neural network policy should have two hidden layers, each with 32 hidden units.
18 | hidden_sizes=(32, 32)
19 | )
20 |
21 | baseline = LinearFeatureBaseline(env_spec=env.spec)
22 |
23 | algo = VPG(
24 | env=env,
25 | policy=policy,
26 | baseline=baseline,
27 | batch_size=4000,
28 | max_path_length=100,
29 | n_itr=40,
30 | discount=0.99,
31 | step_size=0.01,
32 | sampler_cls=ISSampler,
33 | sampler_args=dict(n_backtrack=1),
34 | )
35 | algo.train()
36 |
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/examples/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | env = normalize(GymEnv("Acrobot-v1"))
11 |
12 | policy = CategoricalMLPPolicy(
13 | env_spec=env.spec,
14 | hidden_sizes=(32, 32)
15 | )
16 |
17 | baseline = LinearFeatureBaseline(env_spec=env.spec)
18 |
19 | algo = TRPO(
20 | env=env,
21 | policy=policy,
22 | baseline=baseline,
23 | batch_size=4000,
24 | max_path_length=env.horizon,
25 | n_itr=50,
26 | discount=0.99,
27 | step_size=0.01,
28 | plot=True,
29 | )
30 | algo.train()
31 |
32 |
33 | run_experiment_lite(
34 | run_task,
35 | n_parallel=1,
36 | snapshot_mode="last",
37 | plot=True,
38 | )
39 |
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | env = normalize(GymEnv("CartPole-v0"))
11 |
12 | policy = CategoricalMLPPolicy(
13 | env_spec=env.spec,
14 | hidden_sizes=(32, 32)
15 | )
16 |
17 | baseline = LinearFeatureBaseline(env_spec=env.spec)
18 |
19 | algo = TRPO(
20 | env=env,
21 | policy=policy,
22 | baseline=baseline,
23 | batch_size=4000,
24 | max_path_length=env.horizon,
25 | n_itr=50,
26 | discount=0.99,
27 | step_size=0.01,
28 | plot=True,
29 | )
30 | algo.train()
31 |
32 |
33 | run_experiment_lite(
34 | run_task,
35 | n_parallel=1,
36 | snapshot_mode="last",
37 | plot=True,
38 | )
39 |
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | env = normalize(GymEnv("CartPole-v1"))
11 |
12 | policy = CategoricalMLPPolicy(
13 | env_spec=env.spec,
14 | hidden_sizes=(32, 32)
15 | )
16 |
17 | baseline = LinearFeatureBaseline(env_spec=env.spec)
18 |
19 | algo = TRPO(
20 | env=env,
21 | policy=policy,
22 | baseline=baseline,
23 | batch_size=4000,
24 | max_path_length=env.horizon,
25 | n_itr=50,
26 | discount=0.99,
27 | step_size=0.01,
28 | plot=True,
29 | )
30 | algo.train()
31 |
32 |
33 | run_experiment_lite(
34 | run_task,
35 | n_parallel=1,
36 | snapshot_mode="last",
37 | plot=True,
38 | )
39 |
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py:
--------------------------------------------------------------------------------
1 | # This doesn't work. After 150 iterations still didn't learn anything.
2 |
3 | from rllab.algos.trpo import TRPO
4 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
5 | from rllab.envs.gym_env import GymEnv
6 | from rllab.envs.normalized_env import normalize
7 | from rllab.misc.instrument import run_experiment_lite
8 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
9 |
10 |
11 | def run_task(*_):
12 | env = normalize(GymEnv("MountainCar-v0"))
13 |
14 | policy = CategoricalMLPPolicy(
15 | env_spec=env.spec,
16 | hidden_sizes=(32, 32)
17 | )
18 |
19 | baseline = LinearFeatureBaseline(env_spec=env.spec)
20 |
21 | algo = TRPO(
22 | env=env,
23 | policy=policy,
24 | baseline=baseline,
25 | batch_size=4000,
26 | max_path_length=env.horizon,
27 | n_itr=150,
28 | discount=0.99,
29 | step_size=0.1,
30 | plot=True,
31 | )
32 | algo.train()
33 |
34 |
35 | run_experiment_lite(
36 | run_task,
37 | n_parallel=1,
38 | snapshot_mode="last",
39 | plot=True,
40 | )
41 |
--------------------------------------------------------------------------------
/rllab/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | env = normalize(GymEnv("Pendulum-v0"))
11 |
12 | policy = GaussianMLPPolicy(
13 | env_spec=env.spec,
14 | hidden_sizes=(32, 32)
15 | )
16 |
17 | baseline = LinearFeatureBaseline(env_spec=env.spec)
18 |
19 | algo = TRPO(
20 | env=env,
21 | policy=policy,
22 | baseline=baseline,
23 | batch_size=4000,
24 | max_path_length=env.horizon,
25 | n_itr=50,
26 | discount=0.99,
27 | step_size=0.01,
28 | plot=True,
29 | )
30 | algo.train()
31 |
32 |
33 | run_experiment_lite(
34 | run_task,
35 | n_parallel=1,
36 | snapshot_mode="last",
37 | plot=True,
38 | )
39 |
--------------------------------------------------------------------------------
/rllab/contrib/rllab_hyperopt/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/rllab_hyperopt/example/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/example/__init__.py
--------------------------------------------------------------------------------
/rllab/contrib/rllab_hyperopt/example/main.py:
--------------------------------------------------------------------------------
1 | '''
2 | Main module to launch an example hyperopt search on EC2.
3 |
4 | Launch this from outside the rllab main dir. Otherwise, rllab will try to ship the logfiles being written by this process,
5 | which will fail because tar doesn't want to tar files that are being written to. Alternatively, disable the packaging of
6 | log files by rllab, but I couldn't quickly find how to do this.
7 |
8 | You can use Jupyter notebook visualize_hyperopt_results.ipynb to inspect results.
9 | '''
10 | from hyperopt import hp
11 |
12 | from contrib.rllab_hyperopt.core import launch_hyperopt_search
13 | # the functions to run the task and process result do not need to be in separate files. They do need to be separate from
14 | # the main file though. Also, anything you import in the module that contains run_task needs to be on the Rllab AMI.
15 | # Therefore, since I use pandas to process results, I have put them in separate files here.
16 | from contrib.rllab_hyperopt.example.score import process_result
17 | from contrib.rllab_hyperopt.example.task import run_task
18 |
19 | # define a search space. See https://github.com/hyperopt/hyperopt/wiki/FMin, sect 2 for more detail
20 | param_space = {'step_size': hp.uniform('step_size', 0.01, 0.1),
21 | 'seed': hp.choice('seed',[0, 1, 2])}
22 |
23 | # just by way of example, pass a different config to run_experiment_lite
24 | run_experiment_kwargs = dict(
25 | n_parallel=16,
26 | aws_config=dict(instance_type="c4.4xlarge",spot_price='0.7')
27 | )
28 |
29 | launch_hyperopt_search(
30 | run_task, # the task to run
31 | process_result, # the function that will process results and return a score
32 | param_space, # param search space
33 | hyperopt_experiment_key='test12', # key for hyperopt DB, and also exp_prefix for run_experiment_lite
34 | n_hyperopt_workers=3, # nr of local workers AND nr of EC2 instances that will be started in parallel
35 | hyperopt_max_evals=5, # nr of parameter values to eval
36 | result_timeout=600, # wait this long for results from S3 before timing out
37 | run_experiment_kwargs=run_experiment_kwargs) # additional kwargs to pass to run_experiment_lite
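A minimal sketch (not part of the repository) of what a single draw from the param_space above looks like; it uses hyperopt's stochastic sampler, assuming a standard hyperopt install:

    from hyperopt import hp
    from hyperopt.pyll.stochastic import sample

    # Same search space as defined in main.py above.
    param_space = {'step_size': hp.uniform('step_size', 0.01, 0.1),
                   'seed': hp.choice('seed', [0, 1, 2])}

    # Prints one random configuration, e.g. {'seed': 1, 'step_size': 0.034...} --
    # roughly the kind of dict hyperopt proposes for each evaluation.
    print(sample(param_space))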
--------------------------------------------------------------------------------
/rllab/contrib/rllab_hyperopt/example/score.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 | from rllab import config
5 |
6 | def process_result(exp_prefix, exp_name):
7 | # Open the default rllab path for storing results
8 | result_path = os.path.join(config.LOG_DIR, "s3", exp_prefix, exp_name, 'progress.csv')
9 | print("Processing result from",result_path)
10 |
11 | # This example uses pandas to easily read in results and create a simple smoothed learning curve
12 | df = pd.read_csv(result_path)
13 | curve = df['AverageReturn'].rolling(window=max(1,int(0.05*df.shape[0])), min_periods=1, center=True).mean().values.flatten()
14 | max_ix = curve.argmax()
15 | max_score = curve.max()
16 |
17 | # The result dict can contain arbitrary values, but ALWAYS needs to have a "loss" entry.
18 | return dict(
19 | max_score=max_score,
20 | max_iter=max_ix,
21 | scores=curve, # returning the curve allows you to plot best, worst etc curve later
22 | loss=-max_score
23 | )
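As an illustration (not part of the repository) of the smoothing used in process_result above, the same centered rolling mean applied to a synthetic learning curve:

    import numpy as np
    import pandas as pd

    # Synthetic noisy learning curve standing in for the AverageReturn column.
    returns = pd.Series(np.linspace(0.0, 100.0, 200) + 10.0 * np.random.randn(200))

    window = max(1, int(0.05 * returns.shape[0]))  # window is 5% of the curve length
    curve = returns.rolling(window=window, min_periods=1, center=True).mean().values

    print(curve.argmax(), curve.max())  # iteration and value of the best smoothed return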
--------------------------------------------------------------------------------
/rllab/contrib/rllab_hyperopt/example/task.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 |
7 | def run_task(v):
8 | env = normalize(CartpoleEnv())
9 |
10 | policy = GaussianMLPPolicy(
11 | env_spec=env.spec,
12 | # The neural network policy should have two hidden layers, each with 32 hidden units.
13 | hidden_sizes=(32, 32)
14 | )
15 |
16 | baseline = LinearFeatureBaseline(env_spec=env.spec)
17 |
18 | algo = TRPO(
19 | env=env,
20 | policy=policy,
21 | baseline=baseline,
22 | batch_size=4000,
23 | max_path_length=100,
24 | n_itr=40,
25 | discount=0.99,
26 | step_size=v["step_size"],
27 | # Uncomment both lines (this and the plot parameter below) to enable plotting
28 | # plot=True,
29 | )
30 | algo.train()
--------------------------------------------------------------------------------
/rllab/docker/tester_Dockerfile:
--------------------------------------------------------------------------------
1 | FROM neocxi/rllab_exp_gpu_tf:py3
2 |
3 | RUN bash -c 'source activate rllab3 && conda install -y nomkl && conda uninstall -y scipy && conda install -y scipy'
4 |
5 | ADD . /root/code/rllab
6 | WORKDIR /root/code/rllab
7 |
--------------------------------------------------------------------------------
/rllab/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. rllab documentation master file, created by
2 | sphinx-quickstart on Mon Feb 15 20:07:12 2016.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to rllab
7 | ================
8 |
9 | rllab is a framework for developing and evaluating reinforcement learning algorithms.
10 |
11 | rllab is a work in progress; input is welcome. The available documentation is limited for now.
12 |
13 | User Guide
14 | ==========
15 |
16 | The rllab user guide explains how to install rllab, how to run experiments, and how to implement new MDPs and new algorithms.
17 |
18 | .. toctree::
19 | :maxdepth: 2
20 |
21 | user/installation
22 | user/experiments
23 | user/gym_integration
24 | user/implement_env
25 | user/implement_algo_basic
26 | user/implement_algo_advanced
27 | user/cluster
28 |
29 |
30 | Citing rllab
31 | ============
32 |
33 | If you use rllab for academic research, you are highly encouraged to cite the following paper:
34 |
35 | - Yan Duan, Xi Chen, Rein Houthooft, John Schulman, Pieter Abbeel. "`Benchmarking Deep Reinforcement Learning for Continuous Control <https://arxiv.org/abs/1604.06778>`_". *Proceedings of the 33rd International Conference on Machine Learning (ICML), 2016.*
36 |
37 |
38 | Indices and tables
39 | ==================
40 |
41 | * :ref:`genindex`
42 | * :ref:`modindex`
43 | * :ref:`search`
44 |
45 |
--------------------------------------------------------------------------------
/rllab/docs/user/cluster_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_1.png
--------------------------------------------------------------------------------
/rllab/docs/user/cluster_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_2.png
--------------------------------------------------------------------------------
/rllab/docs/user/cluster_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_3.png
--------------------------------------------------------------------------------
/rllab/docs/user/installation.rst:
--------------------------------------------------------------------------------
1 | .. _installation:
2 |
3 |
4 | ============
5 | Installation
6 | ============
7 |
8 | Preparation
9 | ===========
10 |
11 | You need to edit your :code:`PYTHONPATH` to include the rllab directory:
12 |
13 | .. code-block:: bash
14 |
15 | export PYTHONPATH=path_to_rllab:$PYTHONPATH
16 |
17 | Express Install
18 | ===============
19 |
20 | The fastest way to set up dependencies for rllab is via running the setup script.
21 |
22 | - On Linux, run the following:
23 |
24 | .. code-block:: bash
25 |
26 | ./scripts/setup_linux.sh
27 |
28 | - On Mac OS X, run the following:
29 |
30 | .. code-block:: bash
31 |
32 | ./scripts/setup_osx.sh
33 |
34 | The script sets up a conda environment, which is similar to :code:`virtualenv`. To start using it, run the following:
35 |
36 | .. code-block:: bash
37 |
38 | source activate rllab3
39 |
40 |
41 | Optionally, if you would like to run experiments that depend on the Mujoco environment, you can set it up by running the following command:
42 |
43 | .. code-block:: bash
44 |
45 | ./scripts/setup_mujoco.sh
46 |
47 | and follow the instructions. You need to have the zip file for Mujoco v1.31 and the license file ready.
48 |
49 |
50 |
51 | Manual Install
52 | ==============
53 |
54 | Anaconda
55 | ------------
56 |
57 | :code:`rllab` assumes that you are using the Anaconda Python distribution. You can download it from https://www.continuum.io/downloads. Make sure to download the installer for Python 2.7.
58 |
59 |
60 | System dependencies for pygame
61 | ------------------------------
62 |
63 | A few environments in rllab are implemented using Box2D, which uses pygame for visualization.
64 | It requires a few system dependencies to be installed first.
65 |
66 | On Linux, run the following:
67 |
68 | .. code-block:: bash
69 |
70 | sudo apt-get install swig
71 | sudo apt-get build-dep python-pygame
72 |
73 | On Mac OS X, run the following:
74 |
75 | .. code-block:: bash
76 |
77 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi
78 |
79 | System dependencies for scipy
80 | -----------------------------
81 |
82 | This step is only needed under Linux:
83 |
84 | .. code-block:: bash
85 |
86 |     sudo apt-get build-dep python-scipy
87 |
88 | Install Python modules
89 | ----------------------
90 |
91 | .. code-block:: bash
92 |
93 | conda env create -f environment.yml
94 |
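A quick sanity check (a suggestion, not part of the official instructions): with the rllab3 environment activated and PYTHONPATH set as described above, the package should import from your checkout:

    # Run inside the activated rllab3 environment.
    import rllab
    print(rllab.__file__)  # should point into your rllab checkout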
--------------------------------------------------------------------------------
/rllab/environment.yml:
--------------------------------------------------------------------------------
1 | name: rllab3
2 | channels:
3 | - https://conda.anaconda.org/kne
4 | - https://conda.anaconda.org/tlatorre
5 | - https://conda.anaconda.org/cjs14
6 | - https://conda.anaconda.org/menpo
7 | - jjhelmus
8 | - soumith
9 | dependencies:
10 | - python==3.5.2
11 | - numpy==1.12.0
12 | - scipy
13 | - path.py
14 | - python-dateutil
15 | - joblib==0.10.3
16 | - mako
17 | - ipywidgets
18 | - numba
19 | - flask
20 | - pybox2d
21 | - pygame
22 | - h5py
23 | - matplotlib
24 | - opencv3=3.1.0
25 | - scikit-learn
26 | - pytorch==0.1.9
27 | - torchvision==0.1.6
28 | - mpi4py
29 | - pandas
30 | - pip:
31 | - Pillow
32 | - atari-py
33 | - pyprind
34 | - ipdb
35 | - boto3
36 | - PyOpenGL
37 | - nose2
38 | - pyzmq
39 | - tqdm
40 | - msgpack-python
41 | - git+https://github.com/inksci/mujoco-py-v0.5.7.git
42 | # - mujoco-py==1.50.1.68
43 | - cached_property
44 | - line_profiler
45 | - cloudpickle
46 | - Cython
47 | - redis
48 | - keras==1.2.1
49 | - git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano
50 | - git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne
51 | - git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly
52 | - awscli
53 | - git+https://github.com/openai/gym.git@v0.7.4#egg=gym
54 | - pyglet
55 | - git+https://github.com/neocxi/prettytensor.git
56 | - jupyter
57 | - progressbar2
58 | - chainer==1.18.0
59 | - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp35-cp35m-linux_x86_64.whl; 'linux' in sys_platform
60 | - https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow_gpu-1.7.0-py3-none-any.whl; sys_platform == 'darwin'
61 | - numpy-stl==2.2.0
62 | - nibabel==2.1.0
63 | - pylru==1.0.9
64 | - hyperopt
65 | - polling
66 |
--------------------------------------------------------------------------------
/rllab/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/examples/__init__.py
--------------------------------------------------------------------------------
/rllab/examples/cluster_demo.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import stub, run_experiment_lite
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 | import sys
8 |
9 |
10 | def run_task(v):
11 | env = normalize(CartpoleEnv())
12 |
13 | policy = GaussianMLPPolicy(
14 | env_spec=env.spec,
15 | # The neural network policy should have two hidden layers, each with 32 hidden units.
16 | hidden_sizes=(32, 32)
17 | )
18 |
19 | baseline = LinearFeatureBaseline(env_spec=env.spec)
20 |
21 | algo = TRPO(
22 | env=env,
23 | policy=policy,
24 | baseline=baseline,
25 | batch_size=4000,
26 | max_path_length=100,
27 | n_itr=40,
28 | discount=0.99,
29 | step_size=v["step_size"],
30 | # Uncomment both lines (this and the plot parameter below) to enable plotting
31 | # plot=True,
32 | )
33 | algo.train()
34 |
35 |
36 | for step_size in [0.01, 0.05, 0.1]:
37 | for seed in [1, 11, 21, 31, 41]:
38 | run_experiment_lite(
39 | run_task,
40 | exp_prefix="first_exp",
41 | # Number of parallel workers for sampling
42 | n_parallel=1,
43 | # Only keep the snapshot parameters for the last iteration
44 | snapshot_mode="last",
45 | # Specifies the seed for the experiment. If this is not provided, a random seed
46 | # will be used
47 | seed=seed,
48 | # mode="local",
49 | mode="ec2",
50 | variant=dict(step_size=step_size, seed=seed)
51 | # plot=True,
52 | # terminate_machine=False,
53 | )
54 | sys.exit()
55 |
--------------------------------------------------------------------------------
/rllab/examples/cluster_gym_mujoco_demo.py:
--------------------------------------------------------------------------------
1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
2 | from rllab.envs.normalized_env import normalize
3 | from sandbox.rocky.tf.envs.base import TfEnv
4 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
5 | from sandbox.rocky.tf.algos.trpo import TRPO
6 | from rllab.misc.instrument import run_experiment_lite
7 | from rllab.envs.gym_env import GymEnv
8 | import sys
9 |
10 | from rllab.misc.instrument import VariantGenerator, variant
11 |
12 |
13 | class VG(VariantGenerator):
14 |
15 | @variant
16 | def step_size(self):
17 | return [0.01, 0.05, 0.1]
18 |
19 | @variant
20 | def seed(self):
21 | return [1, 11, 21, 31, 41]
22 |
23 |
24 | def run_task(vv):
25 |
26 | env = TfEnv(normalize(GymEnv('HalfCheetah-v1', record_video=False, record_log=False)))
27 |
28 | policy = GaussianMLPPolicy(
29 | env_spec=env.spec,
30 | # The neural network policy should have two hidden layers, each with 32 hidden units.
31 | hidden_sizes=(32, 32),
32 | name="policy"
33 | )
34 |
35 | baseline = LinearFeatureBaseline(env_spec=env.spec)
36 |
37 | algo = TRPO(
38 | env=env,
39 | policy=policy,
40 | baseline=baseline,
41 | batch_size=4000,
42 | max_path_length=100,
43 | n_itr=40,
44 | discount=0.99,
45 | step_size=vv["step_size"],
46 | # Uncomment both lines (this and the plot parameter below) to enable plotting
47 | # plot=True,
48 | )
49 | algo.train()
50 |
51 |
52 | variants = VG().variants()
53 |
54 | for v in variants:
55 |
56 | run_experiment_lite(
57 | run_task,
58 | exp_prefix="first_exp",
59 | # Number of parallel workers for sampling
60 | n_parallel=1,
61 | # Only keep the snapshot parameters for the last iteration
62 | snapshot_mode="last",
63 | # Specifies the seed for the experiment. If this is not provided, a random seed
64 | # will be used
65 | seed=v["seed"],
66 | # mode="local",
67 | mode="ec2",
68 | variant=v,
69 | # plot=True,
70 | # terminate_machine=False,
71 | )
72 | sys.exit()
73 |
--------------------------------------------------------------------------------
/rllab/examples/ddpg_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.ddpg import DDPG
2 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
3 | from rllab.envs.normalized_env import normalize
4 | from rllab.misc.instrument import run_experiment_lite
5 | from rllab.exploration_strategies.ou_strategy import OUStrategy
6 | from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
7 | from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
8 |
9 |
10 | def run_task(*_):
11 | env = normalize(CartpoleEnv())
12 |
13 | policy = DeterministicMLPPolicy(
14 | env_spec=env.spec,
15 | # The neural network policy should have two hidden layers, each with 32 hidden units.
16 | hidden_sizes=(32, 32)
17 | )
18 |
19 | es = OUStrategy(env_spec=env.spec)
20 |
21 | qf = ContinuousMLPQFunction(env_spec=env.spec)
22 |
23 | algo = DDPG(
24 | env=env,
25 | policy=policy,
26 | es=es,
27 | qf=qf,
28 | batch_size=32,
29 | max_path_length=100,
30 | epoch_length=1000,
31 | min_pool_size=10000,
32 | n_epochs=1000,
33 | discount=0.99,
34 | scale_reward=0.01,
35 | qf_learning_rate=1e-3,
36 | policy_learning_rate=1e-4,
37 | # Uncomment both lines (this and the plot parameter below) to enable plotting
38 | # plot=True,
39 | )
40 | algo.train()
41 |
42 | run_experiment_lite(
43 | run_task,
44 | # Number of parallel workers for sampling
45 | n_parallel=1,
46 | # Only keep the snapshot parameters for the last iteration
47 | snapshot_mode="last",
48 | # Specifies the seed for the experiment. If this is not provided, a random seed
49 | # will be used
50 | seed=1,
51 | # plot=True,
52 | )
53 |
--------------------------------------------------------------------------------
/rllab/examples/nop_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.nop import NOP
2 | from rllab.baselines.zero_baseline import ZeroBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.uniform_control_policy import UniformControlPolicy
6 |
7 | env = normalize(CartpoleEnv())
8 |
9 | policy = UniformControlPolicy(
10 | env_spec=env.spec,
11 | # The neural network policy should have two hidden layers, each with 32 hidden units.
12 | )
13 |
14 | baseline = ZeroBaseline(env_spec=env.spec)
15 |
16 | algo = NOP(
17 | env=env,
18 | policy=policy,
19 | baseline=baseline,
20 | batch_size=4000,
21 | max_path_length=100,
22 | n_itr=40,
23 | discount=0.99,
24 | step_size=0.01,
25 | )
26 | algo.train()
27 |
--------------------------------------------------------------------------------
/rllab/examples/point_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.base import Env
2 | from rllab.spaces import Box
3 | from rllab.envs.base import Step
4 | import numpy as np
5 |
6 |
7 | class PointEnv(Env):
8 | @property
9 | def observation_space(self):
10 | return Box(low=-np.inf, high=np.inf, shape=(2,))
11 |
12 | @property
13 | def action_space(self):
14 | return Box(low=-0.1, high=0.1, shape=(2,))
15 |
16 | def reset(self):
17 | self._state = np.random.uniform(-1, 1, size=(2,))
18 | observation = np.copy(self._state)
19 | return observation
20 |
21 | def step(self, action):
22 | self._state = self._state + action
23 | x, y = self._state
24 | reward = - (x ** 2 + y ** 2) ** 0.5
25 | done = abs(x) < 0.01 and abs(y) < 0.01
26 | next_observation = np.copy(self._state)
27 | return Step(observation=next_observation, reward=reward, done=done)
28 |
29 | def render(self):
30 | print('current state:', self._state)
31 |
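A short random rollout against PointEnv (illustration only, not part of the repository); it assumes the Step object returned above exposes observation, reward and done as attributes, matching the keyword names used to construct it in step:

    import numpy as np
    from examples.point_env import PointEnv

    env = PointEnv()
    obs = env.reset()
    for t in range(100):
        # Sample an action from the Box [-0.1, 0.1]^2 declared by action_space.
        action = np.random.uniform(-0.1, 0.1, size=(2,))
        step = env.step(action)
        print(t, step.observation, step.reward, step.done)
        if step.done:
            break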
--------------------------------------------------------------------------------
/rllab/examples/trpo_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 |
7 | env = normalize(CartpoleEnv())
8 |
9 | policy = GaussianMLPPolicy(
10 | env_spec=env.spec,
11 | # The neural network policy should have two hidden layers, each with 32 hidden units.
12 | hidden_sizes=(32, 32)
13 | )
14 |
15 | baseline = LinearFeatureBaseline(env_spec=env.spec)
16 |
17 | algo = TRPO(
18 | env=env,
19 | policy=policy,
20 | baseline=baseline,
21 | batch_size=4000,
22 | max_path_length=100,
23 | n_itr=40,
24 | discount=0.99,
25 | step_size=0.01,
26 | )
27 | algo.train()
28 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_cartpole_pickled.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | env = normalize(CartpoleEnv())
11 |
12 | policy = GaussianMLPPolicy(
13 | env_spec=env.spec,
14 | # The neural network policy should have two hidden layers, each with 32 hidden units.
15 | hidden_sizes=(32, 32)
16 | )
17 |
18 | baseline = LinearFeatureBaseline(env_spec=env.spec)
19 |
20 | algo = TRPO(
21 | env=env,
22 | policy=policy,
23 | baseline=baseline,
24 | batch_size=4000,
25 | max_path_length=100,
26 | n_itr=1000,
27 | discount=0.99,
28 | step_size=0.01,
29 | # Uncomment both lines (this and the plot parameter below) to enable plotting
30 | #plot=True
31 | )
32 | algo.train()
33 |
34 |
35 | run_experiment_lite(
36 | run_task,
37 | # Number of parallel workers for sampling
38 | n_parallel=2,
39 | # Only keep the snapshot parameters for the last iteration
40 | snapshot_mode="last",
41 | # Specifies the seed for the experiment. If this is not provided, a random seed
42 | # will be used
43 | seed=1,
44 | #plot=True
45 | )
46 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_cartpole_recurrent.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_gru_policy import GaussianGRUPolicy
6 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp
7 | from rllab.misc.instrument import run_experiment_lite
8 |
9 |
10 | def run_task(*_):
11 | env = normalize(CartpoleEnv())
12 |
13 | policy = GaussianGRUPolicy(
14 | env_spec=env.spec,
15 | )
16 |
17 | baseline = LinearFeatureBaseline(env_spec=env.spec)
18 |
19 | algo = TRPO(
20 | env=env,
21 | policy=policy,
22 | baseline=baseline,
23 | batch_size=4000,
24 | max_path_length=100,
25 | n_itr=10,
26 | discount=0.99,
27 | step_size=0.01,
28 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
29 | )
30 | algo.train()
31 |
32 |
33 | run_experiment_lite(
34 | run_task,
35 | n_parallel=1,
36 | seed=1,
37 | )
38 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_gym_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | # Please note that different environments with different action spaces may
11 | # require different policies. For example with a Discrete action space, a
12 |     # CategoricalMLPPolicy works, but for a Box action space you may need to use
13 | # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example)
14 | env = normalize(GymEnv("CartPole-v0"))
15 |
16 | policy = CategoricalMLPPolicy(
17 | env_spec=env.spec,
18 | # The neural network policy should have two hidden layers, each with 32 hidden units.
19 | hidden_sizes=(32, 32)
20 | )
21 |
22 | baseline = LinearFeatureBaseline(env_spec=env.spec)
23 |
24 | algo = TRPO(
25 | env=env,
26 | policy=policy,
27 | baseline=baseline,
28 | batch_size=4000,
29 | max_path_length=env.horizon,
30 | n_itr=50,
31 | discount=0.99,
32 | step_size=0.01,
33 | # Uncomment both lines (this and the plot parameter below) to enable plotting
34 | # plot=True,
35 | )
36 | algo.train()
37 |
38 |
39 | run_experiment_lite(
40 | run_task,
41 | # Number of parallel workers for sampling
42 | n_parallel=1,
43 | # Only keep the snapshot parameters for the last iteration
44 | snapshot_mode="last",
45 | # Specifies the seed for the experiment. If this is not provided, a random seed
46 | # will be used
47 | seed=1,
48 | # plot=True,
49 | )
50 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_gym_pendulum.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.gym_env import GymEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.misc.instrument import run_experiment_lite
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 |
8 |
9 | def run_task(*_):
10 | # Please note that different environments with different action spaces may require different
11 | # policies. For example with a Box action space, a GaussianMLPPolicy works, but for a Discrete
12 |     # action space you may need to use a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example)
13 | env = normalize(GymEnv("Pendulum-v0"))
14 |
15 | policy = GaussianMLPPolicy(
16 | env_spec=env.spec,
17 | # The neural network policy should have two hidden layers, each with 32 hidden units.
18 | hidden_sizes=(32, 32)
19 | )
20 |
21 | baseline = LinearFeatureBaseline(env_spec=env.spec)
22 |
23 | algo = TRPO(
24 | env=env,
25 | policy=policy,
26 | baseline=baseline,
27 | batch_size=4000,
28 | max_path_length=env.horizon,
29 | n_itr=50,
30 | discount=0.99,
31 | step_size=0.01,
32 | # Uncomment both lines (this and the plot parameter below) to enable plotting
33 | # plot=True,
34 | )
35 | algo.train()
36 |
37 |
38 | run_experiment_lite(
39 | run_task,
40 | # Number of parallel workers for sampling
41 | n_parallel=1,
42 | # Only keep the snapshot parameters for the last iteration
43 | snapshot_mode="last",
44 | # Specifies the seed for the experiment. If this is not provided, a random seed
45 | # will be used
46 | seed=1,
47 | # plot=True,
48 | )
49 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_gym_tf_cartpole.py:
--------------------------------------------------------------------------------
1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
2 | from rllab.envs.gym_env import GymEnv
3 | from rllab.envs.normalized_env import normalize
4 | from rllab.misc.instrument import stub, run_experiment_lite
5 |
6 | from sandbox.rocky.tf.envs.base import TfEnv
7 | from sandbox.rocky.tf.policies.categorical_mlp_policy import CategoricalMLPPolicy
8 | from sandbox.rocky.tf.algos.trpo import TRPO
9 |
10 | stub(globals())
11 |
12 | # Need to wrap in a tf environment and force_reset to true
13 | # see https://github.com/openai/rllab/issues/87#issuecomment-282519288
14 | env = TfEnv(normalize(GymEnv("CartPole-v0", force_reset=True)))
15 |
16 | policy = CategoricalMLPPolicy(
17 | name="policy",
18 | env_spec=env.spec,
19 | # The neural network policy should have two hidden layers, each with 32 hidden units.
20 | hidden_sizes=(32, 32)
21 | )
22 |
23 | baseline = LinearFeatureBaseline(env_spec=env.spec)
24 |
25 | algo = TRPO(
26 | env=env,
27 | policy=policy,
28 | baseline=baseline,
29 | batch_size=4000,
30 | max_path_length=200,
31 | n_itr=120,
32 | discount=0.99,
33 | step_size=0.01,
34 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
35 | )
36 |
37 | run_experiment_lite(
38 | algo.train(),
39 | n_parallel=1,
40 | snapshot_mode="last",
41 | seed=1
42 | )
43 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_point.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from examples.point_env import PointEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 |
7 | env = normalize(PointEnv())
8 | policy = GaussianMLPPolicy(
9 | env_spec=env.spec,
10 | )
11 | baseline = LinearFeatureBaseline(env_spec=env.spec)
12 | algo = TRPO(
13 | env=env,
14 | policy=policy,
15 | baseline=baseline,
16 | )
17 | algo.train()
18 |
--------------------------------------------------------------------------------
/rllab/examples/trpo_swimmer.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv
4 | from rllab.envs.normalized_env import normalize
5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 |
7 | env = normalize(SwimmerEnv())
8 |
9 | policy = GaussianMLPPolicy(
10 | env_spec=env.spec,
11 | # The neural network policy should have two hidden layers, each with 32 hidden units.
12 | hidden_sizes=(32, 32)
13 | )
14 |
15 | baseline = LinearFeatureBaseline(env_spec=env.spec)
16 |
17 | algo = TRPO(
18 | env=env,
19 | policy=policy,
20 | baseline=baseline,
21 | batch_size=4000,
22 | max_path_length=500,
23 | n_itr=40,
24 | discount=0.99,
25 | step_size=0.01,
26 | )
27 | algo.train()
28 |
--------------------------------------------------------------------------------
/rllab/rllab/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/algos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/algos/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/algos/base.py:
--------------------------------------------------------------------------------
1 | class Algorithm(object):
2 | pass
3 |
4 |
5 | class RLAlgorithm(Algorithm):
6 |
7 | def train(self):
8 | raise NotImplementedError
9 |
--------------------------------------------------------------------------------
/rllab/rllab/algos/erwr.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.vpg import VPG
2 | from rllab.optimizers.lbfgs_optimizer import LbfgsOptimizer
3 | from rllab.core.serializable import Serializable
4 |
5 |
6 | class ERWR(VPG, Serializable):
7 | """
8 | Episodic Reward Weighted Regression [1]_
9 |
10 | Notes
11 | -----
12 |     This does not implement the original RWR [2]_, which deals with "immediate reward problems", since
13 |     that formulation does not find solutions that optimize for temporally delayed rewards.
14 |
15 | .. [1] Kober, Jens, and Jan R. Peters. "Policy search for motor primitives in robotics." Advances in neural information processing systems. 2009.
16 | .. [2] Peters, Jan, and Stefan Schaal. "Using reward-weighted regression for reinforcement learning of task space control." Approximate Dynamic Programming and Reinforcement Learning, 2007. ADPRL 2007. IEEE International Symposium on. IEEE, 2007.
17 | """
18 |
19 | def __init__(
20 | self,
21 | optimizer=None,
22 | optimizer_args=None,
23 | positive_adv=None,
24 | **kwargs):
25 | Serializable.quick_init(self, locals())
26 | if optimizer is None:
27 | if optimizer_args is None:
28 | optimizer_args = dict()
29 | optimizer = LbfgsOptimizer(**optimizer_args)
30 | super(ERWR, self).__init__(
31 | optimizer=optimizer,
32 | positive_adv=True if positive_adv is None else positive_adv,
33 | **kwargs
34 | )
35 |
36 |
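ERWR is configured like the other batch policy-optimization algorithms; a minimal usage sketch (mirroring examples/trpo_cartpole.py with ERWR swapped in, not a file from the repository):

    from rllab.algos.erwr import ERWR
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from rllab.envs.box2d.cartpole_env import CartpoleEnv
    from rllab.envs.normalized_env import normalize
    from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy

    env = normalize(CartpoleEnv())
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    # Defaults to the L-BFGS optimizer and positive advantages, as set up in __init__ above.
    algo = ERWR(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=100,
        n_itr=40,
        discount=0.99,
    )
    algo.train()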
--------------------------------------------------------------------------------
/rllab/rllab/algos/nop.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.batch_polopt import BatchPolopt
2 | from rllab.misc.overrides import overrides
3 |
4 |
5 | class NOP(BatchPolopt):
6 | """
7 | NOP (no optimization performed) policy search algorithm
8 | """
9 |
10 | def __init__(
11 | self,
12 | **kwargs):
13 | super(NOP, self).__init__(**kwargs)
14 |
15 | @overrides
16 | def init_opt(self):
17 | pass
18 |
19 | @overrides
20 | def optimize_policy(self, itr, samples_data):
21 | pass
22 |
23 | @overrides
24 | def get_itr_snapshot(self, itr, samples_data):
25 | return dict()
26 |
--------------------------------------------------------------------------------
/rllab/rllab/algos/ppo.py:
--------------------------------------------------------------------------------
1 | from rllab.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer
2 | from rllab.algos.npo import NPO
3 | from rllab.core.serializable import Serializable
4 |
5 |
6 | class PPO(NPO, Serializable):
7 | """
8 | Penalized Policy Optimization.
9 | """
10 |
11 | def __init__(
12 | self,
13 | optimizer=None,
14 | optimizer_args=None,
15 | **kwargs):
16 | Serializable.quick_init(self, locals())
17 | if optimizer is None:
18 | if optimizer_args is None:
19 | optimizer_args = dict()
20 | optimizer = PenaltyLbfgsOptimizer(**optimizer_args)
21 | super(PPO, self).__init__(optimizer=optimizer, **kwargs)
22 |
--------------------------------------------------------------------------------
/rllab/rllab/algos/tnpg.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.npo import NPO
2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
3 | from rllab.misc import ext
4 |
5 |
6 | class TNPG(NPO):
7 | """
8 | Truncated Natural Policy Gradient.
9 | """
10 |
11 | def __init__(
12 | self,
13 | optimizer=None,
14 | optimizer_args=None,
15 | **kwargs):
16 | if optimizer is None:
17 | default_args = dict(max_backtracks=1)
18 | if optimizer_args is None:
19 | optimizer_args = default_args
20 | else:
21 | optimizer_args = dict(default_args, **optimizer_args)
22 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
23 | super(TNPG, self).__init__(optimizer=optimizer, **kwargs)
24 |
--------------------------------------------------------------------------------
/rllab/rllab/algos/trpo.py:
--------------------------------------------------------------------------------
1 | from rllab.algos.npo import NPO
2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
3 | from rllab.core.serializable import Serializable
4 |
5 |
6 | class TRPO(NPO):
7 | """
8 | Trust Region Policy Optimization
9 | """
10 |
11 | def __init__(
12 | self,
13 | optimizer=None,
14 | optimizer_args=None,
15 | **kwargs):
16 | if optimizer is None:
17 | if optimizer_args is None:
18 | optimizer_args = dict()
19 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
20 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs)
21 |
--------------------------------------------------------------------------------
/rllab/rllab/baselines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/baselines/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/baselines/base.py:
--------------------------------------------------------------------------------
1 | from rllab.misc import autoargs
2 |
3 |
4 | class Baseline(object):
5 |
6 | def __init__(self, env_spec):
7 | self._mdp_spec = env_spec
8 |
9 | @property
10 | def algorithm_parallelized(self):
11 | return False
12 |
13 | def get_param_values(self):
14 | raise NotImplementedError
15 |
16 | def set_param_values(self, val):
17 | raise NotImplementedError
18 |
19 | def fit(self, paths):
20 | raise NotImplementedError
21 |
22 | def predict(self, path):
23 | raise NotImplementedError
24 |
25 | @classmethod
26 | @autoargs.add_args
27 | def add_args(cls, parser):
28 | pass
29 |
30 | @classmethod
31 | @autoargs.new_from_args
32 | def new_from_args(cls, args, mdp):
33 | pass
34 |
35 | def log_diagnostics(self, paths):
36 | """
37 | Log extra information per iteration based on the collected paths
38 | """
39 | pass
40 |
--------------------------------------------------------------------------------
/rllab/rllab/baselines/gaussian_conv_baseline.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.misc.overrides import overrides
5 | from rllab.core.parameterized import Parameterized
6 | from rllab.baselines.base import Baseline
7 | from rllab.regressors.gaussian_conv_regressor import GaussianConvRegressor
8 |
9 |
10 | class GaussianConvBaseline(Baseline, Parameterized):
11 |
12 | def __init__(
13 | self,
14 | env_spec,
15 | subsample_factor=1.,
16 | regressor_args=None,
17 | ):
18 | Serializable.quick_init(self, locals())
19 | super(GaussianConvBaseline, self).__init__(env_spec)
20 | if regressor_args is None:
21 | regressor_args = dict()
22 |
23 | self._regressor = GaussianConvRegressor(
24 | input_shape=env_spec.observation_space.shape,
25 | output_dim=1,
26 | name="vf",
27 | **regressor_args
28 | )
29 |
30 | @overrides
31 | def fit(self, paths):
32 | observations = np.concatenate([p["observations"] for p in paths])
33 | returns = np.concatenate([p["returns"] for p in paths])
34 | self._regressor.fit(observations, returns.reshape((-1, 1)))
35 |
36 | @overrides
37 | def predict(self, path):
38 | return self._regressor.predict(path["observations"]).flatten()
39 |
40 | @overrides
41 | def get_param_values(self, **tags):
42 | return self._regressor.get_param_values(**tags)
43 |
44 | @overrides
45 | def set_param_values(self, flattened_params, **tags):
46 | self._regressor.set_param_values(flattened_params, **tags)
47 |
--------------------------------------------------------------------------------
/rllab/rllab/baselines/gaussian_mlp_baseline.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.core.parameterized import Parameterized
5 | from rllab.baselines.base import Baseline
6 | from rllab.misc.overrides import overrides
7 | from rllab.regressors.gaussian_mlp_regressor import GaussianMLPRegressor
8 |
9 |
10 | class GaussianMLPBaseline(Baseline, Parameterized):
11 |
12 | def __init__(
13 | self,
14 | env_spec,
15 | subsample_factor=1.,
16 | num_seq_inputs=1,
17 | regressor_args=None,
18 | ):
19 | Serializable.quick_init(self, locals())
20 | super(GaussianMLPBaseline, self).__init__(env_spec)
21 | if regressor_args is None:
22 | regressor_args = dict()
23 |
24 | self._regressor = GaussianMLPRegressor(
25 | input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
26 | output_dim=1,
27 | name="vf",
28 | **regressor_args
29 | )
30 |
31 | @overrides
32 | def fit(self, paths):
33 | observations = np.concatenate([p["observations"] for p in paths])
34 | returns = np.concatenate([p["returns"] for p in paths])
35 | self._regressor.fit(observations, returns.reshape((-1, 1)))
36 |
37 | @overrides
38 | def predict(self, path):
39 | return self._regressor.predict(path["observations"]).flatten()
40 |
41 | @overrides
42 | def get_param_values(self, **tags):
43 | return self._regressor.get_param_values(**tags)
44 |
45 | @overrides
46 | def set_param_values(self, flattened_params, **tags):
47 | self._regressor.set_param_values(flattened_params, **tags)
48 |
--------------------------------------------------------------------------------
/rllab/rllab/baselines/linear_feature_baseline.py:
--------------------------------------------------------------------------------
1 | from rllab.baselines.base import Baseline
2 | from rllab.misc.overrides import overrides
3 | import numpy as np
4 |
5 |
6 | class LinearFeatureBaseline(Baseline):
7 | def __init__(self, env_spec, reg_coeff=1e-5):
8 | self._coeffs = None
9 | self._reg_coeff = reg_coeff
10 |
11 | @overrides
12 | def get_param_values(self, **tags):
13 | return self._coeffs
14 |
15 | @overrides
16 | def set_param_values(self, val, **tags):
17 | self._coeffs = val
18 |
19 | def _features(self, path):
20 | o = np.clip(path["observations"], -10, 10)
21 | l = len(path["rewards"])
22 | al = np.arange(l).reshape(-1, 1) / 100.0
23 | return np.concatenate([o, o ** 2, al, al ** 2, al ** 3, np.ones((l, 1))], axis=1)
24 |
25 | @overrides
26 | def fit(self, paths):
27 | featmat = np.concatenate([self._features(path) for path in paths])
28 | returns = np.concatenate([path["returns"] for path in paths])
29 | reg_coeff = self._reg_coeff
30 | for _ in range(5):
31 | self._coeffs = np.linalg.lstsq(
32 | featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]),
33 | featmat.T.dot(returns)
34 | )[0]
35 | if not np.any(np.isnan(self._coeffs)):
36 | break
37 | reg_coeff *= 10
38 |
39 | @overrides
40 | def predict(self, path):
41 | if self._coeffs is None:
42 | return np.zeros(len(path["rewards"]))
43 | return self._features(path).dot(self._coeffs)
44 |
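A small illustration (not part of the repository) of the fit/predict cycle on a synthetic path; note that this implementation never uses env_spec, so None is passed purely for the demo:

    import numpy as np
    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline

    T = 50
    path = dict(
        observations=np.random.randn(T, 4),
        rewards=np.ones(T),
        returns=np.arange(T, 0, -1, dtype=float),  # synthetic returns-to-go
    )

    baseline = LinearFeatureBaseline(env_spec=None)
    baseline.fit([path])               # ridge regression on [o, o^2, t, t^2, t^3, 1] features
    print(baseline.predict(path)[:5])  # predicted returns for the first five steps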
--------------------------------------------------------------------------------
/rllab/rllab/baselines/zero_baseline.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from rllab.baselines.base import Baseline
3 | from rllab.misc.overrides import overrides
4 |
5 |
6 | class ZeroBaseline(Baseline):
7 |
8 | def __init__(self, env_spec):
9 | pass
10 |
11 | @overrides
12 | def get_param_values(self, **kwargs):
13 | return None
14 |
15 | @overrides
16 | def set_param_values(self, val, **kwargs):
17 | pass
18 |
19 | @overrides
20 | def fit(self, paths):
21 | pass
22 |
23 | @overrides
24 | def predict(self, path):
25 | return np.zeros_like(path["rewards"])
26 |
--------------------------------------------------------------------------------
/rllab/rllab/config.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import os
3 |
4 | PROJECT_PATH = osp.abspath(osp.join(osp.dirname(__file__), '..'))
5 |
6 | LOG_DIR = PROJECT_PATH + "/data"
7 |
8 | USE_TF = False
9 |
10 | DOCKER_IMAGE = "DOCKER_IMAGE"
11 |
12 | DOCKERFILE_PATH = "/path/to/Dockerfile"
13 |
14 | KUBE_PREFIX = "rllab_"
15 |
16 | DOCKER_LOG_DIR = "/tmp/expt"
17 |
18 | POD_DIR = PROJECT_PATH + "/.pods"
19 |
20 | AWS_S3_PATH = None
21 |
22 | AWS_IMAGE_ID = None
23 |
24 | AWS_INSTANCE_TYPE = "m4.xlarge"
25 |
26 | AWS_KEY_NAME = "AWS_KEY_NAME"
27 |
28 | AWS_SPOT = True
29 |
30 | AWS_SPOT_PRICE = '1.0'
31 |
32 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", None)
33 |
34 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", None)
35 |
36 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab"
37 |
38 | AWS_SECURITY_GROUPS = ["rllab"]
39 |
40 | AWS_SECURITY_GROUP_IDS = []
41 |
42 | AWS_NETWORK_INTERFACES = []
43 |
44 | AWS_EXTRA_CONFIGS = dict()
45 |
46 | AWS_REGION_NAME = "us-east-1"
47 |
48 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*.pod/*"]
49 |
50 | DOCKER_CODE_DIR = "/root/code/rllab"
51 |
52 | AWS_CODE_SYNC_S3_PATH = "s3://to/be/overriden/in/personal"
53 |
54 | # whether to use fast code sync
55 | FAST_CODE_SYNC = True
56 |
57 | FAST_CODE_SYNC_IGNORES = [".git", "data", ".pods"]
58 |
59 | KUBE_DEFAULT_RESOURCES = {
60 | "requests": {
61 | "cpu": 0.8,
62 | }
63 | }
64 |
65 | KUBE_DEFAULT_NODE_SELECTOR = {
66 | "aws/type": "m4.xlarge",
67 | }
68 |
69 | MUJOCO_KEY_PATH = osp.expanduser("~/.mujoco")
70 | # MUJOCO_KEY_PATH = osp.join(osp.dirname(__file__), "../vendor/mujoco")
71 |
72 | ENV = {}
73 |
74 | EBS_OPTIMIZED = True
75 |
76 | if osp.exists(osp.join(osp.dirname(__file__), "config_personal.py")):
77 | from .config_personal import *
78 | else:
79 | print("Creating your personal config from template...")
80 | from shutil import copy
81 | copy(osp.join(PROJECT_PATH, "rllab/config_personal_template.py"), osp.join(PROJECT_PATH, "rllab/config_personal.py"))
82 | from .config_personal import *
83 | print("Personal config created, but you should probably edit it before further experiments " \
84 | "are run")
85 | if 'CIRCLECI' not in os.environ:
86 | print("Exiting.")
87 | import sys; sys.exit(0)
88 |
89 | LABEL = ""
90 |
--------------------------------------------------------------------------------
/rllab/rllab/config_personal_template.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | USE_GPU = False
4 |
5 | DOCKER_IMAGE = "dementrock/rllab3-shared"
6 |
7 | KUBE_PREFIX = "template_"
8 |
9 | DOCKER_LOG_DIR = "/tmp/expt"
10 |
11 | AWS_IMAGE_ID = "ami-67c5d00d"
12 |
13 | if USE_GPU:
14 | AWS_INSTANCE_TYPE = "g2.2xlarge"
15 | else:
16 | AWS_INSTANCE_TYPE = "c4.2xlarge"
17 |
18 | AWS_KEY_NAME = "research_virginia"
19 |
20 | AWS_SPOT = True
21 |
22 | AWS_SPOT_PRICE = '10.0'
23 |
24 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab"
25 |
26 | AWS_SECURITY_GROUPS = ["rllab"]
27 |
28 | AWS_REGION_NAME = "us-west-2"
29 |
30 | AWS_CODE_SYNC_S3_PATH = "e"
31 |
32 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*src/*",
33 | "*.pods/*", "*tests/*", "*examples/*", "docs/*"]
34 |
35 | LOCAL_CODE_DIR = ""
36 |
37 | AWS_S3_PATH = ""
38 |
39 | LABEL = "template"
40 |
41 | DOCKER_CODE_DIR = "/root/code/rllab"
42 |
43 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", "")
44 |
45 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", "")
46 |
--------------------------------------------------------------------------------
/rllab/rllab/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/core/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/core/lasagne_powered.py:
--------------------------------------------------------------------------------
1 | from rllab.core.parameterized import Parameterized
2 | from rllab.misc.overrides import overrides
3 | import lasagne.layers as L
4 |
5 |
6 | class LasagnePowered(Parameterized):
7 | def __init__(self, output_layers):
8 | self._output_layers = output_layers
9 | super(LasagnePowered, self).__init__()
10 |
11 | @property
12 | def output_layers(self):
13 | return self._output_layers
14 |
15 | @overrides
16 |     def get_params_internal(self, **tags):  # this gives ALL the variables (not the parameter values)
17 |         return L.get_all_params(  # this Lasagne function also returns all variables below the passed layers
18 | L.concat(self._output_layers),
19 | **tags
20 | )
21 |
--------------------------------------------------------------------------------
/rllab/rllab/core/serializable.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | import sys
3 |
4 |
5 | class Serializable(object):
6 |
7 | def __init__(self, *args, **kwargs):
8 | self.__args = args
9 | self.__kwargs = kwargs
10 |
11 | def quick_init(self, locals_):
12 | if getattr(self, "_serializable_initialized", False):
13 | return
14 | if sys.version_info >= (3, 0):
15 | spec = inspect.getfullargspec(self.__init__)
16 | # Exclude the first "self" parameter
17 | if spec.varkw:
18 | kwargs = locals_[spec.varkw]
19 | else:
20 | kwargs = dict()
21 | else:
22 | spec = inspect.getargspec(self.__init__)
23 | if spec.keywords:
24 | kwargs = locals_[spec.keywords]
25 | else:
26 | kwargs = dict()
27 | if spec.varargs:
28 | varargs = locals_[spec.varargs]
29 | else:
30 | varargs = tuple()
31 | in_order_args = [locals_[arg] for arg in spec.args][1:]
32 | self.__args = tuple(in_order_args) + varargs
33 | self.__kwargs = kwargs
34 | setattr(self, "_serializable_initialized", True)
35 |
36 | def __getstate__(self):
37 | return {"__args": self.__args, "__kwargs": self.__kwargs}
38 |
39 | def __setstate__(self, d):
40 | out = type(self)(*d["__args"], **d["__kwargs"])
41 | self.__dict__.update(out.__dict__)
42 |
43 | @classmethod
44 | def clone(cls, obj, **kwargs):
45 | assert isinstance(obj, Serializable)
46 | d = obj.__getstate__()
47 |
48 | # Split the entries in kwargs between positional and keyword arguments
49 | # and update d['__args'] and d['__kwargs'], respectively.
50 | if sys.version_info >= (3, 0):
51 | spec = inspect.getfullargspec(obj.__init__)
52 | else:
53 | spec = inspect.getargspec(obj.__init__)
54 | in_order_args = spec.args[1:]
55 |
56 | d["__args"] = list(d["__args"])
57 | for kw, val in kwargs.items():
58 | if kw in in_order_args:
59 | d["__args"][in_order_args.index(kw)] = val
60 | else:
61 | d["__kwargs"][kw] = val
62 |
63 | out = type(obj).__new__(type(obj))
64 | out.__setstate__(d)
65 | return out
66 |
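A hypothetical subclass (illustration only, not part of the repository) showing how quick_init captures the constructor arguments and how clone re-instantiates the object with one of them replaced:

    from rllab.core.serializable import Serializable

    class MyModule(Serializable):
        def __init__(self, hidden_sizes=(32, 32), learning_rate=1e-3):
            # Record the constructor arguments so the object can be pickled and cloned.
            Serializable.quick_init(self, locals())
            self.hidden_sizes = hidden_sizes
            self.learning_rate = learning_rate

    m = MyModule(hidden_sizes=(64, 64))
    m2 = Serializable.clone(m, learning_rate=1e-2)  # re-runs __init__ with one argument replaced
    print(m2.hidden_sizes, m2.learning_rate)        # (64, 64) 0.01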
--------------------------------------------------------------------------------
/rllab/rllab/distributions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/distributions/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/distributions/base.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as TT
2 |
3 | class Distribution(object):
4 |
5 | @property
6 | def dim(self):
7 | raise NotImplementedError
8 |
9 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
10 | """
11 | Compute the symbolic KL divergence of two distributions
12 | """
13 | raise NotImplementedError
14 |
15 | def kl(self, old_dist_info, new_dist_info):
16 | """
17 | Compute the KL divergence of two distributions
18 | """
19 | raise NotImplementedError
20 |
21 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
22 | raise NotImplementedError
23 |
24 | def entropy(self, dist_info):
25 | raise NotImplementedError
26 |
27 | def log_likelihood_sym(self, x_var, dist_info_vars):
28 | raise NotImplementedError
29 |
30 | def likelihood_sym(self, x_var, dist_info_vars):
31 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars))
32 |
33 | def log_likelihood(self, xs, dist_info):
34 | raise NotImplementedError
35 |
36 | @property
37 | def dist_info_keys(self):
38 | raise NotImplementedError
39 |
--------------------------------------------------------------------------------
/rllab/rllab/distributions/bernoulli.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .base import Distribution
4 | import theano.tensor as TT
5 | import numpy as np
6 |
7 | TINY = 1e-8
8 |
9 |
10 | class Bernoulli(Distribution):
11 | def __init__(self, dim):
12 | self._dim = dim
13 |
14 | @property
15 | def dim(self):
16 | return self._dim
17 |
18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
19 | old_p = old_dist_info_vars["p"]
20 | new_p = new_dist_info_vars["p"]
21 | kl = old_p * (TT.log(old_p + TINY) - TT.log(new_p + TINY)) + \
22 | (1 - old_p) * (TT.log(1 - old_p + TINY) - TT.log(1 - new_p + TINY))
23 | return TT.sum(kl, axis=-1)
24 |
25 | def kl(self, old_dist_info, new_dist_info):
26 | old_p = old_dist_info["p"]
27 | new_p = new_dist_info["p"]
28 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \
29 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY))
30 | return np.sum(kl, axis=-1)
31 |
32 | def sample(self, dist_info):
33 | p = np.asarray(dist_info["p"])
34 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p)
35 |
36 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
37 | old_p = old_dist_info_vars["p"]
38 | new_p = new_dist_info_vars["p"]
39 | return TT.prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY),
40 | axis=-1)
41 |
42 | def log_likelihood_sym(self, x_var, dist_info_vars):
43 | p = dist_info_vars["p"]
44 | return TT.sum(x_var * TT.log(p + TINY) + (1 - x_var) * TT.log(1 - p + TINY), axis=-1)
45 |
46 | def log_likelihood(self, xs, dist_info):
47 | p = dist_info["p"]
48 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1)
49 |
50 | def entropy(self, dist_info):
51 | p = dist_info["p"]
52 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1)
53 |
54 | @property
55 | def dist_info_keys(self):
56 | return ["p"]
57 |
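A numeric sanity check (illustration only, not part of the repository) for the numpy-based methods above; the *_sym variants build Theano expressions and are not exercised here:

    import numpy as np
    from rllab.distributions.bernoulli import Bernoulli

    dist = Bernoulli(dim=2)
    p_a = dict(p=np.array([[0.5, 0.9]]))
    p_b = dict(p=np.array([[0.5, 0.5]]))

    print(dist.kl(p_a, p_a))                             # ~[0.], KL between identical distributions
    print(dist.kl(p_a, p_b))                             # > 0, the second component differs
    print(dist.entropy(p_a))                             # per-sample entropy, summed over the 2 dims
    print(dist.log_likelihood(np.array([[1, 0]]), p_a))  # log p(x=[1, 0]) under p_a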
--------------------------------------------------------------------------------
/rllab/rllab/distributions/delta.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as TT  # needed by likelihood_sym below
2 | from rllab.distributions.base import Distribution
3 | class Delta(Distribution):
4 | @property
5 | def dim(self):
6 | return 0
7 |
8 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
9 | return None
10 |
11 | def kl(self, old_dist_info, new_dist_info):
12 | return None
13 |
14 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
15 | raise NotImplementedError
16 |
17 | def entropy(self, dist_info):
18 | raise NotImplementedError
19 |
20 | def log_likelihood_sym(self, x_var, dist_info_vars):
21 | raise NotImplementedError
22 |
23 | def likelihood_sym(self, x_var, dist_info_vars):
24 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars))
25 |
26 | def log_likelihood(self, xs, dist_info):
27 | return None
28 |
29 | @property
30 | def dist_info_keys(self):
31 | return None
32 |
33 |     def entropy(self, dist_info):  # note: overrides the NotImplementedError stub defined above
34 | return 0
35 |
--------------------------------------------------------------------------------
/rllab/rllab/distributions/recurrent_diagonal_gaussian.py:
--------------------------------------------------------------------------------
1 | import theano.tensor as TT
2 | import numpy as np
3 | from rllab.distributions.base import Distribution
4 | from rllab.distributions.diagonal_gaussian import DiagonalGaussian
5 |
6 | RecurrentDiagonalGaussian = DiagonalGaussian
7 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/box2d/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/cartpole_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from rllab.envs.box2d.parser import find_body
3 |
4 | from rllab.core.serializable import Serializable
5 | from rllab.envs.box2d.box2d_env import Box2DEnv
6 | from rllab.misc import autoargs
7 | from rllab.misc.overrides import overrides
8 |
9 |
10 | class CartpoleEnv(Box2DEnv, Serializable):
11 |
12 | @autoargs.inherit(Box2DEnv.__init__)
13 | def __init__(self, *args, **kwargs):
14 | self.max_pole_angle = .2
15 | self.max_cart_pos = 2.4
16 | self.max_cart_speed = 4.
17 | self.max_pole_speed = 4.
18 | self.reset_range = 0.05
19 | super(CartpoleEnv, self).__init__(
20 | self.model_path("cartpole.xml.mako"),
21 | *args, **kwargs
22 | )
23 | self.cart = find_body(self.world, "cart")
24 | self.pole = find_body(self.world, "pole")
25 | Serializable.__init__(self, *args, **kwargs)
26 |
27 | @overrides
28 | def reset(self):
29 | self._set_state(self.initial_state)
30 | self._invalidate_state_caches()
31 | bounds = np.array([
32 | self.max_cart_pos,
33 | self.max_cart_speed,
34 | self.max_pole_angle,
35 | self.max_pole_speed
36 | ])
37 | low, high = -self.reset_range*bounds, self.reset_range*bounds
38 | xpos, xvel, apos, avel = np.random.uniform(low, high)
39 | self.cart.position = (xpos, self.cart.position[1])
40 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1])
41 | self.pole.angle = apos
42 | self.pole.angularVelocity = avel
43 | return self.get_current_obs()
44 |
45 | @overrides
46 | def compute_reward(self, action):
47 | yield
48 | notdone = 1 - int(self.is_current_done())
49 | ucost = 1e-5*(action**2).sum()
50 | xcost = 1 - np.cos(self.pole.angle)
51 | yield notdone * 10 - notdone * xcost - notdone * ucost
52 |
53 | @overrides
54 | def is_current_done(self):
55 | return abs(self.cart.position[0]) > self.max_cart_pos or \
56 | abs(self.pole.angle) > self.max_pole_angle
57 |
58 |
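compute_reward above is written as a two-stage generator: a bare yield first, then the reward computed from the current cart/pole state. This suggests a driver that advances the physics between the two stages; the actual driver lives in Box2DEnv (box2d_env.py, not included in this listing), so the sketch below illustrates only the generator protocol, not that implementation:

    # Hypothetical driver, illustrating the protocol only.
    def run_reward_generator(env, action, advance_physics):
        reward_computer = env.compute_reward(action)
        next(reward_computer)         # run the part before the first `yield` (pre-step bookkeeping)
        advance_physics(action)       # step the simulation forward
        return next(reward_computer)  # resume after the `yield` to obtain the reward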
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/cartpole_swingup_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pygame
3 | from rllab.envs.box2d.parser import find_body
4 |
5 | from rllab.core.serializable import Serializable
6 | from rllab.envs.box2d.box2d_env import Box2DEnv
7 | from rllab.misc import autoargs
8 | from rllab.misc.overrides import overrides
9 |
10 |
11 | # Tornio, Matti, and Tapani Raiko. "Variational Bayesian approach for
12 | # nonlinear identification and control." Proc. of the IFAC Workshop on
13 | # Nonlinear Model Predictive Control for Fast Systems, NMPC FS06. 2006.
14 | class CartpoleSwingupEnv(Box2DEnv, Serializable):
15 |
16 | @autoargs.inherit(Box2DEnv.__init__)
17 | def __init__(self, *args, **kwargs):
18 | super(CartpoleSwingupEnv, self).__init__(
19 | self.model_path("cartpole.xml.mako"),
20 | *args, **kwargs
21 | )
22 | self.max_cart_pos = 3
23 | self.max_reward_cart_pos = 3
24 | self.cart = find_body(self.world, "cart")
25 | self.pole = find_body(self.world, "pole")
26 | Serializable.__init__(self, *args, **kwargs)
27 |
28 | @overrides
29 | def reset(self):
30 | self._set_state(self.initial_state)
31 | self._invalidate_state_caches()
32 | bounds = np.array([
33 | [-1, -2, np.pi-1, -3],
34 | [1, 2, np.pi+1, 3],
35 | ])
36 | low, high = bounds
37 | xpos, xvel, apos, avel = np.random.uniform(low, high)
38 | self.cart.position = (xpos, self.cart.position[1])
39 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1])
40 | self.pole.angle = apos
41 | self.pole.angularVelocity = avel
42 | return self.get_current_obs()
43 |
44 | @overrides
45 | def compute_reward(self, action):
46 | yield
47 | if self.is_current_done():
48 | yield -100
49 | else:
50 | if abs(self.cart.position[0]) > self.max_reward_cart_pos:
51 | yield -1
52 | else:
53 | yield np.cos(self.pole.angle)
54 |
55 | @overrides
56 | def is_current_done(self):
57 | return abs(self.cart.position[0]) > self.max_cart_pos
58 |
59 | @overrides
60 | def action_from_keys(self, keys):
61 | if keys[pygame.K_LEFT]:
62 | return np.asarray([-10])
63 | elif keys[pygame.K_RIGHT]:
64 | return np.asarray([+10])
65 | else:
66 | return np.asarray([0])
67 |
68 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/double_pendulum_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from rllab.envs.box2d.parser import find_body
3 |
4 | from rllab.core.serializable import Serializable
5 | from rllab.envs.box2d.box2d_env import Box2DEnv
6 | from rllab.misc import autoargs
7 | from rllab.misc.overrides import overrides
8 |
9 |
10 | # http://mlg.eng.cam.ac.uk/pilco/
11 | class DoublePendulumEnv(Box2DEnv, Serializable):
12 |
13 | @autoargs.inherit(Box2DEnv.__init__)
14 | def __init__(self, *args, **kwargs):
15 | # make sure mdp-level step is 100ms long
16 | kwargs["frame_skip"] = kwargs.get("frame_skip", 2)
17 | if kwargs.get("template_args", {}).get("noise", False):
18 | self.link_len = (np.random.rand()-0.5) + 1
19 | else:
20 | self.link_len = 1
21 | kwargs["template_args"] = kwargs.get("template_args", {})
22 | kwargs["template_args"]["link_len"] = self.link_len
23 | super(DoublePendulumEnv, self).__init__(
24 | self.model_path("double_pendulum.xml.mako"),
25 | *args, **kwargs
26 | )
27 | self.link1 = find_body(self.world, "link1")
28 | self.link2 = find_body(self.world, "link2")
29 | Serializable.__init__(self, *args, **kwargs)
30 |
31 | @overrides
32 | def reset(self):
33 | self._set_state(self.initial_state)
34 | self._invalidate_state_caches()
35 | stds = np.array([0.1, 0.1, 0.01, 0.01])
36 | pos1, pos2, v1, v2 = np.random.randn(*stds.shape) * stds
37 | self.link1.angle = pos1
38 | self.link2.angle = pos2
39 | self.link1.angularVelocity = v1
40 | self.link2.angularVelocity = v2
41 | return self.get_current_obs()
42 |
43 | def get_tip_pos(self):
44 | cur_center_pos = self.link2.position
45 | cur_angle = self.link2.angle
46 | cur_pos = (
47 | cur_center_pos[0] - self.link_len*np.sin(cur_angle),
48 | cur_center_pos[1] - self.link_len*np.cos(cur_angle)
49 | )
50 | return cur_pos
51 |
52 | @overrides
53 | def compute_reward(self, action):
54 | yield
55 | tgt_pos = np.asarray([0, self.link_len * 2])
56 | cur_pos = self.get_tip_pos()
57 | dist = np.linalg.norm(cur_pos - tgt_pos)
58 | yield -dist
59 |
60 | def is_current_done(self):
61 | return False
62 |
63 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/models/car_parking.xml:
--------------------------------------------------------------------------------
[Box2D car-parking world definition; the XML markup was not preserved in this dump. See car_parking.xml.rb below for the script that generates it.]
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/models/car_parking.xml.rb:
--------------------------------------------------------------------------------
1 | car_height = 1.0
2 | car_width = 0.6
3 | car_mass = 1
4 | car_density = car_mass / car_height / car_width
5 |
6 | wheel_height = 0.3
7 | wheel_width = 0.1
8 | wheel_mass = 0.1
9 | wheel_density = wheel_mass / wheel_height / wheel_width
10 | wheel_max_deg = 30
11 |
12 | phantom_group = -1
13 | common = { group: phantom_group }
14 |
15 | box2d {
16 | world(timestep: 0.05, gravity: [0, 0]) {
17 | body(name: :goal, type: :static, position: [0, 0]) {
18 | fixture(common.merge(shape: :circle, radius: 1))
19 | }
20 |
21 | car_pos = [3, 4]
22 | body(name: :car, type: :dynamic, position: car_pos) {
23 | rect(
24 | box: [car_width / 2, car_height / 2],
25 | density: car_density,
26 | group: phantom_group,
27 | )
28 | }
29 | [:left_front_wheel, :right_front_wheel, :left_rear_wheel, :right_rear_wheel].each do |wheel|
30 | x_pos = car_width / 2
31 | x_pos *= wheel =~ /left/ ? -1 : 1
32 | y_pos = wheel =~ /front/ ? 0.2 : -0.3
33 | body(name: wheel, type: :dynamic, position: [car_pos[0] + x_pos, car_pos[1] + y_pos]) {
34 | rect(
35 | box: [wheel_width / 2, wheel_height / 2],
36 | density: wheel_density,
37 | group: phantom_group,
38 | )
39 | }
40 | # limit = wheel =~ /front/ ? [-wheel_max_deg, wheel_max_deg] : [0, 0]
41 | limit = [0, 0]
42 | joint(
43 | type: :revolute,
44 | name: "#{wheel}_joint",
45 | bodyA: :car,
46 | bodyB: wheel,
47 | localAnchorA: [x_pos, y_pos],
48 | localAnchorB: [0, 0],
49 | limit: limit,
50 | )
51 | end
52 | control(
53 | type: :force,
54 | bodies: [:left_front_wheel, :right_front_wheel],
55 | anchor: [0, 0],
56 | direction: [0, 1],
57 | ctrllimit: [-10.N, 10.N],
58 | )
59 | state body: :car, type: :xvel
60 | state body: :car, type: :yvel
61 | state body: :car, type: :dist, to: :goal
62 | state body: :car, type: :angle, to: :goal, transform: :cos
63 | state body: :car, type: :angle, to: :goal, transform: :sin
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/models/cartpole.xml.mako:
--------------------------------------------------------------------------------
1 | <%
2 | from rllab.misc.mako_utils import compute_rect_vertices
3 | cart_width = 4.0 / (12 ** 0.5)
4 | cart_height = 3.0 / (12 ** 0.5)
5 |
6 | pole_width = 0.1
7 | pole_height = 1.0
8 | noise = opts.get("noise", False)
9 | if noise:
10 | import numpy as np
11 | pole_height += (np.random.rand()-0.5) * pole_height * 1
12 |
13 | cart_friction = 0.0005
14 | pole_friction = 0.000002
15 | %>
[Box2D world markup for the cartpole model; not preserved in this dump, only the Mako preamble above survives.]
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/models/double_pendulum.xml.mako:
--------------------------------------------------------------------------------
1 | <%
2 | from rllab.misc.mako_utils import compute_rect_vertices
3 | link_len = opts['link_len']
4 | link_width = 0.1
5 | %>
[Box2D world markup for the double pendulum model; not preserved in this dump, only the Mako preamble above survives.]
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/models/mountain_car.xml.mako:
--------------------------------------------------------------------------------
1 | <%
2 | noise = opts.get("noise", False)
3 | track_width = 4
4 | if noise:
5 | import numpy as np
6 | track_width += np.random.uniform(-1, 1)
7 | %>
[Box2D world markup for the mountain car model; not preserved in this dump, only the Mako preamble above survives.]
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/mountain_car_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pygame
3 | from rllab.envs.box2d.parser import find_body
4 |
5 | from rllab.core.serializable import Serializable
6 | from rllab.envs.box2d.box2d_env import Box2DEnv
7 | from rllab.misc import autoargs
8 | from rllab.misc.overrides import overrides
9 |
10 |
11 | class MountainCarEnv(Box2DEnv, Serializable):
12 |
13 | @autoargs.inherit(Box2DEnv.__init__)
14 | @autoargs.arg("height_bonus", type=float,
15 | help="Height bonus added to each step's reward")
16 | @autoargs.arg("goal_cart_pos", type=float,
17 | help="Goal horizontal position")
18 | def __init__(self,
19 | height_bonus=1.,
20 | goal_cart_pos=0.6,
21 | *args, **kwargs):
22 | super(MountainCarEnv, self).__init__(
23 | self.model_path("mountain_car.xml.mako"),
24 | *args, **kwargs
25 | )
26 | self.max_cart_pos = 2
27 | self.goal_cart_pos = goal_cart_pos
28 | self.height_bonus = height_bonus
29 | self.cart = find_body(self.world, "cart")
30 | Serializable.quick_init(self, locals())
31 |
32 | @overrides
33 | def compute_reward(self, action):
34 | yield
35 | yield (-1 + self.height_bonus * self.cart.position[1])
36 |
37 | @overrides
38 | def is_current_done(self):
39 | return self.cart.position[0] >= self.goal_cart_pos \
40 | or abs(self.cart.position[0]) >= self.max_cart_pos
41 |
42 | @overrides
43 | def reset(self):
44 | self._set_state(self.initial_state)
45 | self._invalidate_state_caches()
46 | bounds = np.array([
47 | [-1],
48 | [1],
49 | ])
50 | low, high = bounds
51 | xvel = np.random.uniform(low, high)
52 | self.cart.linearVelocity = (float(xvel), self.cart.linearVelocity[1])
53 | return self.get_current_obs()
54 |
55 | @overrides
56 | def action_from_keys(self, keys):
57 | if keys[pygame.K_LEFT]:
58 | return np.asarray([-1])
59 | elif keys[pygame.K_RIGHT]:
60 | return np.asarray([+1])
61 | else:
62 | return np.asarray([0])
63 |
64 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/box2d/parser/__init__.py:
--------------------------------------------------------------------------------
1 | from .xml_box2d import world_from_xml, find_body, find_joint
2 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/env_spec.py:
--------------------------------------------------------------------------------
1 | from rllab.core.serializable import Serializable
2 | from rllab.spaces.base import Space
3 |
4 |
5 | class EnvSpec(Serializable):
6 |
7 | def __init__(
8 | self,
9 | observation_space,
10 | action_space):
11 | """
12 | :type observation_space: Space
13 | :type action_space: Space
14 | """
15 | Serializable.quick_init(self, locals())
16 | self._observation_space = observation_space
17 | self._action_space = action_space
18 |
19 | @property
20 | def observation_space(self):
21 | return self._observation_space
22 |
23 | @property
24 | def action_space(self):
25 | return self._action_space
26 |
--------------------------------------------------------------------------------
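EnvSpec is only a serializable pair of spaces; policies and baselines read both spaces off the spec instead of holding a reference to the full environment. A minimal construction sketch, assuming rllab's Box accepts scalar bounds plus a shape in the Gym style:

    import numpy as np
    from rllab.spaces.box import Box
    from rllab.envs.env_spec import EnvSpec

    # Illustrative spec: 4-dimensional observations, 1-dimensional actions.
    spec = EnvSpec(
        observation_space=Box(low=-np.inf, high=np.inf, shape=(4,)),
        action_space=Box(low=-1.0, high=1.0, shape=(1,)),
    )
    assert spec.action_space.flat_dim == 1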
/rllab/rllab/envs/identification_env.py:
--------------------------------------------------------------------------------
1 | from rllab.core.serializable import Serializable
2 | from rllab.envs.proxy_env import ProxyEnv
3 | from rllab.misc.overrides import overrides
4 |
5 |
6 | class IdentificationEnv(ProxyEnv, Serializable):
7 |
8 | def __init__(self, mdp_cls, mdp_args):
9 | Serializable.quick_init(self, locals())
10 | self.mdp_cls = mdp_cls
11 | self.mdp_args = dict(mdp_args)
12 | self.mdp_args["template_args"] = dict(noise=True)
13 | mdp = self.gen_mdp()
14 | super(IdentificationEnv, self).__init__(mdp)
15 |
16 | def gen_mdp(self):
17 | return self.mdp_cls(**self.mdp_args)
18 |
19 | @overrides
20 | def reset(self):
21 | if getattr(self, "_wrapped_env", None):
22 | if hasattr(self._wrapped_env, "release"):
23 | self._wrapped_env.release()
24 | self._wrapped_env = self.gen_mdp()
25 | return super(IdentificationEnv, self).reset()
26 |
27 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/gather/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/gather/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/gather/ant_gather_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv
2 | from rllab.envs.mujoco.ant_env import AntEnv
3 |
4 |
5 | class AntGatherEnv(GatherEnv):
6 |
7 | MODEL_CLASS = AntEnv
8 | ORI_IND = 6
9 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/gather/point_gather_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv
2 | from rllab.envs.mujoco.point_env import PointEnv
3 |
4 |
5 | class PointGatherEnv(GatherEnv):
6 |
7 | MODEL_CLASS = PointEnv
8 | ORI_IND = 2
9 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/gather/swimmer_gather_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv
2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv
3 |
4 |
5 | class SwimmerGatherEnv(GatherEnv):
6 |
7 | MODEL_CLASS = SwimmerEnv
8 | ORI_IND = 2
9 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/half_cheetah_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.envs.base import Step
5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv
6 | from rllab.misc import logger
7 | from rllab.misc.overrides import overrides
8 |
9 |
10 | def smooth_abs(x, param):
11 | return np.sqrt(np.square(x) + np.square(param)) - param
12 |
13 |
14 | class HalfCheetahEnv(MujocoEnv, Serializable):
15 |
16 | FILE = 'half_cheetah.xml'
17 |
18 | def __init__(self, *args, **kwargs):
19 | super(HalfCheetahEnv, self).__init__(*args, **kwargs)
20 | Serializable.__init__(self, *args, **kwargs)
21 |
22 | def get_current_obs(self):
23 | return np.concatenate([
24 | self.model.data.qpos.flatten()[1:],
25 | self.model.data.qvel.flat,
26 | self.get_body_com("torso").flat,
27 | ])
28 |
29 | def get_body_xmat(self, body_name):
30 | idx = self.model.body_names.index(body_name)
31 | return self.model.data.xmat[idx].reshape((3, 3))
32 |
33 | def get_body_com(self, body_name):
34 | idx = self.model.body_names.index(body_name)
35 | return self.model.data.com_subtree[idx]
36 |
37 | def step(self, action):
38 | self.forward_dynamics(action)
39 | next_obs = self.get_current_obs()
40 | action = np.clip(action, *self.action_bounds)
41 | ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action))
42 | run_cost = -1 * self.get_body_comvel("torso")[0]
43 | cost = ctrl_cost + run_cost
44 | reward = -cost
45 | done = False
46 | return Step(next_obs, reward, done)
47 |
48 | @overrides
49 | def log_diagnostics(self, paths):
50 | progs = [
51 | path["observations"][-1][-3] - path["observations"][0][-3]
52 | for path in paths
53 | ]
54 | logger.record_tabular('AverageForwardProgress', np.mean(progs))
55 | logger.record_tabular('MaxForwardProgress', np.max(progs))
56 | logger.record_tabular('MinForwardProgress', np.min(progs))
57 | logger.record_tabular('StdForwardProgress', np.std(progs))
58 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/hill/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/ant_hill_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.envs.mujoco.hill.hill_env import HillEnv
4 | from rllab.envs.mujoco.ant_env import AntEnv
5 | from rllab.misc.overrides import overrides
6 | import rllab.envs.mujoco.hill.terrain as terrain
7 | from rllab.spaces import Box
8 |
9 | class AntHillEnv(HillEnv):
10 |
11 | MODEL_CLASS = AntEnv
12 |
13 | @overrides
14 | def _mod_hfield(self, hfield):
15 | # clear a flat patch for the robot to start off from
16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([0.0, 0.0])))
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/half_cheetah_hill_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.envs.mujoco.hill.hill_env import HillEnv
4 | from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv
5 | from rllab.misc.overrides import overrides
6 | import rllab.envs.mujoco.hill.terrain as terrain
7 | from rllab.spaces import Box
8 |
9 | class HalfCheetahHillEnv(HillEnv):
10 |
11 | MODEL_CLASS = HalfCheetahEnv
12 |
13 | @overrides
14 | def _mod_hfield(self, hfield):
15 | # clear a flat patch for the robot to start off from
16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5])))
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/hopper_hill_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.envs.mujoco.hill.hill_env import HillEnv
4 | from rllab.envs.mujoco.hopper_env import HopperEnv
5 | from rllab.misc.overrides import overrides
6 | import rllab.envs.mujoco.hill.terrain as terrain
7 | from rllab.spaces import Box
8 |
9 | class HopperHillEnv(HillEnv):
10 |
11 | MODEL_CLASS = HopperEnv
12 |
13 | @overrides
14 | def _mod_hfield(self, hfield):
15 | # clear a flat patch for the robot to start off from
16 | return terrain.clear_patch(hfield, Box(np.array([-1.0, -1.0]), np.array([-0.5, -0.5])))
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/swimmer3d_hill_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.envs.mujoco.hill.hill_env import HillEnv
4 | from rllab.envs.mujoco.swimmer3d_env import Swimmer3DEnv
5 | from rllab.misc.overrides import overrides
6 | import rllab.envs.mujoco.hill.terrain as terrain
7 | from rllab.spaces import Box
8 |
9 | class Swimmer3DHillEnv(HillEnv):
10 |
11 | MODEL_CLASS = Swimmer3DEnv
12 |
13 | @overrides
14 | def _mod_hfield(self, hfield):
15 | # clear a flat patch for the robot to start off from
16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5])))
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/hill/walker2d_hill_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.envs.mujoco.hill.hill_env import HillEnv
4 | from rllab.envs.mujoco.walker2d_env import Walker2DEnv
5 | from rllab.misc.overrides import overrides
6 | import rllab.envs.mujoco.hill.terrain as terrain
7 | from rllab.spaces import Box
8 |
9 | class Walker2DHillEnv(HillEnv):
10 |
11 | MODEL_CLASS = Walker2DEnv
12 |
13 | @overrides
14 | def _mod_hfield(self, hfield):
15 | # clear a flat patch for the robot to start off from
16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([-0.5, -0.5])))
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/humanoid_env.py:
--------------------------------------------------------------------------------
1 | from .simple_humanoid_env import SimpleHumanoidEnv
2 |
3 |
4 | # Taken from Wojciech's code
5 | class HumanoidEnv(SimpleHumanoidEnv):
6 |
7 | FILE = 'humanoid.xml'
8 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/inverted_double_pendulum_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.envs.base import Step
5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv
6 | from rllab.misc import autoargs
7 | from rllab.misc.overrides import overrides
8 |
9 |
10 | class InvertedDoublePendulumEnv(MujocoEnv, Serializable):
11 | FILE = 'inverted_double_pendulum.xml.mako'
12 |
13 | @autoargs.arg("random_start", type=bool,
14 | help="Randomize the starting position by adjusting the angles. "
15 | "When this is False, the double pendulum starts out "
16 | "in the balanced position.")
17 | def __init__(
18 | self,
19 | *args, **kwargs):
20 | self.random_start = kwargs.get("random_start", True)
21 | super(InvertedDoublePendulumEnv, self).__init__(*args, **kwargs)
22 | Serializable.quick_init(self, locals())
23 |
24 | @overrides
25 | def get_current_obs(self):
26 | return np.concatenate([
27 | self.model.data.qpos[:1], # cart x pos
28 | np.sin(self.model.data.qpos[1:]), # link angles
29 | np.cos(self.model.data.qpos[1:]),
30 | np.clip(self.model.data.qvel, -10, 10),
31 | np.clip(self.model.data.qfrc_constraint, -10, 10)
32 | ]).reshape(-1)
33 |
34 | @overrides
35 | def step(self, action):
36 | self.forward_dynamics(action)
37 | next_obs = self.get_current_obs()
38 | x, _, y = self.model.data.site_xpos[0]
39 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
40 | v1, v2 = self.model.data.qvel[1:3]
41 | vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2
42 | alive_bonus = 10
43 | r = float(alive_bonus - dist_penalty - vel_penalty)
44 | done = y <= 1
45 | return Step(next_obs, r, done)
46 |
47 | @overrides
48 | def reset_mujoco(self, init_state=None):
49 | assert init_state is None
50 | qpos = np.copy(self.init_qpos)
51 | if self.random_start:
52 | qpos[1] = (np.random.rand() - 0.5) * 40 / 180. * np.pi
53 | self.model.data.qpos = qpos
54 | self.model.data.qvel = self.init_qvel
55 | self.model.data.qacc = self.init_qacc
56 | self.model.data.ctrl = self.init_ctrl
57 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/maze/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/maze/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/maze/ant_maze_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv
2 | from rllab.envs.mujoco.ant_env import AntEnv
3 |
4 |
5 | class AntMazeEnv(MazeEnv):
6 |
7 | MODEL_CLASS = AntEnv
8 | ORI_IND = 6
9 |
10 | MAZE_HEIGHT = 2
11 | MAZE_SIZE_SCALING = 3.0
12 |
13 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/maze/point_maze_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv
2 | from rllab.envs.mujoco.point_env import PointEnv
3 |
4 |
5 | class PointMazeEnv(MazeEnv):
6 |
7 | MODEL_CLASS = PointEnv
8 | ORI_IND = 2
9 |
10 | MAZE_HEIGHT = 2
11 | MAZE_SIZE_SCALING = 3.0
12 |
13 | MANUAL_COLLISION = True
14 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/maze/swimmer_maze_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv
2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv
3 |
4 |
5 | class SwimmerMazeEnv(MazeEnv):
6 |
7 | MODEL_CLASS = SwimmerEnv
8 | ORI_IND = 2
9 |
10 | MAZE_HEIGHT = 0.5
11 | MAZE_SIZE_SCALING = 4
12 | MAZE_MAKE_CONTACTS = True
13 |
14 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/point_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.base import Step
2 | from .mujoco_env import MujocoEnv
3 | from rllab.core.serializable import Serializable
4 | from rllab.misc.overrides import overrides
5 | import numpy as np
6 | import math
7 | from rllab.mujoco_py import glfw
8 |
9 |
10 | class PointEnv(MujocoEnv, Serializable):
11 |
12 | """
13 | Use Left, Right, Up, Down, A (steer left), D (steer right)
14 | """
15 |
16 | FILE = 'point.xml'
17 |
18 | def __init__(self, *args, **kwargs):
19 | super(PointEnv, self).__init__(*args, **kwargs)
20 | Serializable.quick_init(self, locals())
21 |
22 | def step(self, action):
23 | qpos = np.copy(self.model.data.qpos)
24 | qpos[2, 0] += action[1]
25 | ori = qpos[2, 0]
26 | # compute increment in each direction
27 | dx = math.cos(ori) * action[0]
28 | dy = math.sin(ori) * action[0]
29 | # ensure that the robot is within reasonable range
30 | qpos[0, 0] = np.clip(qpos[0, 0] + dx, -7, 7)
31 | qpos[1, 0] = np.clip(qpos[1, 0] + dy, -7, 7)
32 | self.model.data.qpos = qpos
33 | self.model.forward()
34 | next_obs = self.get_current_obs()
35 | return Step(next_obs, 0, False)
36 |
37 | def get_xy(self):
38 | qpos = self.model.data.qpos
39 | return qpos[0, 0], qpos[1, 0]
40 |
41 | def set_xy(self, xy):
42 | qpos = np.copy(self.model.data.qpos)
43 | qpos[0, 0] = xy[0]
44 | qpos[1, 0] = xy[1]
45 | self.model.data.qpos = qpos
46 | self.model.forward()
47 |
48 | @overrides
49 | def action_from_key(self, key):
50 | lb, ub = self.action_bounds
51 | if key == glfw.KEY_LEFT:
52 | return np.array([0, ub[0]*0.3])
53 | elif key == glfw.KEY_RIGHT:
54 | return np.array([0, lb[0]*0.3])
55 | elif key == glfw.KEY_UP:
56 | return np.array([ub[1], 0])
57 | elif key == glfw.KEY_DOWN:
58 | return np.array([lb[1], 0])
59 | else:
60 | return np.array([0, 0])
61 |
62 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/swimmer3d_env.py:
--------------------------------------------------------------------------------
1 | from .swimmer_env import SwimmerEnv
2 |
3 | class Swimmer3DEnv(SwimmerEnv):
4 | FILE = 'swimmer3d.xml'
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/swimmer_env.py:
--------------------------------------------------------------------------------
1 | from rllab.envs.base import Step
2 | from rllab.misc.overrides import overrides
3 | from .mujoco_env import MujocoEnv
4 | import numpy as np
5 | from rllab.core.serializable import Serializable
6 | from rllab.misc import logger
7 | from rllab.misc import autoargs
8 |
9 |
10 | class SwimmerEnv(MujocoEnv, Serializable):
11 |
12 | FILE = 'swimmer.xml'
13 | ORI_IND = 2
14 |
15 | @autoargs.arg('ctrl_cost_coeff', type=float,
16 | help='cost coefficient for controls')
17 | def __init__(
18 | self,
19 | ctrl_cost_coeff=1e-2,
20 | *args, **kwargs):
21 | self.ctrl_cost_coeff = ctrl_cost_coeff
22 | super(SwimmerEnv, self).__init__(*args, **kwargs)
23 | Serializable.quick_init(self, locals())
24 |
25 | def get_current_obs(self):
26 | return np.concatenate([
27 | self.model.data.qpos.flat,
28 | self.model.data.qvel.flat,
29 | self.get_body_com("torso").flat,
30 | ]).reshape(-1)
31 |
32 | def get_ori(self):
33 | return self.model.data.qpos[self.__class__.ORI_IND]
34 |
35 | def step(self, action):
36 | self.forward_dynamics(action)
37 | next_obs = self.get_current_obs()
38 | lb, ub = self.action_bounds
39 | scaling = (ub - lb) * 0.5
40 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum(
41 | np.square(action / scaling))
42 | forward_reward = self.get_body_comvel("torso")[0]
43 | reward = forward_reward - ctrl_cost
44 | done = False
45 | return Step(next_obs, reward, done)
46 |
47 | @overrides
48 | def log_diagnostics(self, paths):
49 | if len(paths) > 0:
50 | progs = [
51 | path["observations"][-1][-3] - path["observations"][0][-3]
52 | for path in paths
53 | ]
54 | logger.record_tabular('AverageForwardProgress', np.mean(progs))
55 | logger.record_tabular('MaxForwardProgress', np.max(progs))
56 | logger.record_tabular('MinForwardProgress', np.min(progs))
57 | logger.record_tabular('StdForwardProgress', np.std(progs))
58 | else:
59 | logger.record_tabular('AverageForwardProgress', np.nan)
60 | logger.record_tabular('MaxForwardProgress', np.nan)
61 | logger.record_tabular('MinForwardProgress', np.nan)
62 | logger.record_tabular('StdForwardProgress', np.nan)
63 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/mujoco/walker2d_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.envs.base import Step
5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv
6 | from rllab.misc import autoargs
7 | from rllab.misc import logger
8 | from rllab.misc.overrides import overrides
9 |
10 |
11 | def smooth_abs(x, param):
12 | return np.sqrt(np.square(x) + np.square(param)) - param
13 |
14 |
15 | class Walker2DEnv(MujocoEnv, Serializable):
16 |
17 | FILE = 'walker2d.xml'
18 |
19 | @autoargs.arg('ctrl_cost_coeff', type=float,
20 | help='cost coefficient for controls')
21 | def __init__(
22 | self,
23 | ctrl_cost_coeff=1e-2,
24 | *args, **kwargs):
25 | self.ctrl_cost_coeff = ctrl_cost_coeff
26 | super(Walker2DEnv, self).__init__(*args, **kwargs)
27 | Serializable.quick_init(self, locals())
28 |
29 | def get_current_obs(self):
30 | return np.concatenate([
31 | self.model.data.qpos.flat,
32 | self.model.data.qvel.flat,
33 | self.get_body_com("torso").flat,
34 | ])
35 |
36 | def step(self, action):
37 | self.forward_dynamics(action)
38 | next_obs = self.get_current_obs()
39 | action = np.clip(action, *self.action_bounds)
40 | lb, ub = self.action_bounds
41 | scaling = (ub - lb) * 0.5
42 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * \
43 | np.sum(np.square(action / scaling))
44 | forward_reward = self.get_body_comvel("torso")[0]
45 | reward = forward_reward - ctrl_cost
46 | qpos = self.model.data.qpos
47 | done = not (qpos[0] > 0.8 and qpos[0] < 2.0
48 | and qpos[2] > -1.0 and qpos[2] < 1.0)
49 | return Step(next_obs, reward, done)
50 |
51 | @overrides
52 | def log_diagnostics(self, paths):
53 | progs = [
54 | path["observations"][-1][-3] - path["observations"][0][-3]
55 | for path in paths
56 | ]
57 | logger.record_tabular('AverageForwardProgress', np.mean(progs))
58 | logger.record_tabular('MaxForwardProgress', np.max(progs))
59 | logger.record_tabular('MinForwardProgress', np.min(progs))
60 | logger.record_tabular('StdForwardProgress', np.std(progs))
61 |
62 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/proxy_env.py:
--------------------------------------------------------------------------------
1 | from rllab.core.serializable import Serializable
2 | from .base import Env
3 |
4 |
5 | class ProxyEnv(Env, Serializable):
6 | def __init__(self, wrapped_env):
7 | Serializable.quick_init(self, locals())
8 | self._wrapped_env = wrapped_env
9 |
10 | @property
11 | def wrapped_env(self):
12 | return self._wrapped_env
13 |
14 | def reset(self, *args, **kwargs):
15 | return self._wrapped_env.reset(*args, **kwargs)
16 |
17 | @property
18 | def action_space(self):
19 | return self._wrapped_env.action_space
20 |
21 | @property
22 | def observation_space(self):
23 | return self._wrapped_env.observation_space
24 |
25 | def step(self, action):
26 | return self._wrapped_env.step(action)
27 |
28 | def render(self, *args, **kwargs):
29 | return self._wrapped_env.render(*args, **kwargs)
30 |
31 | def log_diagnostics(self, paths, *args, **kwargs):
32 | self._wrapped_env.log_diagnostics(paths, *args, **kwargs)
33 |
34 | @property
35 | def horizon(self):
36 | return self._wrapped_env.horizon
37 |
38 | def terminate(self):
39 | self._wrapped_env.terminate()
40 |
41 | def get_param_values(self):
42 | return self._wrapped_env.get_param_values()
43 |
44 | def set_param_values(self, params):
45 | self._wrapped_env.set_param_values(params)
46 |
--------------------------------------------------------------------------------
/rllab/rllab/envs/sliding_mem_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from rllab.core.serializable import Serializable
4 | from rllab.envs.base import Step
5 | from rllab.envs.proxy_env import ProxyEnv
6 | from rllab.misc import autoargs
7 | from rllab.misc.overrides import overrides
8 | from rllab.spaces import Box
9 |
10 |
11 | class SlidingMemEnv(ProxyEnv, Serializable):
12 |
13 | def __init__(
14 | self,
15 | env,
16 | n_steps=4,
17 | axis=0,
18 | ):
19 | super().__init__(env)
20 | Serializable.quick_init(self, locals())
21 | self.n_steps = n_steps
22 | self.axis = axis
23 | self.buffer = None
24 |
25 | def reset_buffer(self, new_):
26 | assert self.axis == 0
27 | self.buffer = np.zeros(self.observation_space.shape, dtype=np.float32)
28 | self.buffer[0:] = new_
29 |
30 | def add_to_buffer(self, new_):
31 | assert self.axis == 0
32 | self.buffer[1:] = self.buffer[:-1]
33 | self.buffer[:1] = new_
34 |
35 | @property
36 | def observation_space(self):
37 | origin = self._wrapped_env.observation_space
38 | return Box(
39 | *[
40 | np.repeat(b, self.n_steps, axis=self.axis)
41 | for b in origin.bounds
42 | ]
43 | )
44 |
45 | @overrides
46 | def reset(self):
47 | obs = self._wrapped_env.reset()
48 | self.reset_buffer(obs)
49 | return self.buffer
50 |
51 | @overrides
52 | def step(self, action):
53 | next_obs, reward, done, info = self._wrapped_env.step(action)
54 | self.add_to_buffer(next_obs)
55 | return Step(self.buffer, reward, done, **info)
56 |
57 |
--------------------------------------------------------------------------------
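The buffer bookkeeping in add_to_buffer shifts the stored frames one slot along axis 0 and writes the newest observation into slot 0, so the most recent frame always comes first. A NumPy-only sketch of that shift, outside the wrapper:

    import numpy as np

    # Same shift-and-insert pattern as SlidingMemEnv.add_to_buffer,
    # shown on a length-4 buffer of scalar observations.
    buf = np.zeros(4, dtype=np.float32)
    for obs in [1.0, 2.0, 3.0]:
        buf[1:] = buf[:-1]  # age existing frames by one slot
        buf[:1] = obs       # newest frame lands in slot 0
    print(buf)              # [3. 2. 1. 0.]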
/rllab/rllab/exploration_strategies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/exploration_strategies/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/exploration_strategies/base.py:
--------------------------------------------------------------------------------
1 | class ExplorationStrategy(object):
2 | def get_action(self, t, observation, policy, **kwargs):
3 | raise NotImplementedError
4 |
5 | def reset(self):
6 | pass
7 |
--------------------------------------------------------------------------------
/rllab/rllab/exploration_strategies/gaussian_strategy.py:
--------------------------------------------------------------------------------
1 | from rllab.core.serializable import Serializable
2 | from rllab.spaces.box import Box
3 | from rllab.exploration_strategies.base import ExplorationStrategy
4 | import numpy as np
5 |
6 |
7 | class GaussianStrategy(ExplorationStrategy, Serializable):
8 | """
9 | This strategy adds Gaussian noise to the action taken by the deterministic policy.
10 | """
11 |
12 | def __init__(self, env_spec, max_sigma=1.0, min_sigma=0.1, decay_period=1000000):
13 | assert isinstance(env_spec.action_space, Box)
14 | assert len(env_spec.action_space.shape) == 1
15 | Serializable.quick_init(self, locals())
16 | self._max_sigma = max_sigma
17 | self._min_sigma = min_sigma
18 | self._decay_period = decay_period
19 | self._action_space = env_spec.action_space
20 |
21 | def get_action(self, t, observation, policy, **kwargs):
22 | action, agent_info = policy.get_action(observation)
23 | sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(1.0, t * 1.0 / self._decay_period)
24 | return np.clip(action + np.random.normal(size=len(action)) * sigma, self._action_space.low,
25 | self._action_space.high)
26 |
--------------------------------------------------------------------------------
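The noise scale in get_action follows a linear schedule: sigma falls from max_sigma to min_sigma over decay_period steps and is clamped afterwards. The schedule in isolation, with the default coefficients from the constructor above:

    # sigma(t) as computed inside GaussianStrategy.get_action.
    def sigma_at(t, max_sigma=1.0, min_sigma=0.1, decay_period=1000000):
        return max_sigma - (max_sigma - min_sigma) * min(1.0, t * 1.0 / decay_period)

    print(sigma_at(0))        # 1.0
    print(sigma_at(500000))   # 0.55, halfway through the decay
    print(sigma_at(2000000))  # ~0.1, clamped once t exceeds decay_period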
/rllab/rllab/exploration_strategies/ou_strategy.py:
--------------------------------------------------------------------------------
1 | from rllab.misc.overrides import overrides
2 | from rllab.misc.ext import AttrDict
3 | from rllab.core.serializable import Serializable
4 | from rllab.spaces.box import Box
5 | from rllab.exploration_strategies.base import ExplorationStrategy
6 | import numpy as np
7 | import numpy.random as nr
8 |
9 |
10 | class OUStrategy(ExplorationStrategy, Serializable):
11 | """
12 | This strategy implements the Ornstein-Uhlenbeck process, which adds
13 | time-correlated noise to the actions taken by the deterministic policy.
14 | The OU process satisfies the following stochastic differential equation:
15 | dxt = theta*(mu - xt)*dt + sigma*dWt
16 | where Wt denotes the Wiener process
17 | """
18 |
19 | def __init__(self, env_spec, mu=0, theta=0.15, sigma=0.3, **kwargs):
20 | assert isinstance(env_spec.action_space, Box)
21 | assert len(env_spec.action_space.shape) == 1
22 | Serializable.quick_init(self, locals())
23 | self.mu = mu
24 | self.theta = theta
25 | self.sigma = sigma
26 | self.action_space = env_spec.action_space
27 | self.state = np.ones(self.action_space.flat_dim) * self.mu
28 | self.reset()
29 |
30 | def __getstate__(self):
31 | d = Serializable.__getstate__(self)
32 | d["state"] = self.state
33 | return d
34 |
35 | def __setstate__(self, d):
36 | Serializable.__setstate__(self, d)
37 | self.state = d["state"]
38 |
39 | @overrides
40 | def reset(self):
41 | self.state = np.ones(self.action_space.flat_dim) * self.mu
42 |
43 | def evolve_state(self):
44 | x = self.state
45 | dx = self.theta * (self.mu - x) + self.sigma * nr.randn(len(x))
46 | self.state = x + dx
47 | return self.state
48 |
49 | @overrides
50 | def get_action(self, t, observation, policy, **kwargs):
51 | action, _ = policy.get_action(observation)
52 | ou_state = self.evolve_state()
53 | return np.clip(action + ou_state, self.action_space.low, self.action_space.high)
54 |
55 |
56 | if __name__ == "__main__":
57 | ou = OUStrategy(env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1,))), mu=0, theta=0.15, sigma=0.3)
58 | states = []
59 | for i in range(1000):
60 | states.append(ou.evolve_state()[0])
61 | import matplotlib.pyplot as plt
62 |
63 | plt.plot(states)
64 | plt.show()
65 |
--------------------------------------------------------------------------------
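evolve_state above is an Euler step of the stated SDE with the step size fixed at dt = 1. For comparison, the same update written with an explicit dt; this is a sketch, not part of the library:

    import numpy as np

    # Euler-Maruyama step of dx = theta*(mu - x)*dt + sigma*dW, with dW ~ N(0, dt).
    # OUStrategy.evolve_state is this update with dt = 1.
    def ou_step(x, mu=0.0, theta=0.15, sigma=0.3, dt=1.0):
        return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.randn(*x.shape)

    x = np.zeros(2)
    for _ in range(1000):
        x = ou_step(x, dt=0.1)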
/rllab/rllab/misc/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/misc/mako_utils.py:
--------------------------------------------------------------------------------
1 |
2 | def compute_rect_vertices(fromp, to, radius):
3 | x1, y1 = fromp
4 | x2, y2 = to
5 | if abs(y1 - y2) < 1e-6:
6 | dx = 0
7 | dy = radius
8 | else:
9 | dx = radius * 1.0 / (((x1 - x2) / (y1 - y2)) ** 2 + 1) ** 0.5
10 | # equivalently dx = radius * abs(y2 - y1) / ((x2 - x1)**2 + (y2 - y1)**2)**0.5
11 | dy = (radius**2 - dx**2) ** 0.5
12 | dy *= -1 if (x1 - x2) * (y1 - y2) > 0 else 1
13 |
14 | return ";".join([",".join(map(str, r)) for r in [
15 | [x1 + dx, y1 + dy],
16 | [x2 + dx, y2 + dy],
17 | [x2 - dx, y2 - dy],
18 | [x1 - dx, y1 - dy],
19 | ]])
20 |
21 |
--------------------------------------------------------------------------------
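compute_rect_vertices returns the four corners of a rectangle of half-width radius around the segment from fromp to to, serialized as semicolon-separated x,y pairs for the Mako templates. For an axis-aligned segment the output is easy to check by hand:

    from rllab.misc.mako_utils import compute_rect_vertices

    # Horizontal segment of length 2 with radius 0.5: corners sit 0.5 above
    # and below the two endpoints.
    print(compute_rect_vertices((0, 0), (2, 0), 0.5))
    # -> 0,0.5;2,0.5;2,-0.5;0,-0.5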
/rllab/rllab/misc/meta.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/meta.py
--------------------------------------------------------------------------------
/rllab/rllab/misc/resolve.py:
--------------------------------------------------------------------------------
1 | from pydoc import locate
2 | import types
3 | from rllab.misc.ext import iscanr
4 |
5 |
6 | def classesinmodule(module):
7 | md = module.__dict__
8 | return [
9 | md[c] for c in md if (
10 | isinstance(md[c], type) and md[c].__module__ == module.__name__
11 | )
12 | ]
13 |
14 |
15 | def locate_with_hint(class_path, prefix_hints=[]):
16 | module_or_class = locate(class_path)
17 | if module_or_class is None:
18 | # for hint in iscanr(lambda x, y: x + "." + y, prefix_hints):
19 | # module_or_class = locate(hint + "." + class_path)
20 | # if module_or_class:
21 | # break
22 | hint = ".".join(prefix_hints)
23 | module_or_class = locate(hint + "." + class_path)
24 | return module_or_class
25 |
26 |
27 | def load_class(class_path, superclass=None, prefix_hints=[]):
28 | module_or_class = locate_with_hint(class_path, prefix_hints)
29 | if module_or_class is None:
30 | raise ValueError("Cannot find module or class under path %s" % class_path)
31 | if type(module_or_class) == types.ModuleType:
32 | if superclass:
33 | classes = [x for x in classesinmodule(module_or_class) if issubclass(x, superclass)]
34 | if len(classes) == 0:
35 | if superclass:
36 | raise ValueError('Could not find any subclasses of %s defined in module %s' % (str(superclass), class_path))
37 | else:
38 | raise ValueError('Could not find any classes defined in module %s' % (class_path))
39 | elif len(classes) > 1:
40 | if superclass:
41 | raise ValueError('Multiple subclasses of %s are defined in the module %s' % (str(superclass), class_path))
42 | else:
43 | raise ValueError('Multiple classes are defined in the module %s' % (class_path))
44 | else:
45 | return classes[0]
46 | elif isinstance(module_or_class, type):
47 | if superclass is None or issubclass(module_or_class, superclass):
48 | return module_or_class
49 | else:
50 | raise ValueError('The class %s is not a subclass of %s' % (str(module_or_class), str(superclass)))
51 | else:
52 | raise ValueError('Unsupported object: %s' % str(module_or_class))
53 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/.rvmrc:
--------------------------------------------------------------------------------
1 | rvm use 2.1.0@mjpy --create
2 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | gem 'pry'
4 | gem 'activesupport'
5 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: https://rubygems.org/
3 | specs:
4 | activesupport (4.1.8)
5 | i18n (~> 0.6, >= 0.6.9)
6 | json (~> 1.7, >= 1.7.7)
7 | minitest (~> 5.1)
8 | thread_safe (~> 0.1)
9 | tzinfo (~> 1.1)
10 | coderay (1.1.0)
11 | i18n (0.7.0)
12 | json (1.8.1)
13 | method_source (0.8.2)
14 | minitest (5.5.1)
15 | pry (0.10.1)
16 | coderay (~> 1.1.0)
17 | method_source (~> 0.8.1)
18 | slop (~> 3.4)
19 | slop (3.6.0)
20 | thread_safe (0.3.4)
21 | tzinfo (1.2.2)
22 | thread_safe (~> 0.1)
23 |
24 | PLATFORMS
25 | ruby
26 |
27 | DEPENDENCIES
28 | activesupport
29 | pry
30 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/__init__.py:
--------------------------------------------------------------------------------
1 | from .mjviewer import MjViewer
2 | from .mjcore import MjModel
3 | from .mjcore import register_license
4 | import os
5 | from .mjconstants import *
6 |
7 | register_license(os.path.join(os.path.dirname(__file__),
8 | '../../vendor/mujoco/mjkey.txt'))
9 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/gen_binding.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | parent_path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P )
3 | mujoco_path=$parent_path/../../vendor/mujoco
4 | rm /tmp/code_gen_mujoco.h
5 | cat $mujoco_path/mjdata.h >> /tmp/code_gen_mujoco.h && \
6 | cat $mujoco_path/mjmodel.h >> /tmp/code_gen_mujoco.h && \
7 | cat $mujoco_path/mjrender.h >> /tmp/code_gen_mujoco.h && \
8 | cat $mujoco_path/mjvisualize.h >> /tmp/code_gen_mujoco.h && \
9 | ruby $parent_path/codegen.rb /tmp/code_gen_mujoco.h $mujoco_path/mjxmacro.h > $parent_path/mjtypes.py
10 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/mjconstants.py:
--------------------------------------------------------------------------------
1 | MOUSE_ROTATE_V = 1
2 | MOUSE_ROTATE_H = 2
3 | MOUSE_MOVE_V = 3
4 | MOUSE_MOVE_H = 4
5 | MOUSE_ZOOM = 5
6 |
7 | mjOBJ_BODY = 1
8 |
--------------------------------------------------------------------------------
/rllab/rllab/mujoco_py/mjextra.py:
--------------------------------------------------------------------------------
1 | def append_objects(cur, extra):
2 | for i in range(cur.ngeom, cur.ngeom + extra.ngeom):
3 | cur.geoms[i] = extra.geoms[i - cur.ngeom]
4 | cur.ngeom = cur.ngeom + extra.ngeom
5 | if cur.ngeom > cur.maxgeom:
6 | raise ValueError("buffer limit exceeded!")
7 |
--------------------------------------------------------------------------------
/rllab/rllab/optimizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/optimizers/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/optimizers/minibatch_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class BatchDataset(object):
5 |
6 | def __init__(self, inputs, batch_size, extra_inputs=None):
7 | self._inputs = [
8 | i for i in inputs
9 | ]
10 | if extra_inputs is None:
11 | extra_inputs = []
12 | self._extra_inputs = extra_inputs
13 | self._batch_size = batch_size
14 | if batch_size is not None:
15 | self._ids = np.arange(self._inputs[0].shape[0])
16 | self.update()
17 |
18 | @property
19 | def number_batches(self):
20 | if self._batch_size is None:
21 | return 1
22 | return int(np.ceil(self._inputs[0].shape[0] * 1.0 / self._batch_size))
23 |
24 | def iterate(self, update=True):
25 | if self._batch_size is None:
26 | yield list(self._inputs) + list(self._extra_inputs)
27 | else:
28 | for itr in range(self.number_batches):
29 | batch_start = itr * self._batch_size
30 | batch_end = (itr + 1) * self._batch_size
31 | batch_ids = self._ids[batch_start:batch_end]
32 | batch = [d[batch_ids] for d in self._inputs]
33 | yield list(batch) + list(self._extra_inputs)
34 | if update:
35 | self.update()
36 |
37 | def update(self):
38 | np.random.shuffle(self._ids)
39 |
--------------------------------------------------------------------------------
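A short usage sketch of the minibatch iterator: with 10 rows and batch_size=4 it yields three shuffled batches per pass (sizes 4, 4, 2), keeps the rows of all inputs aligned, and reshuffles the ids after each full pass when update=True.

    import numpy as np
    from rllab.optimizers.minibatch_dataset import BatchDataset

    xs = np.arange(10).reshape(10, 1)
    ys = np.arange(10, 20).reshape(10, 1)
    dataset = BatchDataset([xs, ys], batch_size=4)

    for batch_xs, batch_ys in dataset.iterate():
        # Rows are shuffled, but xs/ys stay paired within each batch.
        assert batch_xs.shape[0] == batch_ys.shape[0]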
/rllab/rllab/plotter/__init__.py:
--------------------------------------------------------------------------------
1 | from .plotter import *
2 |
--------------------------------------------------------------------------------
/rllab/rllab/plotter/plotter.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | from queue import Empty
3 | from multiprocessing import Process, Queue
4 | from rllab.sampler.utils import rollout
5 | import numpy as np
6 |
7 | __all__ = [
8 | 'init_worker',
9 | 'init_plot',
10 | 'update_plot'
11 | ]
12 |
13 | process = None
14 | queue = None
15 |
16 |
17 | def _worker_start():
18 | env = None
19 | policy = None
20 | max_length = None
21 | try:
22 | while True:
23 | msgs = {}
24 | # Only fetch the last message of each type
25 | while True:
26 | try:
27 | msg = queue.get_nowait()
28 | msgs[msg[0]] = msg[1:]
29 | except Empty:
30 | break
31 | if 'stop' in msgs:
32 | break
33 | elif 'update' in msgs:
34 | env, policy = msgs['update']
35 | # env.start_viewer()
36 | elif 'demo' in msgs:
37 | param_values, max_length = msgs['demo']
38 | policy.set_param_values(param_values)
39 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5)
40 | else:
41 | if max_length:
42 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5)
43 | except KeyboardInterrupt:
44 | pass
45 |
46 |
47 | def _shutdown_worker():
48 | if process:
49 | queue.put(['stop'])
50 | queue.close()
51 | process.join()
52 |
53 |
54 | def init_worker():
55 | global process, queue
56 | queue = Queue()
57 | process = Process(target=_worker_start)
58 | process.start()
59 | atexit.register(_shutdown_worker)
60 |
61 |
62 | def init_plot(env, policy):
63 | queue.put(['update', env, policy])
64 |
65 |
66 | def update_plot(policy, max_length=np.inf):
67 | queue.put(['demo', policy.get_param_values(), max_length])
68 |
--------------------------------------------------------------------------------
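Typical lifecycle of the plotting worker: fork the process once, hand it the env and policy, then push parameter snapshots as training proceeds. A sketch under the assumption that env, policy, and the training loop already exist; n_itr and train_one_iteration are placeholders, not part of the library.

    from rllab import plotter

    plotter.init_worker()               # start the background rollout process
    plotter.init_plot(env, policy)      # send it the env and policy to animate
    for itr in range(n_itr):            # hypothetical training loop
        train_one_iteration()           # placeholder for the actual update
        plotter.update_plot(policy, max_length=500)  # replay with latest params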
/rllab/rllab/policies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/policies/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/policies/base.py:
--------------------------------------------------------------------------------
1 | from rllab.core.parameterized import Parameterized
2 |
3 |
4 | class Policy(Parameterized):
5 | def __init__(self, env_spec):
6 | Parameterized.__init__(self)
7 | self._env_spec = env_spec
8 |
9 | # Should be implemented by all policies
10 |
11 | def get_action(self, observation):
12 | raise NotImplementedError
13 |
14 | def reset(self):
15 | pass
16 |
17 | @property
18 | def observation_space(self):
19 | return self._env_spec.observation_space
20 |
21 | @property
22 | def action_space(self):
23 | return self._env_spec.action_space
24 |
25 | @property
26 | def recurrent(self):
27 | """
28 | Indicates whether the policy is recurrent.
29 | :return:
30 | """
31 | return False
32 |
33 | def log_diagnostics(self, paths):
34 | """
35 | Log extra information per iteration based on the collected paths
36 | """
37 | pass
38 |
39 | @property
40 | def state_info_keys(self):
41 | """
42 | Return keys for the information related to the policy's state when taking an action.
43 | :return:
44 | """
45 | return list()
46 |
47 | def terminate(self):
48 | """
49 | Clean up operation
50 | """
51 | pass
52 |
53 |
54 | class StochasticPolicy(Policy):
55 |
56 | @property
57 | def distribution(self):
58 | """
59 | :rtype Distribution
60 | """
61 | raise NotImplementedError
62 |
63 | def dist_info_sym(self, obs_var, state_info_vars):
64 | """
65 | Return the symbolic distribution information about the actions.
66 | :param obs_var: symbolic variable for observations
67 | :param state_info_vars: a dictionary whose values should contain information about the state of the policy at
68 | the time it received the observation
69 | :return:
70 | """
71 | raise NotImplementedError
72 |
73 | def dist_info(self, obs, state_infos):
74 | """
75 | Return the distribution information about the actions.
76 | :param obs: observation values
77 | :param state_infos: a dictionary whose values should contain information about the state of the policy at
78 | the time it received the observation
79 | :return:
80 | """
81 | raise NotImplementedError
82 |
--------------------------------------------------------------------------------
/rllab/rllab/policies/uniform_control_policy.py:
--------------------------------------------------------------------------------
1 | from rllab.core.parameterized import Parameterized
2 | from rllab.core.serializable import Serializable
3 | from rllab.distributions.delta import Delta
4 | from rllab.policies.base import Policy
5 | from rllab.misc.overrides import overrides
6 |
7 |
8 | class UniformControlPolicy(Policy):
9 | def __init__(
10 | self,
11 | env_spec,
12 | ):
13 | Serializable.quick_init(self, locals())
14 | super(UniformControlPolicy, self).__init__(env_spec=env_spec)
15 |
16 | @overrides
17 | def get_action(self, observation):
18 | return self.action_space.sample(), dict()
19 |
20 | def get_params_internal(self, **tags):
21 | return []
22 |
23 | def get_actions(self, observations):
24 | return self.action_space.sample_n(len(observations)), dict()
25 |
26 | @property
27 | def vectorized(self):
28 | return True
29 |
30 | def reset(self, dones=None):
31 | pass
32 |
33 | @property
34 | def distribution(self):
35 | # Just a placeholder
36 | return Delta()
37 |
--------------------------------------------------------------------------------
/rllab/rllab/q_functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/q_functions/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/q_functions/base.py:
--------------------------------------------------------------------------------
1 | from rllab.core.parameterized import Parameterized
2 |
3 |
4 | class QFunction(Parameterized):
5 | pass
6 |
--------------------------------------------------------------------------------
/rllab/rllab/regressors/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'dementrock'
2 |
--------------------------------------------------------------------------------
/rllab/rllab/regressors/product_regressor.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | import numpy as np
5 | from rllab.core.serializable import Serializable
6 |
7 |
8 | class ProductRegressor(Serializable):
9 | """
10 | A class for performing MLE regression by fitting a product distribution to the outputs. A separate regressor will
11 | be trained for each individual output distribution.
12 | """
13 |
14 | def __init__(self, regressors):
15 | """
16 | :param regressors: List of individual regressors
17 | """
18 | Serializable.quick_init(self, locals())
19 | self.regressors = regressors
20 | self.output_dims = [x.output_dim for x in regressors]
21 |
22 | def _split_ys(self, ys):
23 | ys = np.asarray(ys)
24 | split_ids = np.cumsum(self.output_dims)[:-1]
25 | return np.split(ys, split_ids, axis=1)
26 |
27 | def fit(self, xs, ys):
28 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys)):
29 | regressor.fit(xs, split_ys)
30 |
31 | def predict(self, xs):
32 | return np.concatenate([
33 | regressor.predict(xs) for regressor in self.regressors
34 | ], axis=1)
35 |
36 | def sample_predict(self, xs):
37 | return np.concatenate([
38 | regressor.sample_predict(xs) for regressor in self.regressors
39 | ], axis=1)
40 |
41 | def predict_log_likelihood(self, xs, ys):
42 | return np.sum([
43 | regressor.predict_log_likelihood(xs, split_ys)
44 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys))
45 | ], axis=0)
46 |
47 | def get_param_values(self, **tags):
48 | return np.concatenate(
49 | [regressor.get_param_values(**tags) for regressor in self.regressors]
50 | )
51 |
52 | def set_param_values(self, flattened_params, **tags):
53 | param_dims = [
54 | np.prod(regressor.get_param_shapes(**tags))
55 | for regressor in self.regressors
56 | ]
57 | split_ids = np.cumsum(param_dims)[:-1]
58 | for regressor, split_param_values in zip(self.regressors, np.split(flattened_params, split_ids)):
59 | regressor.set_param_values(split_param_values)
60 |
--------------------------------------------------------------------------------
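The column bookkeeping behind _split_ys: targets are stored concatenated along axis 1 and split back according to each regressor's output_dim. A NumPy-only illustration for output_dims = [2, 3]:

    import numpy as np

    # Split points come from the cumulative output dimensions.
    output_dims = [2, 3]
    ys = np.arange(20).reshape(4, 5)          # 4 samples, 2 + 3 target columns
    split_ids = np.cumsum(output_dims)[:-1]   # -> [2]
    ys_a, ys_b = np.split(ys, split_ids, axis=1)
    print(ys_a.shape, ys_b.shape)             # (4, 2) (4, 3)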
/rllab/rllab/sampler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/sampler/__init__.py
--------------------------------------------------------------------------------
/rllab/rllab/sampler/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from rllab.misc import tensor_utils
3 | import time
4 |
5 |
6 | def rollout(env, agent, max_path_length=np.inf, animated=False, speedup=1, save_video=True, video_filename='sim_out.mp4', reset_args=None, policy_contexts=None):
7 | observations = []
8 | actions = []
9 | rewards = []
10 | agent_infos = []
11 | env_infos = []
12 | images = []
13 | o = env.reset(reset_args=reset_args, policy_contexts=policy_contexts)
14 | agent.reset()
15 | path_length = 0
16 | if animated:
17 | env.render()
18 | while path_length < max_path_length:
19 | a, agent_info = agent.get_action(o)
20 | next_o, r, d, env_info = env.step(a)
21 | observations.append(env.observation_space.flatten(o))
22 | rewards.append(r)
23 | actions.append(env.action_space.flatten(a))
24 | agent_infos.append(agent_info)
25 | env_infos.append(env_info)
26 | path_length += 1
27 | if d: # and not animated: # TODO testing
28 | break
29 | o = next_o
30 | if animated:
31 | env.render()
32 | timestep = 0.05
33 | time.sleep(timestep / speedup)
34 | if save_video:
35 | from PIL import Image
36 | image = env.wrapped_env.wrapped_env.get_viewer().get_image()
37 | pil_image = Image.frombytes('RGB', (image[1], image[2]), image[0])
38 | images.append(np.flipud(np.array(pil_image)))
39 |
40 | if animated:
41 | if save_video and len(images) >= max_path_length:
42 | import moviepy.editor as mpy
43 | clip = mpy.ImageSequenceClip(images, fps=20*speedup)
44 | if video_filename[-3:] == 'gif':
45 | clip.write_gif(video_filename, fps=20*speedup)
46 | else:
47 | clip.write_videofile(video_filename, fps=20*speedup)
48 | #return
49 |
50 | return dict(
51 | observations=tensor_utils.stack_tensor_list(observations),
52 | actions=tensor_utils.stack_tensor_list(actions),
53 | rewards=tensor_utils.stack_tensor_list(rewards),
54 | agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos),
55 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos),
56 | )
--------------------------------------------------------------------------------
/rllab/rllab/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from .product import Product
2 | from .discrete import Discrete
3 | from .box import Box
4 |
5 | __all__ = ["Product", "Discrete", "Box"]
--------------------------------------------------------------------------------
/rllab/rllab/spaces/base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Space(object):
5 | """
6 |     Provides a classification of state spaces and action spaces,
7 | so you can write generic code that applies to any Environment.
8 | E.g. to choose a random action.
9 | """
10 |
11 | def sample(self, seed=0):
12 | """
13 |         Uniformly sample a random element of this space
14 | """
15 | raise NotImplementedError
16 |
17 | def contains(self, x):
18 | """
19 | Return boolean specifying if x is a valid
20 | member of this space
21 | """
22 | raise NotImplementedError
23 |
24 | def flatten(self, x):
25 | raise NotImplementedError
26 |
27 | def unflatten(self, x):
28 | raise NotImplementedError
29 |
30 | def flatten_n(self, xs):
31 | raise NotImplementedError
32 |
33 | def unflatten_n(self, xs):
34 | raise NotImplementedError
35 |
36 | @property
37 | def flat_dim(self):
38 | """
39 | The dimension of the flattened vector of the tensor representation
40 | """
41 | raise NotImplementedError
42 |
43 | def new_tensor_variable(self, name, extra_dims):
44 | """
45 | Create a Theano tensor variable given the name and extra dimensions prepended
46 | :param name: name of the variable
47 | :param extra_dims: extra dimensions in the front
48 | :return: the created tensor variable
49 | """
50 | raise NotImplementedError
51 |
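
A minimal sketch of the generic Space interface using the concrete Box subclass defined in rllab/spaces/box.py below; it assumes rllab (and its theano dependency) is installed. The same sample/contains/flatten/unflatten calls work for any Space subclass.

    import numpy as np
    from rllab.spaces.box import Box

    space = Box(low=-1.0, high=1.0, shape=(2, 2))
    x = space.sample()                             # uniform draw with the space's shape
    assert space.contains(x)
    flat = space.flatten(x)                        # vector of length space.flat_dim (here 4)
    assert flat.shape == (space.flat_dim,)
    assert np.allclose(space.unflatten(flat), x)   # flatten/unflatten round-trip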
--------------------------------------------------------------------------------
/rllab/rllab/spaces/box.py:
--------------------------------------------------------------------------------
1 | from rllab.core.serializable import Serializable
2 | from .base import Space
3 | import numpy as np
4 | from rllab.misc import ext
5 | import theano
6 |
7 |
8 | class Box(Space):
9 | """
10 | A box in R^n.
11 | I.e., each coordinate is bounded.
12 | """
13 |
14 | def __init__(self, low, high, shape=None):
15 | """
16 | Two kinds of valid input:
17 | Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided
18 | Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape
19 | """
20 | if shape is None:
21 | assert low.shape == high.shape
22 | self.low = low
23 | self.high = high
24 | else:
25 | assert np.isscalar(low) and np.isscalar(high)
26 | self.low = low + np.zeros(shape)
27 | self.high = high + np.zeros(shape)
28 |
29 | def sample(self):
30 | return np.random.uniform(low=self.low, high=self.high, size=self.low.shape)
31 |
32 | def contains(self, x):
33 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all()
34 |
35 | @property
36 | def shape(self):
37 | return self.low.shape
38 |
39 | @property
40 | def flat_dim(self):
41 | return np.prod(self.low.shape)
42 |
43 | @property
44 | def bounds(self):
45 | return self.low, self.high
46 |
47 | def flatten(self, x):
48 | return np.asarray(x).flatten()
49 |
50 | def unflatten(self, x):
51 | return np.asarray(x).reshape(self.shape)
52 |
53 | def flatten_n(self, xs):
54 | xs = np.asarray(xs)
55 | return xs.reshape((xs.shape[0], -1))
56 |
57 | def unflatten_n(self, xs):
58 | xs = np.asarray(xs)
59 | return xs.reshape((xs.shape[0],) + self.shape)
60 |
61 | def __repr__(self):
62 | return "Box" + str(self.shape)
63 |
64 | def __eq__(self, other):
65 | return isinstance(other, Box) and np.allclose(self.low, other.low) and \
66 | np.allclose(self.high, other.high)
67 |
68 | def __hash__(self):
69 |         return hash((tuple(self.low.flatten()), tuple(self.high.flatten())))  # ndarrays themselves are unhashable
70 |
71 | def new_tensor_variable(self, name, extra_dims):
72 | return ext.new_tensor(
73 | name=name,
74 | ndim=extra_dims+1,
75 | dtype=theano.config.floatX
76 | )
77 |
78 |
--------------------------------------------------------------------------------
/rllab/rllab/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | from .base import Space
2 | import numpy as np
3 | from rllab.misc import special
4 | from rllab.misc import ext
5 |
6 |
7 | class Discrete(Space):
8 | """
9 | {0,1,...,n-1}
10 | """
11 |
12 | def __init__(self, n):
13 | self._n = n
14 |
15 | @property
16 | def n(self):
17 | return self._n
18 |
19 | def sample(self):
20 | return np.random.randint(self.n)
21 |
22 | def contains(self, x):
23 | x = np.asarray(x)
24 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n
25 |
26 | def __repr__(self):
27 | return "Discrete(%d)" % self.n
28 |
29 | def __eq__(self, other):
30 | return self.n == other.n
31 |
32 | def flatten(self, x):
33 | return special.to_onehot(x, self.n)
34 |
35 | def unflatten(self, x):
36 | return special.from_onehot(x)
37 |
38 | def flatten_n(self, x):
39 | return special.to_onehot_n(x, self.n)
40 |
41 | def unflatten_n(self, x):
42 | return special.from_onehot_n(x)
43 |
44 | @property
45 | def flat_dim(self):
46 | return self.n
47 |
48 | def weighted_sample(self, weights):
49 | return special.weighted_sample(weights, range(self.n))
50 |
51 | @property
52 | def default_value(self):
53 | return 0
54 |
55 | def new_tensor_variable(self, name, extra_dims):
56 | if self.n <= 2 ** 8:
57 | return ext.new_tensor(
58 | name=name,
59 | ndim=extra_dims+1,
60 | dtype='uint8'
61 | )
62 | elif self.n <= 2 ** 16:
63 | return ext.new_tensor(
64 | name=name,
65 | ndim=extra_dims+1,
66 | dtype='uint16'
67 | )
68 | else:
69 | return ext.new_tensor(
70 | name=name,
71 | ndim=extra_dims+1,
72 | dtype='uint32'
73 | )
74 |
75 | def __eq__(self, other):
76 | if not isinstance(other, Discrete):
77 | return False
78 | return self.n == other.n
79 |
80 | def __hash__(self):
81 | return hash(self.n)
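
A small sketch of the one-hot encoding behind Discrete.flatten / unflatten, again assuming rllab (and its theano dependency) is installed.

    import numpy as np
    from rllab.spaces.discrete import Discrete

    space = Discrete(4)
    a = space.sample()                     # an integer in {0, 1, 2, 3}
    onehot = space.flatten(a)              # length-4 one-hot vector
    assert onehot.shape == (space.flat_dim,)
    assert np.argmax(onehot) == a
    assert space.unflatten(onehot) == a    # round-trip back to the integer
    batch = space.flatten_n([0, 2, 3])     # (3, 4) batch of one-hot rows
    assert batch.shape == (3, 4)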
--------------------------------------------------------------------------------
/rllab/rllab/viskit/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'dementrock'
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/__init__.py
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/__init__.py
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/__init__.py
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/algos/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/algos/npg.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/algos/trpo.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from sandbox.rocky.tf.algos.npo import NPO
4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
5 |
6 |
7 | class TRPO(NPO):
8 | """
9 | Trust Region Policy Optimization
10 | """
11 |
12 | def __init__(
13 | self,
14 | optimizer=None,
15 | optimizer_args=None,
16 | **kwargs):
17 | if optimizer is None:
18 | if optimizer_args is None:
19 | optimizer_args = dict()
20 | optimizer = ConjugateGradientOptimizer(**optimizer_args)
21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs)
22 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/core/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/core/layers_powered.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.core.parameterized import Parameterized
2 | import sandbox.rocky.tf.core.layers as L
3 | import itertools
4 |
5 |
6 | class LayersPowered(Parameterized):
7 |
8 | def __init__(self, output_layers, input_layers=None):
9 | self._output_layers = output_layers
10 | self._input_layers = input_layers
11 | Parameterized.__init__(self)
12 |
13 | def get_params_internal(self, **tags):
14 | layers = L.get_all_layers(self._output_layers, treat_as_input=self._input_layers)
15 | params = itertools.chain.from_iterable(l.get_params(**tags) for l in layers)
16 | return L.unique(params)
17 |
18 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/distributions/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/distributions/base.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | class Distribution(object):
6 | @property
7 | def dim(self):
8 | raise NotImplementedError
9 |
10 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
11 | """
12 | Compute the symbolic KL divergence of two distributions
13 | """
14 | raise NotImplementedError
15 |
16 | def kl(self, old_dist_info, new_dist_info):
17 | """
18 | Compute the KL divergence of two distributions
19 | """
20 | raise NotImplementedError
21 |
22 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
23 | raise NotImplementedError
24 |
25 | def entropy(self, dist_info):
26 | raise NotImplementedError
27 |
28 | def log_likelihood_sym(self, x_var, dist_info_vars):
29 | raise NotImplementedError
30 |
31 | def log_likelihood(self, xs, dist_info):
32 | raise NotImplementedError
33 |
34 | @property
35 | def dist_info_specs(self):
36 | raise NotImplementedError
37 |
38 | @property
39 | def dist_info_keys(self):
40 | return [k for k, _ in self.dist_info_specs]
41 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/distributions/bernoulli.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from .base import Distribution
4 | import tensorflow as tf
5 | import numpy as np
6 |
7 | TINY = 1e-8
8 |
9 |
10 | class Bernoulli(Distribution):
11 | def __init__(self, dim):
12 | self._dim = dim
13 |
14 | @property
15 | def dim(self):
16 | return self._dim
17 |
18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars):
19 | old_p = old_dist_info_vars["p"]
20 | new_p = new_dist_info_vars["p"]
21 | kl = old_p * (tf.log(old_p + TINY) - tf.log(new_p + TINY)) + \
22 | (1 - old_p) * (tf.log(1 - old_p + TINY) - tf.log(1 - new_p + TINY))
23 | ndims = kl.get_shape().ndims
24 | return tf.reduce_sum(kl, axis=ndims - 1)
25 |
26 | def kl(self, old_dist_info, new_dist_info):
27 | old_p = old_dist_info["p"]
28 | new_p = new_dist_info["p"]
29 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \
30 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY))
31 | return np.sum(kl, axis=-1)
32 |
33 | def sample(self, dist_info):
34 | p = np.asarray(dist_info["p"])
35 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p)
36 |
37 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars):
38 | old_p = old_dist_info_vars["p"]
39 | new_p = new_dist_info_vars["p"]
40 | ndims = old_p.get_shape().ndims
41 | return tf.reduce_prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY),
42 | axis=ndims - 1)
43 |
44 | def log_likelihood_sym(self, x_var, dist_info_vars):
45 | p = dist_info_vars["p"]
46 | ndims = p.get_shape().ndims
47 | return tf.reduce_sum(x_var * tf.log(p + TINY) + (1 - x_var) * tf.log(1 - p + TINY), axis=ndims - 1)
48 |
49 | def log_likelihood(self, xs, dist_info):
50 | p = dist_info["p"]
51 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1)
52 |
53 | def entropy(self, dist_info):
54 | p = dist_info["p"]
55 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1)
56 |
57 | @property
58 | def dist_info_keys(self):
59 | return ["p"]
60 |
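
A quick numeric check of the numpy-side methods of Bernoulli (kl, log_likelihood, entropy); importing the module assumes the TF1-era TensorFlow this repo targets is installed, even though these particular methods only use numpy.

    import numpy as np
    from sandbox.rocky.tf.distributions.bernoulli import Bernoulli

    dist = Bernoulli(dim=2)
    old = dict(p=np.array([[0.5, 0.9]]))
    new = dict(p=np.array([[0.5, 0.9]]))
    assert np.allclose(dist.kl(old, new), 0.0)       # identical distributions -> zero KL

    xs = np.array([[1.0, 0.0]])
    ll = dist.log_likelihood(xs, old)                # log(0.5) + log(0.1), up to the TINY smoothing term
    assert np.allclose(ll, np.log(0.5) + np.log(0.1), atol=1e-6)

    ent = dist.entropy(old)                          # sum of the two per-dimension Bernoulli entropies
    print(ent)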
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | from sandbox.rocky.tf.distributions.diagonal_gaussian import DiagonalGaussian
5 |
6 | RecurrentDiagonalGaussian = DiagonalGaussian
7 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/envs/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/envs/vec_env_executor.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pickle as pickle
3 | from sandbox.rocky.tf.misc import tensor_utils
4 |
5 |
6 | class VecEnvExecutor(object):
7 | def __init__(self, envs, max_path_length):
8 | self.envs = envs
9 | self._action_space = envs[0].action_space
10 | self._observation_space = envs[0].observation_space
11 | self.ts = np.zeros(len(self.envs), dtype='int')
12 | self.max_path_length = max_path_length
13 |
14 | def step(self, action_n, reset_args=None, policy_contexts=None):
15 | if reset_args is None:
16 | policy_contexts = [None]*len(self.envs)
17 | reset_args = [None]*len(self.envs)
18 | all_results = [env.step(a) for (a, env) in zip(action_n, self.envs)]
19 | obs, rewards, dones, env_infos = list(map(list, list(zip(*all_results))))
20 | dones = np.asarray(dones)
21 | rewards = np.asarray(rewards)
22 | self.ts += 1
23 | if self.max_path_length is not None:
24 | dones[self.ts >= self.max_path_length] = True
25 | for (i, done) in enumerate(dones):
26 | if done:
27 | obs[i] = self.envs[i].reset(reset_args=reset_args[i], policy_contexts=policy_contexts[i])
28 | self.ts[i] = 0
29 | return obs, rewards, dones, tensor_utils.stack_tensor_dict_list(env_infos)
30 |
31 | def reset(self, reset_args=None, policy_contexts=None):
32 | if reset_args is not None:
33 | assert policy_contexts is not None
34 | results = [env.reset(reset_args=arg, policy_contexts=policy_context) for env, arg, policy_context in zip(self.envs, reset_args, policy_contexts)]
35 | else:
36 | results = [env.reset() for env in self.envs]
37 | self.ts[:] = 0
38 | return results
39 |
40 | @property
41 | def num_envs(self):
42 | return len(self.envs)
43 |
44 | @property
45 | def action_space(self):
46 | return self._action_space
47 |
48 | @property
49 | def observation_space(self):
50 | return self._observation_space
51 |
52 | def terminate(self):
53 | pass
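
A sketch of how VecEnvExecutor batches several environments and auto-resets the ones that finish; ToyEnv is hypothetical and only provides what the executor touches (step, a reset accepting the reset_args / policy_contexts keywords used in this fork, and action_space / observation_space attributes). Importing the executor assumes this repo and TensorFlow are on the PYTHONPATH.

    import numpy as np
    from sandbox.rocky.tf.envs.vec_env_executor import VecEnvExecutor

    class ToyEnv(object):
        action_space = None            # only stored by the executor, so placeholders suffice here
        observation_space = None

        def __init__(self):
            self.t = 0

        def reset(self, reset_args=None, policy_contexts=None):
            self.t = 0
            return np.zeros(2)

        def step(self, action):
            self.t += 1
            done = self.t >= 3         # episode ends after 3 steps
            return np.full(2, self.t), 1.0, done, {}

    vec_env = VecEnvExecutor(envs=[ToyEnv(), ToyEnv()], max_path_length=5)
    obs = vec_env.reset()              # list with one observation per environment
    for _ in range(4):
        obs, rewards, dones, infos = vec_env.step([0, 0])
        # environments reporting done=True are reset in place before step() returns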
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/launchers/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/launchers/trpo_cartpole.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer
6 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp
7 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
8 | from sandbox.rocky.tf.envs.base import TfEnv
9 | from rllab.misc.instrument import stub, run_experiment_lite
10 |
11 | env = TfEnv(normalize(CartpoleEnv()))
12 |
13 | policy = GaussianMLPPolicy(
14 | name="policy",
15 | env_spec=env.spec,
16 | # The neural network policy should have two hidden layers, each with 32 hidden units.
17 | hidden_sizes=(32, 32)
18 | )
19 |
20 | baseline = LinearFeatureBaseline(env_spec=env.spec)
21 |
22 | algo = TRPO(
23 | env=env,
24 | policy=policy,
25 | baseline=baseline,
26 | batch_size=4000,
27 | max_path_length=100,
28 | n_itr=40,
29 | discount=0.99,
30 | step_size=0.01,
31 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
32 |
33 | )
34 | algo.train()
35 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.algos.trpo import TRPO
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from sandbox.rocky.tf.policies.gaussian_gru_policy import GaussianGRUPolicy
6 | from sandbox.rocky.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy
7 | from sandbox.rocky.tf.envs.base import TfEnv
8 | import sandbox.rocky.tf.core.layers as L
9 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp
10 | from rllab.misc.instrument import stub, run_experiment_lite
11 |
12 | env = TfEnv(normalize(CartpoleEnv()))
13 |
14 | policy = GaussianLSTMPolicy(
15 | name="policy",
16 | env_spec=env.spec,
17 | lstm_layer_cls=L.TfBasicLSTMLayer,
18 | # gru_layer_cls=L.GRULayer,
19 | )
20 |
21 | baseline = LinearFeatureBaseline(env_spec=env.spec)
22 |
23 | algo = TRPO(
24 | env=env,
25 | policy=policy,
26 | baseline=baseline,
27 | batch_size=4000,
28 | max_path_length=100,
29 | n_itr=10,
30 | discount=0.99,
31 | step_size=0.01,
32 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
33 | )
34 | algo.train()
35 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/launchers/vpg_cartpole.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.algos.vpg import VPG
2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
4 | from rllab.envs.normalized_env import normalize
5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 | from sandbox.rocky.tf.envs.base import TfEnv
7 | from rllab.misc.instrument import stub, run_experiment_lite
8 |
9 | env = TfEnv(normalize(CartpoleEnv()))
10 |
11 | policy = GaussianMLPPolicy(
12 | name="policy",
13 | env_spec=env.spec,
14 | # The neural network policy should have two hidden layers, each with 32 hidden units.
15 | hidden_sizes=(32, 32)
16 | )
17 |
18 | baseline = LinearFeatureBaseline(env_spec=env.spec)
19 |
20 | algo = VPG(
21 | env=env,
22 | policy=policy,
23 | baseline=baseline,
24 | batch_size=10000,
25 | max_path_length=100,
26 | n_itr=40,
27 | discount=0.99,
28 | optimizer_args=dict(
29 | tf_optimizer_args=dict(
30 | learning_rate=0.01,
31 | )
32 | )
33 | )
34 | algo.train()
35 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/misc/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/optimizers/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/policies/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/policies/uniform_control_policy.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.policies.base import Policy
2 | from rllab.core.serializable import Serializable
3 |
4 |
5 | class UniformControlPolicy(Policy, Serializable):
6 | def __init__(
7 | self,
8 | env_spec,
9 | ):
10 | Serializable.quick_init(self, locals())
11 | super(UniformControlPolicy, self).__init__(env_spec=env_spec)
12 |
13 | @property
14 | def vectorized(self):
15 | return True
16 |
17 | def get_action(self, observation):
18 | return self.action_space.sample(), dict()
19 |
20 | def get_actions(self, observations):
21 | return self.action_space.sample_n(len(observations)), dict()
22 |
23 | def get_params_internal(self, **tags):
24 | return []
25 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/q_functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/q_functions/__init__.py
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/q_functions/base.py:
--------------------------------------------------------------------------------
1 | from sandbox.rocky.tf.core.parameterized import Parameterized
2 |
3 | class QFunction(Parameterized):
4 | pass
5 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/regressors/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/samplers/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from .product import Product
2 | from .discrete import Discrete
3 | from .box import Box
4 |
5 | __all__ = ["Product", "Discrete", "Box"]
6 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/spaces/box.py:
--------------------------------------------------------------------------------
1 | from rllab.spaces.box import Box as TheanoBox
2 | import tensorflow as tf
3 |
4 |
5 | class Box(TheanoBox):
6 | def new_tensor_variable(self, name, extra_dims, flatten=True):
7 | if flatten:
8 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + [self.flat_dim], name=name)
9 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + list(self.shape), name=name)
10 |
11 | @property
12 | def dtype(self):
13 | return tf.float32
14 |
--------------------------------------------------------------------------------
/rllab/sandbox/rocky/tf/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | from rllab.spaces.base import Space
2 | import numpy as np
3 | from rllab.misc import special
4 | from rllab.misc import ext
5 | import tensorflow as tf
6 |
7 |
8 | class Discrete(Space):
9 | """
10 | {0,1,...,n-1}
11 | """
12 |
13 | def __init__(self, n):
14 | self._n = n
15 |
16 | @property
17 | def n(self):
18 | return self._n
19 |
20 | def sample(self):
21 | return np.random.randint(self.n)
22 |
23 | def sample_n(self, n):
24 | return np.random.randint(low=0, high=self.n, size=n)
25 |
26 | def contains(self, x):
27 | x = np.asarray(x)
28 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n
29 |
30 | def __repr__(self):
31 | return "Discrete(%d)" % self.n
32 |
33 | def __eq__(self, other):
34 | return self.n == other.n
35 |
36 | def flatten(self, x):
37 | return special.to_onehot(x, self.n)
38 |
39 | def unflatten(self, x):
40 | return special.from_onehot(x)
41 |
42 | def flatten_n(self, x):
43 | return special.to_onehot_n(x, self.n)
44 |
45 | def unflatten_n(self, x):
46 | return special.from_onehot_n(x)
47 |
48 | @property
49 | def default_value(self):
50 | return 0
51 |
52 | @property
53 | def flat_dim(self):
54 | return self.n
55 |
56 | def weighted_sample(self, weights):
57 | return special.weighted_sample(weights, range(self.n))
58 |
59 | def new_tensor_variable(self, name, extra_dims):
60 | # needed for safe conversion to float32
61 | return tf.placeholder(dtype=tf.uint8, shape=[None] * extra_dims + [self.flat_dim], name=name)
62 |
63 | @property
64 | def dtype(self):
65 | return tf.uint8
66 |
67 | def __eq__(self, other):
68 | if not isinstance(other, Discrete):
69 | return False
70 | return self.n == other.n
71 |
72 | def __hash__(self):
73 | return hash(self.n)
74 |
75 |
--------------------------------------------------------------------------------
/rllab/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/scripts/__init__.py
--------------------------------------------------------------------------------
/rllab/scripts/resume_training.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | from rllab.sampler.utils import rollout
5 | from rllab.algos.batch_polopt import BatchPolopt
6 | import argparse
7 | import joblib
8 | import uuid
9 | import os
10 | import random
11 | import numpy as np
12 | import json
13 | import subprocess
14 | from rllab.misc import logger
15 | from rllab.misc.instrument import to_local_command
16 |
17 | filename = str(uuid.uuid4())
18 |
19 | if __name__ == "__main__":
20 |
21 | parser = argparse.ArgumentParser()
22 | parser.add_argument('file', type=str,
23 | help='path to the snapshot file')
24 | parser.add_argument('--log_dir', type=str, default=None,
25 | help='path to the new log directory')
26 | # Look for params.json file
27 | args = parser.parse_args()
28 | parent_dir = os.path.dirname(os.path.realpath(args.file))
29 | json_file_path = os.path.join(parent_dir, "params.json")
30 | logger.log("Looking for params.json at %s..." % json_file_path)
31 | try:
32 | with open(json_file_path, "r") as f:
33 | params = json.load(f)
34 | # exclude certain parameters
35 | excluded = ['json_args']
36 | for k in excluded:
37 | if k in params:
38 | del params[k]
39 | for k, v in list(params.items()):
40 | if v is None:
41 | del params[k]
42 | if args.log_dir is not None:
43 | params['log_dir'] = args.log_dir
44 | params['resume_from'] = args.file
45 | command = to_local_command(params, script='scripts/run_experiment_lite.py')
46 | print(command)
47 | try:
48 | subprocess.call(command, shell=True, env=os.environ)
49 | except Exception as e:
50 | print(e)
51 | if isinstance(e, KeyboardInterrupt):
52 | raise
53 | except IOError as e:
54 | logger.log("Failed to find json file. Continuing in non-stub mode...")
55 | data = joblib.load(args.file)
56 | assert 'algo' in data
57 | algo = data['algo']
58 | assert isinstance(algo, BatchPolopt)
59 | algo.train()
60 |
--------------------------------------------------------------------------------
/rllab/scripts/setup_linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Make sure that conda is available
3 |
4 | hash conda 2>/dev/null || {
5 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer."
6 | exit 0
7 | }
8 |
9 | echo "Installing system dependencies"
10 | echo "You will probably be asked for your sudo password."
11 | sudo apt-get update
12 | sudo apt-get install -y python-pip python-dev swig cmake build-essential zlib1g-dev
13 | sudo apt-get build-dep -y python-pygame
14 | sudo apt-get build-dep -y python-scipy
15 |
16 | # Make sure that we're under the directory of the project
17 | cd "$(dirname "$0")/.."
18 |
19 | echo "Creating conda environment..."
20 | conda env create -f environment.yml
21 | conda env update
22 |
23 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab."
24 |
--------------------------------------------------------------------------------
/rllab/scripts/setup_mujoco.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$(uname)" == "Darwin" ]; then
4 | mujoco_file="libmujoco131.dylib"
5 | glfw_file="libglfw.3.dylib"
6 | zip_file="mjpro131_osx.zip"
7 | mktemp_cmd="mktemp -d /tmp/mujoco"
8 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then
9 | mujoco_file="libmujoco131.so"
10 | glfw_file="libglfw.so.3"
11 | zip_file="mjpro131_linux.zip"
12 | mktemp_cmd="mktemp -d"
13 | fi
14 |
15 | if [ ! -f vendor/mujoco/$mujoco_file ]; then
16 | read -e -p "Please enter the path to the mujoco zip file [$zip_file]:" path
17 | path=${path:-$zip_file}
18 | eval path=\"$path\"
19 | if [ ! -f $path ]; then
20 | echo "No file found at $path"
21 | exit 0
22 | fi
23 | rm -r /tmp/mujoco
24 | dir=`$mktemp_cmd`
25 | unzip $path -d $dir
26 | if [ ! -f $dir/mjpro131/bin/$mujoco_file ]; then
27 | echo "mjpro/$mujoco_file not found. Make sure you have the correct file (most likely named $zip_file)"
28 | exit 0
29 | fi
30 | if [ ! -f $dir/mjpro131/bin/$glfw_file ]; then
31 | echo "mjpro/$glfw_file not found. Make sure you have the correct file (most likely named $zip_file)"
32 | exit 0
33 | fi
34 |
35 | mkdir -p vendor/mujoco
36 | cp $dir/mjpro131/bin/$mujoco_file vendor/mujoco/
37 | cp $dir/mjpro131/bin/$glfw_file vendor/mujoco/
38 | fi
39 |
40 | if [ ! -f vendor/mujoco/mjkey.txt ]; then
41 | read -e -p "Please enter the path to the mujoco license file [mjkey.txt]:" path
42 | path=${path:-mjkey.txt}
43 | eval path=$path
44 | if [ ! -f $path ]; then
45 | echo "No file found at $path"
46 | exit 0
47 | fi
48 | cp $path vendor/mujoco/mjkey.txt
49 | fi
50 |
51 | echo "Mujoco has been set up!"
52 |
--------------------------------------------------------------------------------
/rllab/scripts/setup_osx.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Make sure that pip is available
3 | hash brew 2>/dev/null || {
4 | echo "Please install homebrew before continuing. You can use the following command to install:"
5 | echo "/usr/bin/ruby -e \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)\""
6 | exit 0
7 | }
8 |
9 | hash conda 2>/dev/null || {
10 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer."
11 | exit 0
12 | }
13 |
14 |
15 | echo "Installing system dependencies"
16 | echo "You will probably be asked for your sudo password."
17 |
18 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi
19 |
20 | # Make sure that we're under the directory of the project
21 | cd "$(dirname "$0")/.."
22 | echo "Creating conda environment..."
23 | conda env create -f environment.yml
24 | conda env update
25 |
26 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab."
27 |
--------------------------------------------------------------------------------
/rllab/scripts/sim_policy.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import joblib
4 | import tensorflow as tf
5 |
6 | from rllab.misc.console import query_yes_no
7 | from rllab.sampler.utils import rollout
8 |
9 | if __name__ == "__main__":
10 |
11 | parser = argparse.ArgumentParser()
12 | parser.add_argument('file', type=str,
13 | help='path to the snapshot file')
14 | parser.add_argument('--max_path_length', type=int, default=1000,
15 | help='Max length of rollout')
16 | parser.add_argument('--speedup', type=float, default=1,
17 | help='Speedup')
18 | args = parser.parse_args()
19 |
20 |     # If the snapshot file uses tensorflow, do:
21 | # import tensorflow as tf
22 | # with tf.Session():
23 | # [rest of the code]
24 | with tf.Session() as sess:
25 | data = joblib.load(args.file)
26 | policy = data['policy']
27 | env = data['env']
28 | while True:
29 | path = rollout(env, policy, max_path_length=args.max_path_length,
30 | animated=True, speedup=args.speedup)
31 | if not query_yes_no('Continue simulation?'):
32 | break
33 |
--------------------------------------------------------------------------------
/rllab/scripts/submit_gym.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import argparse
4 | import os
5 | import os.path as osp
6 | import gym
7 | from rllab.viskit.core import load_params
8 |
9 | if __name__ == "__main__":
10 | # rl_gym.api_key = 'g8JOpnNVmcjMShBiFtyji2VWX3P2uCzc'
11 | if 'OPENAI_GYM_API_KEY' not in os.environ:
12 |         raise ValueError("OpenAI Gym API key not configured. Please register an account on https://gym.openai.com,"
13 |                          " set the OPENAI_GYM_API_KEY environment variable, and try the script again.")
14 |
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument('log_dir', type=str,
17 | help='path to the logging directory')
18 | parser.add_argument('--algorithm_id', type=str, default=None, help='Algorithm ID')
19 | args = parser.parse_args()
20 | snapshot_dir = osp.abspath(osp.join(args.log_dir, ".."))
21 | params_file_path = osp.join(snapshot_dir, "params.json")
22 | gym.upload(args.log_dir, algorithm_id=args.algorithm_id)
23 |
--------------------------------------------------------------------------------
/rllab/scripts/sync_s3.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('.')
3 | from rllab import config
4 | import os
5 | import argparse
6 | import ast
7 |
8 | if __name__ == "__main__":
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument('folder', type=str, default=None, nargs='?')
11 | parser.add_argument('--dry', action='store_true', default=False)
12 | parser.add_argument('--bare', action='store_true', default=False)
13 | args = parser.parse_args()
14 | remote_dir = config.AWS_S3_PATH
15 | local_dir = os.path.join(config.LOG_DIR, "s3")
16 | if args.folder:
17 | remote_dir = os.path.join(remote_dir, args.folder)
18 | local_dir = os.path.join(local_dir, args.folder)
19 | if args.bare:
20 | command = ("""
21 | aws s3 sync {remote_dir} {local_dir} --exclude '*' --include '*.csv' --include '*.json' --content-type "UTF-8"
22 | """.format(local_dir=local_dir, remote_dir=remote_dir))
23 | else:
24 | command = ("""
25 | aws s3 sync {remote_dir} {local_dir} --exclude '*stdout.log' --exclude '*stdouterr.log' --content-type "UTF-8"
26 | """.format(local_dir=local_dir, remote_dir=remote_dir))
27 | if args.dry:
28 | print(command)
29 | else:
30 | os.system(command)
--------------------------------------------------------------------------------
/rllab/setup.py:
--------------------------------------------------------------------------------
1 | # setup.py
2 | from setuptools import setup, find_packages
3 |
4 | setup(
5 | name='rllab',
6 | packages=[package for package in find_packages()
7 | if package.startswith('rllab')],
8 | version='0.1.0',
9 | )
10 |
--------------------------------------------------------------------------------
/rllab/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/__init__.py
--------------------------------------------------------------------------------
/rllab/tests/algos/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/tests/algos/test_trpo.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | from rllab.envs.base import Env, Step
4 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
5 | from rllab.baselines.zero_baseline import ZeroBaseline
6 | from rllab.algos.trpo import TRPO
7 | from rllab.spaces.box import Box
8 | import lasagne.nonlinearities
9 | import numpy as np
10 | import theano.tensor as TT
11 |
12 |
13 | class DummyEnv(Env):
14 | @property
15 | def observation_space(self):
16 | return Box(low=-np.inf, high=np.inf, shape=(1,))
17 |
18 | @property
19 | def action_space(self):
20 | return Box(low=-5.0, high=5.0, shape=(1,))
21 |
22 | def reset(self):
23 | return np.zeros(1)
24 |
25 | def step(self, action):
26 | return Step(observation=np.zeros(1), reward=np.random.normal(), done=True)
27 |
28 |
29 | def naive_relu(x):
30 | return TT.max(x, 0)
31 |
32 |
33 | def test_trpo_relu_nan():
34 | env = DummyEnv()
35 | policy = GaussianMLPPolicy(
36 | env_spec=env.spec,
37 | hidden_nonlinearity=naive_relu,
38 | hidden_sizes=(1,))
39 | baseline = ZeroBaseline(env_spec=env.spec)
40 | algo = TRPO(
41 | env=env, policy=policy, baseline=baseline, n_itr=1, batch_size=1000, max_path_length=100,
42 | step_size=0.001
43 | )
44 | algo.train()
45 | assert not np.isnan(np.sum(policy.get_param_values()))
46 |
47 |
48 | def test_trpo_deterministic_nan():
49 | env = DummyEnv()
50 | policy = GaussianMLPPolicy(
51 | env_spec=env.spec,
52 | hidden_sizes=(1,))
53 | policy._l_log_std.param.set_value([np.float32(np.log(1e-8))])
54 | baseline = ZeroBaseline(env_spec=env.spec)
55 | algo = TRPO(
56 | env=env, policy=policy, baseline=baseline, n_itr=10, batch_size=1000, max_path_length=100,
57 | step_size=0.01
58 | )
59 | algo.train()
60 | assert not np.isnan(np.sum(policy.get_param_values()))
61 |
--------------------------------------------------------------------------------
/rllab/tests/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/envs/__init__.py
--------------------------------------------------------------------------------
/rllab/tests/envs/test_maze_env.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | from rllab.envs.mujoco.maze.maze_env_utils import line_intersect, ray_segment_intersect
4 |
5 |
6 | def test_line_intersect():
7 | assert line_intersect((0, 0), (0, 1), (0, 0), (1, 0))[:2] == (0, 0)
8 | assert line_intersect((0, 0), (0, 1), (0, 0), (0, 1))[2] == 0
9 | assert ray_segment_intersect(ray=((0, 0), 0), segment=((1, -1), (1, 1))) == (1, 0)
10 | assert ray_segment_intersect(ray=((0, 0), math.pi), segment=((1, -1), (1, 1))) is None
11 |
--------------------------------------------------------------------------------
/rllab/tests/regression_tests/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/rllab/tests/regression_tests/test_issue_3.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | from nose2.tools import such
5 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
7 | from rllab.algos.trpo import TRPO
8 | from rllab.baselines.zero_baseline import ZeroBaseline
9 |
10 | with such.A("Issue #3") as it:
11 | @it.should("be fixed")
12 | def test_issue_3():
13 | """
14 | As reported in https://github.com/rllab/rllab/issues/3, the adaptive_std parameter was not functioning properly
15 | """
16 | env = CartpoleEnv()
17 | policy = GaussianMLPPolicy(
18 |             env_spec=env.spec,
19 | adaptive_std=True
20 | )
21 | baseline = ZeroBaseline(env_spec=env.spec)
22 | algo = TRPO(
23 | env=env,
24 | policy=policy,
25 | baseline=baseline,
26 | batch_size=100,
27 | n_itr=1
28 | )
29 | algo.train()
30 |
31 | it.createTests(globals())
32 |
--------------------------------------------------------------------------------
/rllab/tests/test_baselines.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | os.environ['THEANO_FLAGS'] = 'mode=FAST_COMPILE,optimizer=None'
4 |
5 | from rllab.algos.vpg import VPG
6 | from rllab.envs.box2d.cartpole_env import CartpoleEnv
7 | from rllab.baselines.zero_baseline import ZeroBaseline
8 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
9 | from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline
10 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
11 | from nose2 import tools
12 |
13 |
14 | baselines = [ZeroBaseline, LinearFeatureBaseline, GaussianMLPBaseline]
15 |
16 |
17 | @tools.params(*baselines)
18 | def test_baseline(baseline_cls):
19 | env = CartpoleEnv()
20 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6,))
21 | baseline = baseline_cls(env_spec=env.spec)
22 | algo = VPG(
23 | env=env, policy=policy, baseline=baseline,
24 | n_itr=1, batch_size=1000, max_path_length=100
25 | )
26 | algo.train()
27 |
--------------------------------------------------------------------------------
/rllab/tests/test_instrument.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | from rllab.misc import instrument
5 | from nose2.tools import such
6 |
7 |
8 | class TestClass(object):
9 | @property
10 | def arr(self):
11 | return [1, 2, 3]
12 |
13 | @property
14 | def compound_arr(self):
15 | return [dict(a=1)]
16 |
17 |
18 | with such.A("instrument") as it:
19 | @it.should
20 | def test_concretize():
21 | it.assertEqual(instrument.concretize([5]), [5])
22 | it.assertEqual(instrument.concretize((5,)), (5,))
23 | fake_globals = dict(TestClass=TestClass)
24 | instrument.stub(fake_globals)
25 | modified = fake_globals["TestClass"]
26 | it.assertIsInstance(modified, instrument.StubClass)
27 | it.assertIsInstance(modified(), instrument.StubObject)
28 | it.assertEqual(instrument.concretize((5,)), (5,))
29 | it.assertIsInstance(instrument.concretize(modified()), TestClass)
30 |
31 |
32 | @it.should
33 | def test_chained_call():
34 | fake_globals = dict(TestClass=TestClass)
35 | instrument.stub(fake_globals)
36 | modified = fake_globals["TestClass"]
37 | it.assertIsInstance(modified().arr[0], instrument.StubMethodCall)
38 | it.assertIsInstance(modified().compound_arr[0]["a"], instrument.StubMethodCall)
39 | it.assertEqual(instrument.concretize(modified().arr[0]), 1)
40 |
41 |
42 | @it.should
43 | def test_variant_generator():
44 |
45 | vg = instrument.VariantGenerator()
46 | vg.add("key1", [1, 2, 3])
47 | vg.add("key2", [True, False])
48 | vg.add("key3", lambda key2: [1] if key2 else [1, 2])
49 | it.assertEqual(len(vg.variants()), 9)
50 |
51 | class VG(instrument.VariantGenerator):
52 |
53 | @instrument.variant
54 | def key1(self):
55 | return [1, 2, 3]
56 |
57 | @instrument.variant
58 | def key2(self):
59 | yield True
60 | yield False
61 |
62 | @instrument.variant
63 | def key3(self, key2):
64 | if key2:
65 | yield 1
66 | else:
67 | yield 1
68 | yield 2
69 |
70 | it.assertEqual(len(VG().variants()), 9)
71 |
72 | it.createTests(globals())
73 |
--------------------------------------------------------------------------------
/rllab/tests/test_networks.py:
--------------------------------------------------------------------------------
1 | def test_gru_network():
2 | from rllab.core.network import GRUNetwork
3 | import lasagne.layers as L
4 | from rllab.misc import ext
5 | import numpy as np
6 | network = GRUNetwork(
7 | input_shape=(2, 3),
8 | output_dim=5,
9 | hidden_dim=4,
10 | )
11 | f_output = ext.compile_function(
12 | inputs=[network.input_layer.input_var],
13 | outputs=L.get_output(network.output_layer)
14 | )
15 | assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5)
16 |
--------------------------------------------------------------------------------
/rllab/tests/test_sampler.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import numpy as np
4 |
5 |
6 | def test_truncate_paths():
7 | from rllab.sampler.parallel_sampler import truncate_paths
8 |
9 | paths = [
10 | dict(
11 | observations=np.zeros((100, 1)),
12 | actions=np.zeros((100, 1)),
13 | rewards=np.zeros(100),
14 | env_infos=dict(),
15 | agent_infos=dict(lala=np.zeros(100)),
16 | ),
17 | dict(
18 | observations=np.zeros((50, 1)),
19 | actions=np.zeros((50, 1)),
20 | rewards=np.zeros(50),
21 | env_infos=dict(),
22 | agent_infos=dict(lala=np.zeros(50)),
23 | ),
24 | ]
25 |
26 | truncated = truncate_paths(paths, 130)
27 | assert len(truncated) == 2
28 | assert len(truncated[-1]["observations"]) == 30
29 | assert len(truncated[0]["observations"]) == 100
30 | # make sure not to change the original one
31 | assert len(paths) == 2
32 | assert len(paths[-1]["observations"]) == 50
33 |
--------------------------------------------------------------------------------
/rllab/tests/test_serializable.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from rllab.core.serializable import Serializable
4 | from sandbox.rocky.tf.core.parameterized import Parameterized, suppress_params_loading
5 |
6 |
7 | class Simple(Parameterized, Serializable):
8 | def __init__(self, name):
9 | Serializable.quick_init(self, locals())
10 | with tf.variable_scope(name):
11 | self.w = tf.get_variable("w", [10, 10])
12 |
13 | def get_params_internal(self, **tags):
14 | return [self.w]
15 |
16 |
17 | class AllArgs(Serializable):
18 | def __init__(self, vararg, *args, **kwargs):
19 | Serializable.quick_init(self, locals())
20 | self.vararg = vararg
21 | self.args = args
22 | self.kwargs = kwargs
23 |
24 |
25 | def test_serializable():
26 | with suppress_params_loading():
27 | obj = Simple(name="obj")
28 | obj1 = Serializable.clone(obj, name="obj1")
29 | assert obj.w.name.startswith('obj/')
30 | assert obj1.w.name.startswith('obj1/')
31 |
32 | obj2 = AllArgs(0, *(1,), **{'kwarg': 2})
33 | obj3 = Serializable.clone(obj2)
34 | assert obj3.vararg == 0
35 | assert len(obj3.args) == 1 and obj3.args[0] == 1
36 | assert len(obj3.kwargs) == 1 and obj3.kwargs['kwarg'] == 2
37 |
38 |
39 | if __name__ == "__main__":
40 | test_serializable()
41 |
--------------------------------------------------------------------------------
/rllab/tests/test_spaces.py:
--------------------------------------------------------------------------------
1 |
2 | from rllab.spaces import Product, Discrete, Box
3 | import numpy as np
4 |
5 |
6 | def test_product_space():
7 | _ = Product([Discrete(3), Discrete(2)])
8 | product_space = Product(Discrete(3), Discrete(2))
9 | sample = product_space.sample()
10 | assert product_space.contains(sample)
11 |
12 |
13 | def test_product_space_unflatten_n():
14 | space = Product([Discrete(3), Discrete(3)])
15 | np.testing.assert_array_equal(space.flatten((2, 2)), space.flatten_n([(2, 2)])[0])
16 | np.testing.assert_array_equal(
17 | space.unflatten(space.flatten((2, 2))),
18 | space.unflatten_n(space.flatten_n([(2, 2)]))[0]
19 | )
20 |
21 |
22 | def test_box():
23 | space = Box(low=-1, high=1, shape=(2, 2))
24 | np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]), [1, 2, 3, 4])
25 | np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]), [[1, 2, 3, 4]])
26 | np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]), [[1, 2], [3, 4]])
27 | np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]), [[[1, 2], [3, 4]]])
28 |
--------------------------------------------------------------------------------
/rllab/tests/test_stateful_pool.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | def _worker_collect_once(_):
6 | return 'a', 1
7 |
8 |
9 | def test_stateful_pool():
10 | from rllab.sampler import stateful_pool
11 | stateful_pool.singleton_pool.initialize(n_parallel=3)
12 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False)
13 | assert tuple(results) == ('a', 'a', 'a')
14 |
15 |
16 | def test_stateful_pool_over_capacity():
17 | from rllab.sampler import stateful_pool
18 | stateful_pool.singleton_pool.initialize(n_parallel=4)
19 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False)
20 | assert len(results) >= 3
21 |
--------------------------------------------------------------------------------
/rllab/vendor/mujoco_models/green_ball.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/rllab/vendor/mujoco_models/point.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/rllab/vendor/mujoco_models/red_ball.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/scripts/maze_data_collect.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | from inverse_rl.algos.trpo import TRPO
4 | from inverse_rl.models.tf_util import get_session_config
5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
6 | from sandbox.rocky.tf.envs.base import TfEnv
7 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
8 |
9 | from inverse_rl.envs.env_utils import CustomGymEnv
10 | from inverse_rl.utils.log_utils import rllab_logdir
11 | from inverse_rl.utils.hyper_sweep import run_sweep_parallel, run_sweep_serial
12 |
13 |
14 | def main(exp_name, ent_wt=1.0, discrete=True):
15 | tf.reset_default_graph()
16 | if discrete:
17 | env = TfEnv(CustomGymEnv('PointMazeLeft-v0', record_video=False, record_log=False))
18 | else:
19 | env = TfEnv(CustomGymEnv('PointMazeLeftCont-v0', record_video=False, record_log=False))
20 |
21 | policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
22 | with tf.Session(config=get_session_config()) as sess:
23 | algo = TRPO(
24 | env=env,
25 | sess=sess,
26 | policy=policy,
27 | n_itr=2000,
28 | batch_size=20000,
29 | max_path_length=500,
30 | discount=0.99,
31 | store_paths=True,
32 | entropy_weight=ent_wt,
33 | baseline=LinearFeatureBaseline(env_spec=env.spec),
34 | exp_name=exp_name,
35 | )
36 | if discrete:
37 | output = 'data/maze_left_data_collect_discrete-15/%s' % exp_name
38 | else:
39 | output = 'data/maze_left_data_collect/%s' % exp_name
40 | with rllab_logdir(algo=algo, dirname=output):
41 | algo.train()
42 |
43 |
44 | if __name__ == "__main__":
45 | params_dict = {
46 | 'ent_wt': [0.1],
47 |         'discrete': True  # Set discrete to True to collect training data, or False to collect test data (unseen positions)
48 | }
49 | run_sweep_parallel(main, params_dict, repeat=4)
50 |
--------------------------------------------------------------------------------