├── .gitignore ├── README.md ├── data_fusion_discrete └── maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32 │ └── 2019_05_14_02_33_17_0 │ └── itr_2800.pkl ├── inverse_rl ├── __init__.py ├── __pycache__ │ └── __init__.cpython-35.pyc ├── algos │ ├── __pycache__ │ │ ├── batch_polopt.cpython-35.pyc │ │ ├── irl_batch_polopt.cpython-35.pyc │ │ ├── irl_npo.cpython-35.pyc │ │ ├── irl_trpo.cpython-35.pyc │ │ ├── meta_irl_batch_polopt.cpython-35.pyc │ │ ├── meta_irl_npo.cpython-35.pyc │ │ ├── meta_irl_trpo.cpython-35.pyc │ │ ├── npo.cpython-35.pyc │ │ ├── penalty_lbfgs_optimizer.cpython-35.pyc │ │ └── trpo.cpython-35.pyc │ ├── batch_polopt.py │ ├── irl_batch_polopt.py │ ├── irl_npo.py │ ├── irl_trpo.py │ ├── meta_irl_batch_polopt.py │ ├── meta_irl_npo.py │ ├── meta_irl_trpo.py │ ├── npo.py │ ├── penalty_lbfgs_optimizer.py │ └── trpo.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── env_utils.cpython-35.pyc │ │ └── point_maze_env.cpython-35.pyc │ ├── assets │ │ └── twod_maze.xml │ ├── dynamic_mjc │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── mjc_models.cpython-35.pyc │ │ │ └── model_builder.cpython-35.pyc │ │ ├── mjc_models.py │ │ └── model_builder.py │ ├── env_utils.py │ ├── point_maze_env.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── airl_state.cpython-35.pyc │ │ ├── architectures.cpython-35.pyc │ │ ├── fusion_manager.cpython-35.pyc │ │ ├── imitation_learning.cpython-35.pyc │ │ ├── old_imitation_learning.cpython-35.pyc │ │ ├── pretrain.cpython-35.pyc │ │ └── tf_util.cpython-35.pyc │ ├── airl_state.py │ ├── architectures.py │ ├── fusion_manager.py │ ├── imitation_learning.py │ ├── info_airl_state_test.py │ ├── info_airl_state_train.py │ ├── old_imitation_learning.py │ ├── pretrain.py │ └── tf_util.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── general.cpython-35.pyc │ ├── hyper_sweep.cpython-35.pyc │ ├── hyperparametrized.cpython-35.pyc │ ├── log_utils.cpython-35.pyc │ └── math_utils.cpython-35.pyc │ ├── general.py │ ├── hyper_sweep.py │ ├── hyperparametrized.py │ ├── log_utils.py │ └── math_utils.py ├── rllab ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── circle.yml ├── contrib │ ├── __init__.py │ ├── alexbeloi │ │ ├── __init__.py │ │ ├── examples │ │ │ ├── __init__.py │ │ │ ├── trpois_cartpole.py │ │ │ └── vpgis_cartpole.py │ │ └── is_sampler.py │ ├── bichengcao │ │ ├── __init__.py │ │ └── examples │ │ │ ├── __init__.py │ │ │ ├── trpo_gym_Acrobot-v1.py │ │ │ ├── trpo_gym_CartPole-v0.py │ │ │ ├── trpo_gym_CartPole-v1.py │ │ │ ├── trpo_gym_MountainCar-v0.py │ │ │ └── trpo_gym_Pendulum-v0.py │ └── rllab_hyperopt │ │ ├── __init__.py │ │ ├── core.py │ │ ├── example │ │ ├── __init__.py │ │ ├── main.py │ │ ├── score.py │ │ └── task.py │ │ └── visualize_hyperopt_results.ipynb ├── docker │ ├── Dockerfile │ ├── gpu_Dockerfile │ ├── gpu_tf_Dockerfile │ └── tester_Dockerfile ├── docs │ ├── Makefile │ ├── conf.py │ ├── index.rst │ └── user │ │ ├── cluster.rst │ │ ├── cluster_1.png │ │ ├── cluster_2.png │ │ ├── cluster_3.png │ │ ├── experiments.rst │ │ ├── gym_integration.rst │ │ ├── implement_algo_advanced.rst │ │ ├── implement_algo_basic.rst │ │ ├── implement_env.rst │ │ └── installation.rst ├── environment.yml ├── examples │ ├── __init__.py │ ├── cluster_demo.py │ ├── cluster_gym_mujoco_demo.py │ ├── ddpg_cartpole.py │ ├── nop_cartpole.py │ ├── point_env.py │ 
├── trpo_cartpole.py │ ├── trpo_cartpole_pickled.py │ ├── trpo_cartpole_recurrent.py │ ├── trpo_gym_cartpole.py │ ├── trpo_gym_pendulum.py │ ├── trpo_gym_tf_cartpole.py │ ├── trpo_point.py │ ├── trpo_swimmer.py │ ├── vpg_1.py │ └── vpg_2.py ├── rllab │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batch_polopt.py │ │ ├── cem.py │ │ ├── cma_es.py │ │ ├── cma_es_lib.py │ │ ├── ddpg.py │ │ ├── erwr.py │ │ ├── nop.py │ │ ├── npo.py │ │ ├── ppo.py │ │ ├── reps.py │ │ ├── tnpg.py │ │ ├── trpo.py │ │ ├── util.py │ │ └── vpg.py │ ├── baselines │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_conv_baseline.py │ │ ├── gaussian_mlp_baseline.py │ │ ├── linear_feature_baseline.py │ │ └── zero_baseline.py │ ├── config.py │ ├── config_personal_template.py │ ├── core │ │ ├── __init__.py │ │ ├── lasagne_helpers.py │ │ ├── lasagne_layers.py │ │ ├── lasagne_powered.py │ │ ├── network.py │ │ ├── parameterized.py │ │ └── serializable.py │ ├── distributions │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bernoulli.py │ │ ├── categorical.py │ │ ├── delta.py │ │ ├── diagonal_gaussian.py │ │ ├── recurrent_categorical.py │ │ └── recurrent_diagonal_gaussian.py │ ├── envs │ │ ├── __init__.py │ │ ├── base.py │ │ ├── box2d │ │ │ ├── __init__.py │ │ │ ├── box2d_env.py │ │ │ ├── box2d_viewer.py │ │ │ ├── car_parking_env.py │ │ │ ├── cartpole_env.py │ │ │ ├── cartpole_swingup_env.py │ │ │ ├── double_pendulum_env.py │ │ │ ├── models │ │ │ │ ├── car_parking.xml │ │ │ │ ├── car_parking.xml.rb │ │ │ │ ├── cartpole.xml.mako │ │ │ │ ├── double_pendulum.xml.mako │ │ │ │ └── mountain_car.xml.mako │ │ │ ├── mountain_car_env.py │ │ │ └── parser │ │ │ │ ├── __init__.py │ │ │ │ ├── xml_attr_types.py │ │ │ │ ├── xml_box2d.py │ │ │ │ └── xml_types.py │ │ ├── env_spec.py │ │ ├── grid_world_env.py │ │ ├── gym_env.py │ │ ├── identification_env.py │ │ ├── mujoco │ │ │ ├── __init__.py │ │ │ ├── ant_env.py │ │ │ ├── gather │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_gather_env.py │ │ │ │ ├── embedded_viewer.py │ │ │ │ ├── gather_env.py │ │ │ │ ├── point_gather_env.py │ │ │ │ └── swimmer_gather_env.py │ │ │ ├── half_cheetah_env.py │ │ │ ├── hill │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_hill_env.py │ │ │ │ ├── half_cheetah_hill_env.py │ │ │ │ ├── hill_env.py │ │ │ │ ├── hopper_hill_env.py │ │ │ │ ├── swimmer3d_hill_env.py │ │ │ │ ├── terrain.py │ │ │ │ └── walker2d_hill_env.py │ │ │ ├── hopper_env.py │ │ │ ├── humanoid_env.py │ │ │ ├── inverted_double_pendulum_env.py │ │ │ ├── maze │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_maze_env.py │ │ │ │ ├── maze_env.py │ │ │ │ ├── maze_env_utils.py │ │ │ │ ├── point_maze_env.py │ │ │ │ └── swimmer_maze_env.py │ │ │ ├── mujoco_env.py │ │ │ ├── point_env.py │ │ │ ├── simple_humanoid_env.py │ │ │ ├── swimmer3d_env.py │ │ │ ├── swimmer_env.py │ │ │ └── walker2d_env.py │ │ ├── noisy_env.py │ │ ├── normalized_env.py │ │ ├── occlusion_env.py │ │ ├── proxy_env.py │ │ └── sliding_mem_env.py │ ├── exploration_strategies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_strategy.py │ │ └── ou_strategy.py │ ├── misc │ │ ├── __init__.py │ │ ├── autoargs.py │ │ ├── console.py │ │ ├── ext.py │ │ ├── instrument.py │ │ ├── krylov.py │ │ ├── logger.py │ │ ├── mako_utils.py │ │ ├── meta.py │ │ ├── nb_utils.py │ │ ├── overrides.py │ │ ├── resolve.py │ │ ├── special.py │ │ ├── tabulate.py │ │ ├── tensor_utils.py │ │ └── viewer2d.py │ ├── mujoco_py │ │ ├── .rvmrc │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── __init__.py │ │ ├── codegen.rb │ │ ├── gen_binding.sh │ │ ├── glfw.py │ │ ├── mjconstants.py │ │ 
├── mjcore.py │ │ ├── mjextra.py │ │ ├── mjlib.py │ │ ├── mjtypes.py │ │ ├── mjviewer.py │ │ └── util.py │ ├── optimizers │ │ ├── __init__.py │ │ ├── conjugate_gradient_optimizer.py │ │ ├── first_order_optimizer.py │ │ ├── hessian_free_optimizer.py │ │ ├── hf.py │ │ ├── lbfgs_optimizer.py │ │ ├── minibatch_dataset.py │ │ └── penalty_lbfgs_optimizer.py │ ├── plotter │ │ ├── __init__.py │ │ └── plotter.py │ ├── policies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical_conv_policy.py │ │ ├── categorical_gru_policy.py │ │ ├── categorical_mlp_policy.py │ │ ├── deterministic_mlp_policy.py │ │ ├── gaussian_gru_policy.py │ │ ├── gaussian_mlp_policy.py │ │ └── uniform_control_policy.py │ ├── q_functions │ │ ├── __init__.py │ │ ├── base.py │ │ └── continuous_mlp_q_function.py │ ├── regressors │ │ ├── __init__.py │ │ ├── categorical_mlp_regressor.py │ │ ├── gaussian_conv_regressor.py │ │ ├── gaussian_mlp_regressor.py │ │ └── product_regressor.py │ ├── sampler │ │ ├── __init__.py │ │ ├── base.py │ │ ├── parallel_sampler.py │ │ ├── stateful_pool.py │ │ └── utils.py │ ├── spaces │ │ ├── __init__.py │ │ ├── base.py │ │ ├── box.py │ │ ├── discrete.py │ │ └── product.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── sandbox │ ├── __init__.py │ └── rocky │ │ ├── __init__.py │ │ └── tf │ │ ├── __init__.py │ │ ├── algos │ │ ├── __init__.py │ │ ├── batch_polopt.py │ │ ├── npg.py │ │ ├── npo.py │ │ ├── trpo.py │ │ └── vpg.py │ │ ├── core │ │ ├── __init__.py │ │ ├── layers.py │ │ ├── layers_powered.py │ │ ├── network.py │ │ └── parameterized.py │ │ ├── distributions │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bernoulli.py │ │ ├── categorical.py │ │ ├── diagonal_gaussian.py │ │ ├── recurrent_categorical.py │ │ └── recurrent_diagonal_gaussian.py │ │ ├── envs │ │ ├── __init__.py │ │ ├── base.py │ │ ├── parallel_vec_env_executor.py │ │ └── vec_env_executor.py │ │ ├── launchers │ │ ├── __init__.py │ │ ├── trpo_cartpole.py │ │ ├── trpo_cartpole_recurrent.py │ │ └── vpg_cartpole.py │ │ ├── misc │ │ ├── __init__.py │ │ └── tensor_utils.py │ │ ├── optimizers │ │ ├── __init__.py │ │ ├── conjugate_gradient_optimizer.py │ │ ├── first_order_optimizer.py │ │ ├── lbfgs_optimizer.py │ │ └── penalty_lbfgs_optimizer.py │ │ ├── policies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical_conv_policy.py │ │ ├── categorical_gru_policy.py │ │ ├── categorical_lstm_policy.py │ │ ├── categorical_mlp_policy.py │ │ ├── deterministic_mlp_policy.py │ │ ├── gaussian_gru_policy.py │ │ ├── gaussian_lstm_policy.py │ │ ├── gaussian_mlp_policy.py │ │ ├── latent_gaussian_mlp_policy.py │ │ └── uniform_control_policy.py │ │ ├── q_functions │ │ ├── __init__.py │ │ ├── base.py │ │ └── continuous_mlp_q_function.py │ │ ├── regressors │ │ ├── __init__.py │ │ ├── bernoulli_mlp_regressor.py │ │ ├── categorical_mlp_regressor.py │ │ ├── deterministic_mlp_regressor.py │ │ └── gaussian_mlp_regressor.py │ │ ├── samplers │ │ ├── __init__.py │ │ ├── batch_sampler.py │ │ └── vectorized_sampler.py │ │ └── spaces │ │ ├── __init__.py │ │ ├── box.py │ │ ├── discrete.py │ │ └── product.py ├── scripts │ ├── __init__.py │ ├── resume_training.py │ ├── run_experiment_lite.py │ ├── setup_ec2_for_rllab.py │ ├── setup_linux.sh │ ├── 
setup_mujoco.sh │ ├── setup_osx.sh │ ├── sim_env.py │ ├── sim_policy.py │ ├── submit_gym.py │ └── sync_s3.py ├── setup.py ├── tests │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ └── test_trpo.py │ ├── envs │ │ ├── __init__.py │ │ ├── test_envs.py │ │ └── test_maze_env.py │ ├── regression_tests │ │ ├── __init__.py │ │ └── test_issue_3.py │ ├── test_algos.py │ ├── test_baselines.py │ ├── test_instrument.py │ ├── test_networks.py │ ├── test_sampler.py │ ├── test_serializable.py │ ├── test_spaces.py │ └── test_stateful_pool.py └── vendor │ └── mujoco_models │ ├── ant.xml │ ├── green_ball.xml │ ├── half_cheetah.xml │ ├── hill_ant_env.xml.mako │ ├── hill_half_cheetah_env.xml.mako │ ├── hill_hopper_env.xml.mako │ ├── hill_swimmer3d_env.xml.mako │ ├── hill_walker2d_env.xml.mako │ ├── hopper.xml │ ├── humanoid.xml │ ├── inverted_double_pendulum.xml │ ├── inverted_double_pendulum.xml.mako │ ├── point.xml │ ├── red_ball.xml │ ├── simple_humanoid.xml │ ├── swimmer.xml │ ├── swimmer3d.xml │ ├── utils.mako │ └── walker2d.xml └── scripts ├── maze_data_collect.py ├── maze_visualize_reward.py ├── maze_wall_meta_irl.py └── maze_wall_meta_irl_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Meta-Inverse Reinforcement Learning with Probabilistic Context Variables
2 | Lantao Yu*, Tianhe Yu*, Chelsea Finn, Stefano Ermon.
3 | The 33rd Conference on Neural Information Processing Systems (NeurIPS 2019).
4 | [[Paper]](https://arxiv.org/pdf/1909.09314.pdf) [[Website]](https://sites.google.com/view/pemirl) 5 | 6 | ### Usage 7 | Requirement: this project relies on the rllab package provided [here](https://github.com/ermongroup/MetaIRL/tree/master/rllab). 8 | 9 | To collect expert trajectories for the downstream tasks: 10 | ``` 11 | python scripts/maze_data_collect.py 12 | ``` 13 | 14 | After collecting expert trajectories, run Meta-Inverse RL to learn context-dependent reward functions: 15 | ``` 16 | python scripts/maze_wall_meta_irl.py 17 | ``` 18 | We provide a pretrained IRL model [here](https://github.com/ermongroup/MetaIRL/tree/master/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0), which the scripts below load by default. 19 | 20 | To visualize the context-dependent reward function (Figure 2 in the paper): 21 | ``` 22 | python scripts/maze_visualize_reward.py 23 | ``` 24 | 25 | To use the context-dependent reward function to train a new policy under new dynamics: 26 | ``` 27 | python scripts/maze_wall_meta_irl_test.py 28 | ``` -------------------------------------------------------------------------------- /data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl -------------------------------------------------------------------------------- /inverse_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__init__.py -------------------------------------------------------------------------------- /inverse_rl/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc
-------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/npo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/irl_trpo.py: -------------------------------------------------------------------------------- 1 | from inverse_rl.algos.irl_npo import IRLNPO 2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | 4 | 5 | class IRLTRPO(IRLNPO): 6 | """ 7 | Trust Region Policy Optimization 8 | """ 9 | 10 | def __init__( 11 | self, 12 | optimizer=None, 13 | optimizer_args=None, 14 | **kwargs): 15 | if optimizer is None: 16 | if optimizer_args is None: 17 | optimizer_args = dict() 18 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 19 | super(IRLTRPO, self).__init__(optimizer=optimizer, **kwargs) 20 | -------------------------------------------------------------------------------- /inverse_rl/algos/meta_irl_trpo.py: -------------------------------------------------------------------------------- 1 | 
from inverse_rl.algos.meta_irl_npo import MetaIRLNPO 2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | 4 | 5 | class MetaIRLTRPO(MetaIRLNPO): 6 | """ 7 | Trust Region Policy Optimization 8 | """ 9 | 10 | def __init__( 11 | self, 12 | optimizer=None, 13 | optimizer_args=None, 14 | **kwargs): 15 | if optimizer is None: 16 | if optimizer_args is None: 17 | optimizer_args = dict() 18 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 19 | super(MetaIRLTRPO, self).__init__(optimizer=optimizer, **kwargs) 20 | -------------------------------------------------------------------------------- /inverse_rl/algos/trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from inverse_rl.algos.npo import NPO 4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 5 | 6 | 7 | class TRPO(NPO): 8 | """ 9 | Trust Region Policy Optimization 10 | """ 11 | 12 | def __init__( 13 | self, 14 | optimizer=None, 15 | optimizer_args=None, 16 | **kwargs): 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /inverse_rl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from gym.envs import register 4 | 5 | LOGGER = logging.getLogger(__name__) 6 | 7 | _REGISTERED = False 8 | def register_custom_envs(): 9 | global _REGISTERED 10 | if _REGISTERED: 11 | return 12 | _REGISTERED = True 13 | 14 | LOGGER.info("Registering custom gym environments") 15 | register(id='PointMazeRight-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 16 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': True}) 17 | register(id='PointMazeLeft-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 18 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': True}) 19 | register(id='PointMazeRightCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 20 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': False}) 21 | register(id='PointMazeLeftCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 22 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': False}) 23 | 24 | -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/assets/twod_maze.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__init__.py -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def flat_to_one_hot(val, ndim): 4 | """ 5 | 6 | >>> flat_to_one_hot(2, ndim=4) 7 | array([ 0., 0., 1., 0.]) 8 | >>> flat_to_one_hot(4, ndim=5) 9 | array([ 0., 0., 0., 0., 1.]) 10 | >>> flat_to_one_hot(np.array([2, 4, 3]), ndim=5) 11 | array([[ 0., 0., 1., 0., 0.], 12 | [ 0., 0., 0., 0., 1.], 13 | [ 0., 0., 0., 1., 0.]]) 14 | """ 15 | shape =np.array(val).shape 16 | v = np.zeros(shape + (ndim,)) 17 | if len(shape) == 1: 18 | v[np.arange(shape[0]), val] = 1.0 19 | else: 20 | v[val] = 1.0 21 | return v 22 | 23 | def one_hot_to_flat(val): 24 | """ 25 | >>> one_hot_to_flat(np.array([0,0,0,0,1])) 26 | 4 27 | >>> one_hot_to_flat(np.array([0,0,1,0])) 28 | 2 29 | >>> one_hot_to_flat(np.array([[0,0,1,0], [1,0,0,0], [0,1,0,0]])) 30 | array([2, 0, 1]) 31 | """ 32 | idxs = np.array(np.where(val == 1.0))[-1] 33 | if len(val.shape) == 1: 34 | return int(idxs) 35 | return idxs -------------------------------------------------------------------------------- /inverse_rl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__init__.py -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/airl_state.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/airl_state.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/architectures.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/architectures.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/pretrain.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/pretrain.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/tf_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/tf_util.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/architectures.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from inverse_rl.models.tf_util import relu_layer, linear 3 | 4 | 5 | def make_relu_net(layers=2, dout=1, d_hidden=32): 6 | def relu_net(x, last_layer_bias=True): 7 | out = x 8 | for i in range(layers): 9 | out = relu_layer(out, dout=d_hidden, name='l%d'%i) 10 | out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias) 11 | return out 12 | return relu_net 13 | 14 | 15 | def relu_net(x, layers=2, dout=1, d_hidden=32): 16 | out = x 17 | for i in range(layers): 18 | out = relu_layer(out, dout=d_hidden, name='l%d'%i) 19 | out = 
linear(out, dout=dout, name='lfinal') 20 | return out 21 | 22 | 23 | def linear_net(x, dout=1): 24 | out = x 25 | out = linear(out, dout=dout, name='lfinal') 26 | return out 27 | 28 | 29 | def feedforward_energy(obs_act, ff_arch=relu_net): 30 | # for trajectories, using feedforward nets rather than RNNs 31 | dimOU = int(obs_act.get_shape()[2]) 32 | orig_shape = tf.shape(obs_act) 33 | 34 | obs_act = tf.reshape(obs_act, [-1, dimOU]) 35 | outputs = ff_arch(obs_act) 36 | dOut = int(outputs.get_shape()[-1]) 37 | 38 | new_shape = tf.stack([orig_shape[0],orig_shape[1], dOut]) 39 | outputs = tf.reshape(outputs, new_shape) 40 | return outputs 41 | 42 | 43 | def rnn_trajectory_energy(obs_act): 44 | """ 45 | Operates on trajectories 46 | """ 47 | # for trajectories 48 | dimOU = int(obs_act.get_shape()[2]) 49 | 50 | cell = tf.contrib.rnn.GRUCell(num_units=dimOU) 51 | cell_out = tf.contrib.rnn.OutputProjectionWrapper(cell, 1) 52 | outputs, hidden = tf.nn.dynamic_rnn(cell_out, obs_act, time_major=False, dtype=tf.float32) 53 | return outputs 54 | 55 | -------------------------------------------------------------------------------- /inverse_rl/models/tf_util.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | REG_VARS = 'reg_vars' 5 | 6 | def linear(X, dout, name, bias=True): 7 | with tf.variable_scope(name): 8 | dX = int(X.get_shape()[-1]) 9 | W = tf.get_variable('W', shape=(dX, dout)) 10 | tf.add_to_collection(REG_VARS, W) 11 | if bias: 12 | b = tf.get_variable('b', initializer=tf.constant(np.zeros(dout).astype(np.float32))) 13 | else: 14 | b = 0 15 | return tf.matmul(X, W)+b 16 | 17 | def discounted_reduce_sum(X, discount, axis=-1): 18 | if discount != 1.0: 19 | disc = tf.cumprod(discount*tf.ones_like(X), axis=axis) 20 | else: 21 | disc = 1.0 22 | return tf.reduce_sum(X*disc, axis=axis) 23 | 24 | def assert_shape(tens, shape): 25 | assert tens.get_shape().is_compatible_with(shape) 26 | 27 | def relu_layer(X, dout, name): 28 | return tf.nn.relu(linear(X, dout, name)) 29 | 30 | def softplus_layer(X, dout, name): 31 | return tf.nn.softplus(linear(X, dout, name)) 32 | 33 | def tanh_layer(X, dout, name): 34 | return tf.nn.tanh(linear(X, dout, name)) 35 | 36 | def get_session_config(): 37 | session_config = tf.ConfigProto() 38 | session_config.gpu_options.allow_growth = True 39 | #session_config.gpu_options.per_process_gpu_memory_fraction = 0.2 40 | return session_config 41 | 42 | 43 | def load_prior_params(pkl_fname, key='irl_params'): 44 | import joblib 45 | with tf.Session(config=get_session_config()): 46 | params = joblib.load(pkl_fname) 47 | 48 | tf.reset_default_graph() 49 | #joblib.dump(params, file_name, compress=3) 50 | params = params[key] 51 | #print(params) 52 | assert params is not None 53 | return params 54 | -------------------------------------------------------------------------------- /inverse_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from inverse_rl.utils.general import * 2 | -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- 
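As a quick illustration of how the reward-network builders in `inverse_rl/models/architectures.py` and the helpers in `inverse_rl/models/tf_util.py` above fit together, here is a minimal, hypothetical sketch (assuming a TensorFlow 1.x environment, which the `tf.contrib` usage above implies); the observation size and the zero-filled batch are placeholders, not taken from the repository's own scripts:
```
# Hypothetical sketch: scoring a batch of states with relu_net (not part of the repository).
import numpy as np
import tensorflow as tf
from inverse_rl.models.architectures import relu_net
from inverse_rl.models.tf_util import get_session_config

obs_dim = 4                                            # assumed observation size
obs_ph = tf.placeholder(tf.float32, [None, obs_dim])   # batch of states
with tf.variable_scope('reward'):
    # two ReLU layers of width 32, then a linear head producing one score per state
    reward = relu_net(obs_ph, layers=2, dout=1, d_hidden=32)

with tf.Session(config=get_session_config()) as sess:
    sess.run(tf.global_variables_initializer())
    scores = sess.run(reward, feed_dict={obs_ph: np.zeros((8, obs_dim), dtype=np.float32)})
    print(scores.shape)  # (8, 1)
```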
/inverse_rl/utils/__pycache__/general.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/general.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/general.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | def flatten_list(lol): 5 | return [ a for b in lol for a in b ] 6 | 7 | class TrainingIterator(object): 8 | def __init__(self, itrs, heartbeat=float('inf')): 9 | self.itrs = itrs 10 | self.heartbeat_time = heartbeat 11 | self.__vals = {} 12 | 13 | def random_idx(self, N, size): 14 | return np.random.randint(0, N, size=size) 15 | 16 | @property 17 | def itr(self): 18 | return self.__itr 19 | 20 | @property 21 | def heartbeat(self): 22 | return self.__heartbeat 23 | 24 | @property 25 | def elapsed(self): 26 | assert self.heartbeat, 'elapsed is only valid when heartbeat=True' 27 | return self.__elapsed 28 | 29 | def itr_message(self): 30 | return '==> Itr %d/%d (elapsed:%.2f)' % (self.itr+1, self.itrs, self.elapsed) 31 | 32 | def record(self, key, value): 33 | if key in self.__vals: 34 | self.__vals[key].append(value) 35 | else: 36 | self.__vals[key] = [value] 37 | 38 | def pop(self, key): 39 | vals = self.__vals.get(key, []) 40 | del self.__vals[key] 41 | return vals 42 | 43 | def pop_mean(self, key): 44 | return np.mean(self.pop(key)) 45 | 46 | def __iter__(self): 47 | prev_time = time.time() 48 | self.__heartbeat = False 49 | for i in range(self.itrs): 50 | self.__itr = i 51 | cur_time = time.time() 52 | if (cur_time-prev_time) > self.heartbeat_time or i==(self.itrs-1): 53 | self.__heartbeat = True 54 | self.__elapsed = cur_time-prev_time 55 | prev_time = cur_time 56 | yield self 57 | self.__heartbeat = False -------------------------------------------------------------------------------- 
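For orientation, a minimal sketch of how the `TrainingIterator` class in `inverse_rl/utils/general.py` above is meant to be driven; the loop body and the random loss values are hypothetical placeholders, not code from the repository:
```
# Hypothetical usage sketch for TrainingIterator (not part of the repository).
import numpy as np
from inverse_rl.utils.general import TrainingIterator

losses = np.random.rand(100)  # stand-in for per-iteration training losses

for it in TrainingIterator(100, heartbeat=5.0):
    it.record('loss', float(losses[it.itr]))      # accumulate a value under a key
    if it.heartbeat:                              # True once `heartbeat` seconds elapse, and on the last iteration
        print(it.itr_message())                   # e.g. "==> Itr 100/100 (elapsed:0.01)"
        print('mean loss:', it.pop_mean('loss'))  # average and clear everything recorded under 'loss'
```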
/inverse_rl/utils/hyperparametrized.py: -------------------------------------------------------------------------------- 1 | CLSNAME = '__clsname__' 2 | _HYPER_ = '__hyper__' 3 | _HYPERNAME_ = '__hyper_clsname__' 4 | 5 | 6 | def extract_hyperparams(obj): 7 | if any([isinstance(obj, type_) for type_ in (int, float, str)]): 8 | return obj 9 | elif isinstance(type(obj), Hyperparametrized): 10 | hypers = getattr(obj, _HYPER_) 11 | hypers[CLSNAME] = getattr(obj, _HYPERNAME_) 12 | for attr in hypers: 13 | hypers[attr] = extract_hyperparams(hypers[attr]) 14 | return hypers 15 | return type(obj).__name__ 16 | 17 | class Hyperparametrized(type): 18 | def __new__(self, clsname, bases, clsdict): 19 | old_init = clsdict.get('__init__', bases[0].__init__) 20 | def init_wrapper(inst, *args, **kwargs): 21 | hyper = getattr(inst, _HYPER_, {}) 22 | hyper.update(kwargs) 23 | setattr(inst, _HYPER_, hyper) 24 | 25 | if getattr(inst, _HYPERNAME_, None) is None: 26 | setattr(inst, _HYPERNAME_, clsname) 27 | return old_init(inst, *args, **kwargs) 28 | clsdict['__init__'] = init_wrapper 29 | 30 | cls = super(Hyperparametrized, self).__new__(self, clsname, bases, clsdict) 31 | return cls 32 | 33 | 34 | class HyperparamWrapper(object, metaclass=Hyperparametrized): 35 | def __init__(self, **hyper_kwargs): 36 | pass 37 | 38 | if __name__ == "__main__": 39 | class Algo1(object, metaclass=Hyperparametrized): 40 | def __init__(self, hyper1=1.0, hyper2=2.0, model1=None): 41 | pass 42 | 43 | 44 | class Algo2(Algo1): 45 | def __init__(self, hyper3=5.0, **kwargs): 46 | super(Algo2, self).__init__(**kwargs) 47 | 48 | 49 | class Model1(object, metaclass=Hyperparametrized): 50 | def __init__(self, hyper1=None): 51 | pass 52 | 53 | 54 | def get_params_json(**kwargs): 55 | hyper_dict = extract_hyperparams(HyperparamWrapper(**kwargs)) 56 | del hyper_dict[CLSNAME] 57 | return hyper_dict 58 | 59 | m1 = Model1(hyper1='Test') 60 | a1 = Algo2(hyper1=1.0, hyper2=5.0, hyper3=10.0, model1=m1) 61 | 62 | print( isinstance(type(a1), Hyperparametrized)) 63 | print(get_params_json(a1=a1)) 64 | -------------------------------------------------------------------------------- /inverse_rl/utils/math_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.stats 4 | 5 | def rle(inarray): 6 | """ run length encoding. Partial credit to R rle function. 
7 | Multi datatype arrays catered for including non Numpy 8 | returns: tuple (runlengths, startpositions, values) """ 9 | ia = np.array(inarray) # force numpy 10 | n = len(ia) 11 | if n == 0: 12 | return (None, None, None) 13 | else: 14 | y = np.array(ia[1:] != ia[:-1]) # pairwise unequal (string safe) 15 | i = np.append(np.where(y), n - 1) # must include last element posi 16 | z = np.diff(np.append(-1, i)) # run lengths 17 | p = np.cumsum(np.append(0, z))[:-1] # positions 18 | return(z, p, ia[i]) 19 | 20 | def split_list_by_lengths(values, lengths): 21 | """ 22 | 23 | >>> split_list_by_lengths([0,0,0,1,1,1,2,2,2], [2,2,5]) 24 | [[0, 0], [0, 1], [1, 1, 2, 2, 2]] 25 | """ 26 | assert np.sum(lengths) == len(values) 27 | idxs = np.cumsum(lengths) 28 | idxs = np.insert(idxs, 0, 0) 29 | return [ values[idxs[i]:idxs[i+1] ] for i in range(len(idxs)-1)] 30 | 31 | def clip_sing(X, clip_val=1): 32 | U, E, V = np.linalg.svd(X, full_matrices=False) 33 | E = np.clip(E, -clip_val, clip_val) 34 | return U.dot(np.diag(E)).dot(V) 35 | 36 | def gauss_log_pdf(params, x): 37 | mean, log_diag_std = params 38 | N, d = mean.shape 39 | cov = np.square(np.exp(log_diag_std)) 40 | diff = x-mean 41 | exp_term = -0.5 * np.sum(np.square(diff)/cov, axis=1) 42 | norm_term = -0.5*d*np.log(2*np.pi) 43 | var_term = -0.5 * np.sum(np.log(cov), axis=1) 44 | log_probs = norm_term + var_term + exp_term 45 | return log_probs #sp.stats.multivariate_normal.logpdf(x, mean=mean, cov=cov) 46 | 47 | def categorical_log_pdf(params, x, one_hot=True): 48 | if not one_hot: 49 | raise NotImplementedError() 50 | probs = params[0] 51 | return np.log(np.max(probs * x, axis=1)) 52 | 53 | -------------------------------------------------------------------------------- /rllab/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | *.pyc 3 | *-checkpoint.ipynb 4 | .DS_Store 5 | *.h5 6 | *.log 7 | *.npz 8 | secrets.py 9 | *.avi 10 | *.mp4 11 | build 12 | build_linux 13 | .idea 14 | .sublime-project 15 | run_experiment.sh 16 | scratch-notebooks 17 | launch_scripts 18 | *.sh.e* 19 | *.sh.o* 20 | MUJOCO_LOG.TXT 21 | vendor/mujoco 22 | .project 23 | .pydevproject 24 | *.pdf 25 | .env 26 | snippets 27 | private 28 | lua 29 | iterate.dat 30 | .env 31 | src/ 32 | .settings 33 | .pods 34 | docs/_build 35 | blackbox.zip 36 | blackbox 37 | rllab/config_personal.py 38 | *.swp 39 | -------------------------------------------------------------------------------- /rllab/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 rllab contributors 4 | 5 | rllab uses a shared copyright model: each contributor holds copyright over 6 | their contributions to rllab. The project versioning records all such 7 | contribution and copyright details. 8 | By contributing to the rllab repository through pull-request, comment, 9 | or otherwise, the contributor releases their content to the license and 10 | copyright terms herein. 
11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | -------------------------------------------------------------------------------- /rllab/circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | services: 3 | - docker 4 | 5 | dependencies: 6 | cache_directories: 7 | - "~/docker" 8 | override: 9 | - docker info 10 | - if [[ -e ~/docker/image.tar ]]; then docker load -i ~/docker/image.tar; fi 11 | - docker build -t tester -f docker/tester_Dockerfile . 12 | - mkdir -p ~/docker; docker save tester > ~/docker/image.tar 13 | 14 | test: 15 | override: 16 | - docker run tester /bin/bash -li -c "CIRCLECI=true nose2" 17 | -------------------------------------------------------------------------------- /rllab/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/examples/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/trpois_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.algos.tnpg import TNPG 3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 4 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 5 | from rllab.envs.normalized_env import normalize 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | from contrib.alexbeloi.is_sampler import ISSampler 8 | 9 | """ 10 | Example using VPG with ISSampler, iterations alternate between live and 11 | importance sampled iterations. 
12 | """ 13 | 14 | env = normalize(CartpoleEnv()) 15 | 16 | policy = GaussianMLPPolicy( 17 | env_spec=env.spec, 18 | # The neural network policy should have two hidden layers, each with 32 hidden units. 19 | hidden_sizes=(32, 32) 20 | ) 21 | 22 | baseline = LinearFeatureBaseline(env_spec=env.spec) 23 | 24 | optimizer_args = dict( 25 | # debug_nan=True, 26 | # reg_coeff=0.1, 27 | # cg_iters=2 28 | ) 29 | 30 | algo = TRPO( 31 | env=env, 32 | policy=policy, 33 | baseline=baseline, 34 | batch_size=4000, 35 | max_path_length=100, 36 | n_itr=200, 37 | discount=0.99, 38 | step_size=0.01, 39 | sampler_cls=ISSampler, 40 | sampler_args=dict(n_backtrack=1), 41 | optimizer_args=optimizer_args 42 | ) 43 | algo.train() 44 | -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/vpgis_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from contrib.alexbeloi.is_sampler import ISSampler 7 | 8 | """ 9 | Example using VPG with ISSampler, iterations alternate between live and 10 | importance sampled iterations. 11 | """ 12 | 13 | env = normalize(CartpoleEnv()) 14 | 15 | policy = GaussianMLPPolicy( 16 | env_spec=env.spec, 17 | # The neural network policy should have two hidden layers, each with 32 hidden units. 18 | hidden_sizes=(32, 32) 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = VPG( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=100, 29 | n_itr=40, 30 | discount=0.99, 31 | step_size=0.01, 32 | sampler_cls=ISSampler, 33 | sampler_args=dict(n_backtrack=1), 34 | ) 35 | algo.train() 36 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/examples/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("Acrobot-v1")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | 
batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("CartPole-v0")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("CartPole-v1")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py: -------------------------------------------------------------------------------- 1 | # This doesn't work. After 150 iterations still didn't learn anything. 
2 | 3 | from rllab.algos.trpo import TRPO 4 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 5 | from rllab.envs.gym_env import GymEnv 6 | from rllab.envs.normalized_env import normalize 7 | from rllab.misc.instrument import run_experiment_lite 8 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 9 | 10 | 11 | def run_task(*_): 12 | env = normalize(GymEnv("MountainCar-v0")) 13 | 14 | policy = CategoricalMLPPolicy( 15 | env_spec=env.spec, 16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | baseline = LinearFeatureBaseline(env_spec=env.spec) 20 | 21 | algo = TRPO( 22 | env=env, 23 | policy=policy, 24 | baseline=baseline, 25 | batch_size=4000, 26 | max_path_length=env.horizon, 27 | n_itr=150, 28 | discount=0.99, 29 | step_size=0.1, 30 | plot=True, 31 | ) 32 | algo.train() 33 | 34 | 35 | run_experiment_lite( 36 | run_task, 37 | n_parallel=1, 38 | snapshot_mode="last", 39 | plot=True, 40 | ) 41 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("Pendulum-v0")) 11 | 12 | policy = GaussianMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/example/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main module to launch an example hyperopt search on EC2. 3 | 4 | Launch this from outside the rllab main dir. Otherwise, rllab will try to ship the logfiles being written by this process, 5 | which will fail because tar doesn't want to tar files that are being written to. Alternatively, disable the packaging of 6 | log files by rllab, but I couldn't quickly find how to do this. 7 | 8 | You can use Jupyter notebook visualize_hyperopt_results.ipynb to inspect results. 
9 | ''' 10 | from hyperopt import hp 11 | 12 | from contrib.rllab_hyperopt.core import launch_hyperopt_search 13 | # the functions to run the task and process result do not need to be in separate files. They do need to be separate from 14 | # the main file though. Also, anything you import in the module that contains run_task needs to be on the Rllab AMI. 15 | # Therefore, since I use pandas to process results, I have put them in separate files here. 16 | from contrib.rllab_hyperopt.example.score import process_result 17 | from contrib.rllab_hyperopt.example.task import run_task 18 | 19 | # define a search space. See https://github.com/hyperopt/hyperopt/wiki/FMin, sect 2 for more detail 20 | param_space = {'step_size': hp.uniform('step_size', 0.01, 0.1), 21 | 'seed': hp.choice('seed',[0, 1, 2])} 22 | 23 | # just by way of example, pass a different config to run_experiment_lite 24 | run_experiment_kwargs = dict( 25 | n_parallel=16, 26 | aws_config=dict(instance_type="c4.4xlarge",spot_price='0.7') 27 | ) 28 | 29 | launch_hyperopt_search( 30 | run_task, # the task to run 31 | process_result, # the function that will process results and return a score 32 | param_space, # param search space 33 | hyperopt_experiment_key='test12', # key for hyperopt DB, and also exp_prefix for run_experiment_lite 34 | n_hyperopt_workers=3, # nr of local workers AND nr of EC2 instances that will be started in parallel 35 | hyperopt_max_evals=5, # nr of parameter values to eval 36 | result_timeout=600, # wait this long for results from S3 before timing out 37 | run_experiment_kwargs=run_experiment_kwargs) # additional kwargs to pass to run_experiment_lite -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from rllab import config 5 | 6 | def process_result(exp_prefix, exp_name): 7 | # Open the default rllab path for storing results 8 | result_path = os.path.join(config.LOG_DIR, "s3", exp_prefix, exp_name, 'progress.csv') 9 | print("Processing result from",result_path) 10 | 11 | # This example uses pandas to easily read in results and create a simple smoothed learning curve 12 | df = pd.read_csv(result_path) 13 | curve = df['AverageReturn'].rolling(window=max(1,int(0.05*df.shape[0])), min_periods=1, center=True).mean().values.flatten() 14 | max_ix = curve.argmax() 15 | max_score = curve.max() 16 | 17 | # The result dict can contain arbitrary values, but ALWAYS needs to have a "loss" entry. 18 | return dict( 19 | max_score=max_score, 20 | max_iter=max_ix, 21 | scores=curve, # returning the curve allows you to plot best, worst etc curve later 22 | loss=-max_score 23 | ) -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/task.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | def run_task(v): 8 | env = normalize(CartpoleEnv()) 9 | 10 | policy = GaussianMLPPolicy( 11 | env_spec=env.spec, 12 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
13 | hidden_sizes=(32, 32) 14 | ) 15 | 16 | baseline = LinearFeatureBaseline(env_spec=env.spec) 17 | 18 | algo = TRPO( 19 | env=env, 20 | policy=policy, 21 | baseline=baseline, 22 | batch_size=4000, 23 | max_path_length=100, 24 | n_itr=40, 25 | discount=0.99, 26 | step_size=v["step_size"], 27 | # Uncomment both lines (this and the plot parameter below) to enable plotting 28 | # plot=True, 29 | ) 30 | algo.train() -------------------------------------------------------------------------------- /rllab/docker/tester_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neocxi/rllab_exp_gpu_tf:py3 2 | 3 | RUN bash -c 'source activate rllab3 && conda install -y nomkl && conda uninstall -y scipy && conda install -y scipy' 4 | 5 | ADD . /root/code/rllab 6 | WORKDIR /root/code/rllab 7 | -------------------------------------------------------------------------------- /rllab/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. rllab documentation master file, created by 2 | sphinx-quickstart on Mon Feb 15 20:07:12 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to rllab 7 | ================ 8 | 9 | rllab is a framework for developing and evaluating reinforcement learning algorithms. 10 | 11 | rllab is a work in progress; input is welcome. The available documentation is limited for now. 12 | 13 | User Guide 14 | ========== 15 | 16 | The rllab user guide explains how to install rllab, how to run experiments, and how to implement new MDPs and new algorithms. 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | user/installation 22 | user/experiments 23 | user/gym_integration 24 | user/implement_env 25 | user/implement_algo_basic 26 | user/implement_algo_advanced 27 | user/cluster 28 | 29 | 30 | Citing rllab 31 | ============ 32 | 33 | If you use rllab for academic research, you are highly encouraged to cite the following paper: 34 | 35 | - Yan Duan, Xi Chen, Rein Houthooft, John Schulman, Pieter Abbeel. "`Benchmarking Deep Reinforcement Learning for Continuous Control <https://arxiv.org/abs/1604.06778>`_.
*Proceedings of the 33rd International Conference on Machine Learning (ICML), 2016.* 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | * :ref:`genindex` 42 | * :ref:`modindex` 43 | * :ref:`search` 44 | 45 | -------------------------------------------------------------------------------- /rllab/docs/user/cluster_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_1.png -------------------------------------------------------------------------------- /rllab/docs/user/cluster_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_2.png -------------------------------------------------------------------------------- /rllab/docs/user/cluster_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_3.png -------------------------------------------------------------------------------- /rllab/docs/user/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | 4 | ============ 5 | Installation 6 | ============ 7 | 8 | Preparation 9 | =========== 10 | 11 | You need to edit your :code:`PYTHONPATH` to include the rllab directory: 12 | 13 | .. code-block:: bash 14 | 15 | export PYTHONPATH=path_to_rllab:$PYTHONPATH 16 | 17 | Express Install 18 | =============== 19 | 20 | The fastest way to set up dependencies for rllab is to run the setup script. 21 | 22 | - On Linux, run the following: 23 | 24 | .. code-block:: bash 25 | 26 | ./scripts/setup_linux.sh 27 | 28 | - On Mac OS X, run the following: 29 | 30 | .. code-block:: bash 31 | 32 | ./scripts/setup_osx.sh 33 | 34 | The script sets up a conda environment, which is similar to :code:`virtualenv`. To start using it, run the following: 35 | 36 | .. code-block:: bash 37 | 38 | source activate rllab3 39 | 40 | 41 | Optionally, if you would like to run experiments that depend on the Mujoco environment, you can set it up by running the following command: 42 | 43 | .. code-block:: bash 44 | 45 | ./scripts/setup_mujoco.sh 46 | 47 | and follow the instructions. You need to have the zip file for Mujoco v1.31 and the license file ready. 48 | 49 | 50 | 51 | Manual Install 52 | ============== 53 | 54 | Anaconda 55 | ------------ 56 | 57 | :code:`rllab` assumes that you are using the Anaconda Python distribution. You can download it from `https://www.continuum.io/downloads`. Make sure to download the installer for Python 2.7. 58 | 59 | 60 | System dependencies for pygame 61 | ------------------------------ 62 | 63 | A few environments in rllab are implemented using Box2D, which uses pygame for visualization. 64 | It requires a few system dependencies to be installed first. 65 | 66 | On Linux, run the following: 67 | 68 | .. code-block:: bash 69 | 70 | sudo apt-get install swig 71 | sudo apt-get build-dep python-pygame 72 | 73 | On Mac OS X, run the following: 74 | 75 | .. code-block:: bash 76 | 77 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi 78 | 79 | System dependencies for scipy 80 | ----------------------------- 81 | 82 | This step is only needed under Linux: 83 | 84 | ..
code-block:: bash 85 | 86 | sudo apt-get build-dep python-scipy 87 | 88 | Install Python modules 89 | ---------------------- 90 | 91 | .. code-block:: bash 92 | 93 | conda env create -f environment.yml 94 | -------------------------------------------------------------------------------- /rllab/environment.yml: -------------------------------------------------------------------------------- 1 | name: rllab3 2 | channels: 3 | - https://conda.anaconda.org/kne 4 | - https://conda.anaconda.org/tlatorre 5 | - https://conda.anaconda.org/cjs14 6 | - https://conda.anaconda.org/menpo 7 | - jjhelmus 8 | - soumith 9 | dependencies: 10 | - python==3.5.2 11 | - numpy==1.12.0 12 | - scipy 13 | - path.py 14 | - python-dateutil 15 | - joblib==0.10.3 16 | - mako 17 | - ipywidgets 18 | - numba 19 | - flask 20 | - pybox2d 21 | - pygame 22 | - h5py 23 | - matplotlib 24 | - opencv3=3.1.0 25 | - scikit-learn 26 | - pytorch==0.1.9 27 | - torchvision==0.1.6 28 | - mpi4py 29 | - pandas 30 | - pip: 31 | - Pillow 32 | - atari-py 33 | - pyprind 34 | - ipdb 35 | - boto3 36 | - PyOpenGL 37 | - nose2 38 | - pyzmq 39 | - tqdm 40 | - msgpack-python 41 | - git+https://github.com/inksci/mujoco-py-v0.5.7.git 42 | # - mujoco-py==1.50.1.68 43 | - cached_property 44 | - line_profiler 45 | - cloudpickle 46 | - Cython 47 | - redis 48 | - keras==1.2.1 49 | - git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano 50 | - git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne 51 | - git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly 52 | - awscli 53 | - git+https://github.com/openai/gym.git@v0.7.4#egg=gym 54 | - pyglet 55 | - git+https://github.com/neocxi/prettytensor.git 56 | - jupyter 57 | - progressbar2 58 | - chainer==1.18.0 59 | - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp35-cp35m-linux_x86_64.whl; 'linux' in sys_platform 60 | - https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow_gpu-1.7.0-py3-none-any.whl; sys_platform == 'darwin' 61 | - numpy-stl==2.2.0 62 | - nibabel==2.1.0 63 | - pylru==1.0.9 64 | - hyperopt 65 | - polling 66 | -------------------------------------------------------------------------------- /rllab/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/examples/__init__.py -------------------------------------------------------------------------------- /rllab/examples/cluster_demo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import stub, run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | import sys 8 | 9 | 10 | def run_task(v): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = GaussianMLPPolicy( 14 | env_spec=env.spec, 15 | # The neural network policy should have two hidden layers, each with 32 hidden units.
16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | baseline = LinearFeatureBaseline(env_spec=env.spec) 20 | 21 | algo = TRPO( 22 | env=env, 23 | policy=policy, 24 | baseline=baseline, 25 | batch_size=4000, 26 | max_path_length=100, 27 | n_itr=40, 28 | discount=0.99, 29 | step_size=v["step_size"], 30 | # Uncomment both lines (this and the plot parameter below) to enable plotting 31 | # plot=True, 32 | ) 33 | algo.train() 34 | 35 | 36 | for step_size in [0.01, 0.05, 0.1]: 37 | for seed in [1, 11, 21, 31, 41]: 38 | run_experiment_lite( 39 | run_task, 40 | exp_prefix="first_exp", 41 | # Number of parallel workers for sampling 42 | n_parallel=1, 43 | # Only keep the snapshot parameters for the last iteration 44 | snapshot_mode="last", 45 | # Specifies the seed for the experiment. If this is not provided, a random seed 46 | # will be used 47 | seed=seed, 48 | # mode="local", 49 | mode="ec2", 50 | variant=dict(step_size=step_size, seed=seed) 51 | # plot=True, 52 | # terminate_machine=False, 53 | ) 54 | sys.exit() 55 | -------------------------------------------------------------------------------- /rllab/examples/cluster_gym_mujoco_demo.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 2 | from rllab.envs.normalized_env import normalize 3 | from sandbox.rocky.tf.envs.base import TfEnv 4 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 5 | from sandbox.rocky.tf.algos.trpo import TRPO 6 | from rllab.misc.instrument import run_experiment_lite 7 | from rllab.envs.gym_env import GymEnv 8 | import sys 9 | 10 | from rllab.misc.instrument import VariantGenerator, variant 11 | 12 | 13 | class VG(VariantGenerator): 14 | 15 | @variant 16 | def step_size(self): 17 | return [0.01, 0.05, 0.1] 18 | 19 | @variant 20 | def seed(self): 21 | return [1, 11, 21, 31, 41] 22 | 23 | 24 | def run_task(vv): 25 | 26 | env = TfEnv(normalize(GymEnv('HalfCheetah-v1', record_video=False, record_log=False))) 27 | 28 | policy = GaussianMLPPolicy( 29 | env_spec=env.spec, 30 | # The neural network policy should have two hidden layers, each with 32 hidden units. 31 | hidden_sizes=(32, 32), 32 | name="policy" 33 | ) 34 | 35 | baseline = LinearFeatureBaseline(env_spec=env.spec) 36 | 37 | algo = TRPO( 38 | env=env, 39 | policy=policy, 40 | baseline=baseline, 41 | batch_size=4000, 42 | max_path_length=100, 43 | n_itr=40, 44 | discount=0.99, 45 | step_size=vv["step_size"], 46 | # Uncomment both lines (this and the plot parameter below) to enable plotting 47 | # plot=True, 48 | ) 49 | algo.train() 50 | 51 | 52 | variants = VG().variants() 53 | 54 | for v in variants: 55 | 56 | run_experiment_lite( 57 | run_task, 58 | exp_prefix="first_exp", 59 | # Number of parallel workers for sampling 60 | n_parallel=1, 61 | # Only keep the snapshot parameters for the last iteration 62 | snapshot_mode="last", 63 | # Specifies the seed for the experiment. 
If this is not provided, a random seed 64 | # will be used 65 | seed=v["seed"], 66 | # mode="local", 67 | mode="ec2", 68 | variant=v, 69 | # plot=True, 70 | # terminate_machine=False, 71 | ) 72 | sys.exit() 73 | -------------------------------------------------------------------------------- /rllab/examples/ddpg_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.ddpg import DDPG 2 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 3 | from rllab.envs.normalized_env import normalize 4 | from rllab.misc.instrument import run_experiment_lite 5 | from rllab.exploration_strategies.ou_strategy import OUStrategy 6 | from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy 7 | from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction 8 | 9 | 10 | def run_task(*_): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = DeterministicMLPPolicy( 14 | env_spec=env.spec, 15 | # The neural network policy should have two hidden layers, each with 32 hidden units. 16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | es = OUStrategy(env_spec=env.spec) 20 | 21 | qf = ContinuousMLPQFunction(env_spec=env.spec) 22 | 23 | algo = DDPG( 24 | env=env, 25 | policy=policy, 26 | es=es, 27 | qf=qf, 28 | batch_size=32, 29 | max_path_length=100, 30 | epoch_length=1000, 31 | min_pool_size=10000, 32 | n_epochs=1000, 33 | discount=0.99, 34 | scale_reward=0.01, 35 | qf_learning_rate=1e-3, 36 | policy_learning_rate=1e-4, 37 | # Uncomment both lines (this and the plot parameter below) to enable plotting 38 | # plot=True, 39 | ) 40 | algo.train() 41 | 42 | run_experiment_lite( 43 | run_task, 44 | # Number of parallel workers for sampling 45 | n_parallel=1, 46 | # Only keep the snapshot parameters for the last iteration 47 | snapshot_mode="last", 48 | # Specifies the seed for the experiment. If this is not provided, a random seed 49 | # will be used 50 | seed=1, 51 | # plot=True, 52 | ) 53 | -------------------------------------------------------------------------------- /rllab/examples/nop_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.nop import NOP 2 | from rllab.baselines.zero_baseline import ZeroBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.uniform_control_policy import UniformControlPolicy 6 | 7 | env = normalize(CartpoleEnv()) 8 | 9 | policy = UniformControlPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
12 | ) 13 | 14 | baseline = ZeroBaseline(env_spec=env.spec) 15 | 16 | algo = NOP( 17 | env=env, 18 | policy=policy, 19 | baseline=baseline, 20 | batch_size=4000, 21 | max_path_length=100, 22 | n_itr=40, 23 | discount=0.99, 24 | step_size=0.01, 25 | ) 26 | algo.train() 27 | -------------------------------------------------------------------------------- /rllab/examples/point_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Env 2 | from rllab.spaces import Box 3 | from rllab.envs.base import Step 4 | import numpy as np 5 | 6 | 7 | class PointEnv(Env): 8 | @property 9 | def observation_space(self): 10 | return Box(low=-np.inf, high=np.inf, shape=(2,)) 11 | 12 | @property 13 | def action_space(self): 14 | return Box(low=-0.1, high=0.1, shape=(2,)) 15 | 16 | def reset(self): 17 | self._state = np.random.uniform(-1, 1, size=(2,)) 18 | observation = np.copy(self._state) 19 | return observation 20 | 21 | def step(self, action): 22 | self._state = self._state + action 23 | x, y = self._state 24 | reward = - (x ** 2 + y ** 2) ** 0.5 25 | done = abs(x) < 0.01 and abs(y) < 0.01 26 | next_observation = np.copy(self._state) 27 | return Step(observation=next_observation, reward=reward, done=done) 28 | 29 | def render(self): 30 | print('current state:', self._state) 31 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(CartpoleEnv()) 8 | 9 | policy = GaussianMLPPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 12 | hidden_sizes=(32, 32) 13 | ) 14 | 15 | baseline = LinearFeatureBaseline(env_spec=env.spec) 16 | 17 | algo = TRPO( 18 | env=env, 19 | policy=policy, 20 | baseline=baseline, 21 | batch_size=4000, 22 | max_path_length=100, 23 | n_itr=40, 24 | discount=0.99, 25 | step_size=0.01, 26 | ) 27 | algo.train() 28 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole_pickled.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(CartpoleEnv()) 11 | 12 | policy = GaussianMLPPolicy( 13 | env_spec=env.spec, 14 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
15 | hidden_sizes=(32, 32) 16 | ) 17 | 18 | baseline = LinearFeatureBaseline(env_spec=env.spec) 19 | 20 | algo = TRPO( 21 | env=env, 22 | policy=policy, 23 | baseline=baseline, 24 | batch_size=4000, 25 | max_path_length=100, 26 | n_itr=1000, 27 | discount=0.99, 28 | step_size=0.01, 29 | # Uncomment both lines (this and the plot parameter below) to enable plotting 30 | #plot=True 31 | ) 32 | algo.train() 33 | 34 | 35 | run_experiment_lite( 36 | run_task, 37 | # Number of parallel workers for sampling 38 | n_parallel=2, 39 | # Only keep the snapshot parameters for the last iteration 40 | snapshot_mode="last", 41 | # Specifies the seed for the experiment. If this is not provided, a random seed 42 | # will be used 43 | seed=1, 44 | #plot=True 45 | ) 46 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole_recurrent.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_gru_policy import GaussianGRUPolicy 6 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp 7 | from rllab.misc.instrument import run_experiment_lite 8 | 9 | 10 | def run_task(*_): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = GaussianGRUPolicy( 14 | env_spec=env.spec, 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=100, 25 | n_itr=10, 26 | discount=0.99, 27 | step_size=0.01, 28 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | seed=1, 37 | ) 38 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | # Please note that different environments with different action spaces may 11 | # require different policies. For example with a Discrete action space, a 12 | # CategoricalMLPPolicy works, but for a Box action space may need to use 13 | # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example) 14 | env = normalize(GymEnv("CartPole-v0")) 15 | 16 | policy = CategoricalMLPPolicy( 17 | env_spec=env.spec, 18 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
19 | hidden_sizes=(32, 32) 20 | ) 21 | 22 | baseline = LinearFeatureBaseline(env_spec=env.spec) 23 | 24 | algo = TRPO( 25 | env=env, 26 | policy=policy, 27 | baseline=baseline, 28 | batch_size=4000, 29 | max_path_length=env.horizon, 30 | n_itr=50, 31 | discount=0.99, 32 | step_size=0.01, 33 | # Uncomment both lines (this and the plot parameter below) to enable plotting 34 | # plot=True, 35 | ) 36 | algo.train() 37 | 38 | 39 | run_experiment_lite( 40 | run_task, 41 | # Number of parallel workers for sampling 42 | n_parallel=1, 43 | # Only keep the snapshot parameters for the last iteration 44 | snapshot_mode="last", 45 | # Specifies the seed for the experiment. If this is not provided, a random seed 46 | # will be used 47 | seed=1, 48 | # plot=True, 49 | ) 50 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_pendulum.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | # Please note that different environments with different action spaces may require different 11 | # policies. For example with a Box action space, a GaussianMLPPolicy works, but for a Discrete 12 | # action space may need to use a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example) 13 | env = normalize(GymEnv("Pendulum-v0")) 14 | 15 | policy = GaussianMLPPolicy( 16 | env_spec=env.spec, 17 | # The neural network policy should have two hidden layers, each with 32 hidden units. 18 | hidden_sizes=(32, 32) 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = TRPO( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=env.horizon, 29 | n_itr=50, 30 | discount=0.99, 31 | step_size=0.01, 32 | # Uncomment both lines (this and the plot parameter below) to enable plotting 33 | # plot=True, 34 | ) 35 | algo.train() 36 | 37 | 38 | run_experiment_lite( 39 | run_task, 40 | # Number of parallel workers for sampling 41 | n_parallel=1, 42 | # Only keep the snapshot parameters for the last iteration 43 | snapshot_mode="last", 44 | # Specifies the seed for the experiment. 
If this is not provided, a random seed 45 | # will be used 46 | seed=1, 47 | # plot=True, 48 | ) 49 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_tf_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 2 | from rllab.envs.gym_env import GymEnv 3 | from rllab.envs.normalized_env import normalize 4 | from rllab.misc.instrument import stub, run_experiment_lite 5 | 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from sandbox.rocky.tf.policies.categorical_mlp_policy import CategoricalMLPPolicy 8 | from sandbox.rocky.tf.algos.trpo import TRPO 9 | 10 | stub(globals()) 11 | 12 | # Need to wrap in a tf environment and force_reset to true 13 | # see https://github.com/openai/rllab/issues/87#issuecomment-282519288 14 | env = TfEnv(normalize(GymEnv("CartPole-v0", force_reset=True))) 15 | 16 | policy = CategoricalMLPPolicy( 17 | name="policy", 18 | env_spec=env.spec, 19 | # The neural network policy should have two hidden layers, each with 32 hidden units. 20 | hidden_sizes=(32, 32) 21 | ) 22 | 23 | baseline = LinearFeatureBaseline(env_spec=env.spec) 24 | 25 | algo = TRPO( 26 | env=env, 27 | policy=policy, 28 | baseline=baseline, 29 | batch_size=4000, 30 | max_path_length=200, 31 | n_itr=120, 32 | discount=0.99, 33 | step_size=0.01, 34 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 35 | ) 36 | 37 | run_experiment_lite( 38 | algo.train(), 39 | n_parallel=1, 40 | snapshot_mode="last", 41 | seed=1 42 | ) 43 | -------------------------------------------------------------------------------- /rllab/examples/trpo_point.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from examples.point_env import PointEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(PointEnv()) 8 | policy = GaussianMLPPolicy( 9 | env_spec=env.spec, 10 | ) 11 | baseline = LinearFeatureBaseline(env_spec=env.spec) 12 | algo = TRPO( 13 | env=env, 14 | policy=policy, 15 | baseline=baseline, 16 | ) 17 | algo.train() 18 | -------------------------------------------------------------------------------- /rllab/examples/trpo_swimmer.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(SwimmerEnv()) 8 | 9 | policy = GaussianMLPPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
12 | hidden_sizes=(32, 32) 13 | ) 14 | 15 | baseline = LinearFeatureBaseline(env_spec=env.spec) 16 | 17 | algo = TRPO( 18 | env=env, 19 | policy=policy, 20 | baseline=baseline, 21 | batch_size=4000, 22 | max_path_length=500, 23 | n_itr=40, 24 | discount=0.99, 25 | step_size=0.01, 26 | ) 27 | algo.train() 28 | -------------------------------------------------------------------------------- /rllab/rllab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/algos/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/algos/base.py: -------------------------------------------------------------------------------- 1 | class Algorithm(object): 2 | pass 3 | 4 | 5 | class RLAlgorithm(Algorithm): 6 | 7 | def train(self): 8 | raise NotImplementedError 9 | -------------------------------------------------------------------------------- /rllab/rllab/algos/erwr.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.optimizers.lbfgs_optimizer import LbfgsOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class ERWR(VPG, Serializable): 7 | """ 8 | Episodic Reward Weighted Regression [1]_ 9 | 10 | Notes 11 | ----- 12 | This does not implement the original RwR [2]_ that deals with "immediate reward problems" since 13 | it doesn't find solutions that optimize for temporally delayed rewards. 14 | 15 | .. [1] Kober, Jens, and Jan R. Peters. "Policy search for motor primitives in robotics." Advances in neural information processing systems. 2009. 16 | .. [2] Peters, Jan, and Stefan Schaal. "Using reward-weighted regression for reinforcement learning of task space control." Approximate Dynamic Programming and Reinforcement Learning, 2007. ADPRL 2007. IEEE International Symposium on. IEEE, 2007. 
17 | """ 18 | 19 | def __init__( 20 | self, 21 | optimizer=None, 22 | optimizer_args=None, 23 | positive_adv=None, 24 | **kwargs): 25 | Serializable.quick_init(self, locals()) 26 | if optimizer is None: 27 | if optimizer_args is None: 28 | optimizer_args = dict() 29 | optimizer = LbfgsOptimizer(**optimizer_args) 30 | super(ERWR, self).__init__( 31 | optimizer=optimizer, 32 | positive_adv=True if positive_adv is None else positive_adv, 33 | **kwargs 34 | ) 35 | 36 | -------------------------------------------------------------------------------- /rllab/rllab/algos/nop.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.batch_polopt import BatchPolopt 2 | from rllab.misc.overrides import overrides 3 | 4 | 5 | class NOP(BatchPolopt): 6 | """ 7 | NOP (no optimization performed) policy search algorithm 8 | """ 9 | 10 | def __init__( 11 | self, 12 | **kwargs): 13 | super(NOP, self).__init__(**kwargs) 14 | 15 | @overrides 16 | def init_opt(self): 17 | pass 18 | 19 | @overrides 20 | def optimize_policy(self, itr, samples_data): 21 | pass 22 | 23 | @overrides 24 | def get_itr_snapshot(self, itr, samples_data): 25 | return dict() 26 | -------------------------------------------------------------------------------- /rllab/rllab/algos/ppo.py: -------------------------------------------------------------------------------- 1 | from rllab.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer 2 | from rllab.algos.npo import NPO 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class PPO(NPO, Serializable): 7 | """ 8 | Penalized Policy Optimization. 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | Serializable.quick_init(self, locals()) 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = PenaltyLbfgsOptimizer(**optimizer_args) 21 | super(PPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /rllab/rllab/algos/tnpg.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.misc import ext 4 | 5 | 6 | class TNPG(NPO): 7 | """ 8 | Truncated Natural Policy Gradient. 
9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | default_args = dict(max_backtracks=1) 18 | if optimizer_args is None: 19 | optimizer_args = default_args 20 | else: 21 | optimizer_args = dict(default_args, **optimizer_args) 22 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 23 | super(TNPG, self).__init__(optimizer=optimizer, **kwargs) 24 | -------------------------------------------------------------------------------- /rllab/rllab/algos/trpo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class TRPO(NPO): 7 | """ 8 | Trust Region Policy Optimization 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | if optimizer_args is None: 18 | optimizer_args = dict() 19 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 20 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 21 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/baselines/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/baselines/base.py: -------------------------------------------------------------------------------- 1 | from rllab.misc import autoargs 2 | 3 | 4 | class Baseline(object): 5 | 6 | def __init__(self, env_spec): 7 | self._mdp_spec = env_spec 8 | 9 | @property 10 | def algorithm_parallelized(self): 11 | return False 12 | 13 | def get_param_values(self): 14 | raise NotImplementedError 15 | 16 | def set_param_values(self, val): 17 | raise NotImplementedError 18 | 19 | def fit(self, paths): 20 | raise NotImplementedError 21 | 22 | def predict(self, path): 23 | raise NotImplementedError 24 | 25 | @classmethod 26 | @autoargs.add_args 27 | def add_args(cls, parser): 28 | pass 29 | 30 | @classmethod 31 | @autoargs.new_from_args 32 | def new_from_args(cls, args, mdp): 33 | pass 34 | 35 | def log_diagnostics(self, paths): 36 | """ 37 | Log extra information per iteration based on the collected paths 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/gaussian_conv_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.misc.overrides import overrides 5 | from rllab.core.parameterized import Parameterized 6 | from rllab.baselines.base import Baseline 7 | from rllab.regressors.gaussian_conv_regressor import GaussianConvRegressor 8 | 9 | 10 | class GaussianConvBaseline(Baseline, Parameterized): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | regressor_args=None, 17 | ): 18 | Serializable.quick_init(self, locals()) 19 | super(GaussianConvBaseline, self).__init__(env_spec) 20 | if regressor_args is None: 21 | regressor_args = dict() 22 | 23 | self._regressor = GaussianConvRegressor( 24 | input_shape=env_spec.observation_space.shape, 25 | output_dim=1, 26 | name="vf", 27 | 
**regressor_args 28 | ) 29 | 30 | @overrides 31 | def fit(self, paths): 32 | observations = np.concatenate([p["observations"] for p in paths]) 33 | returns = np.concatenate([p["returns"] for p in paths]) 34 | self._regressor.fit(observations, returns.reshape((-1, 1))) 35 | 36 | @overrides 37 | def predict(self, path): 38 | return self._regressor.predict(path["observations"]).flatten() 39 | 40 | @overrides 41 | def get_param_values(self, **tags): 42 | return self._regressor.get_param_values(**tags) 43 | 44 | @overrides 45 | def set_param_values(self, flattened_params, **tags): 46 | self._regressor.set_param_values(flattened_params, **tags) 47 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/gaussian_mlp_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.core.parameterized import Parameterized 5 | from rllab.baselines.base import Baseline 6 | from rllab.misc.overrides import overrides 7 | from rllab.regressors.gaussian_mlp_regressor import GaussianMLPRegressor 8 | 9 | 10 | class GaussianMLPBaseline(Baseline, Parameterized): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | num_seq_inputs=1, 17 | regressor_args=None, 18 | ): 19 | Serializable.quick_init(self, locals()) 20 | super(GaussianMLPBaseline, self).__init__(env_spec) 21 | if regressor_args is None: 22 | regressor_args = dict() 23 | 24 | self._regressor = GaussianMLPRegressor( 25 | input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,), 26 | output_dim=1, 27 | name="vf", 28 | **regressor_args 29 | ) 30 | 31 | @overrides 32 | def fit(self, paths): 33 | observations = np.concatenate([p["observations"] for p in paths]) 34 | returns = np.concatenate([p["returns"] for p in paths]) 35 | self._regressor.fit(observations, returns.reshape((-1, 1))) 36 | 37 | @overrides 38 | def predict(self, path): 39 | return self._regressor.predict(path["observations"]).flatten() 40 | 41 | @overrides 42 | def get_param_values(self, **tags): 43 | return self._regressor.get_param_values(**tags) 44 | 45 | @overrides 46 | def set_param_values(self, flattened_params, **tags): 47 | self._regressor.set_param_values(flattened_params, **tags) 48 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/linear_feature_baseline.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.base import Baseline 2 | from rllab.misc.overrides import overrides 3 | import numpy as np 4 | 5 | 6 | class LinearFeatureBaseline(Baseline): 7 | def __init__(self, env_spec, reg_coeff=1e-5): 8 | self._coeffs = None 9 | self._reg_coeff = reg_coeff 10 | 11 | @overrides 12 | def get_param_values(self, **tags): 13 | return self._coeffs 14 | 15 | @overrides 16 | def set_param_values(self, val, **tags): 17 | self._coeffs = val 18 | 19 | def _features(self, path): 20 | o = np.clip(path["observations"], -10, 10) 21 | l = len(path["rewards"]) 22 | al = np.arange(l).reshape(-1, 1) / 100.0 23 | return np.concatenate([o, o ** 2, al, al ** 2, al ** 3, np.ones((l, 1))], axis=1) 24 | 25 | @overrides 26 | def fit(self, paths): 27 | featmat = np.concatenate([self._features(path) for path in paths]) 28 | returns = np.concatenate([path["returns"] for path in paths]) 29 | reg_coeff = self._reg_coeff 30 | for _ in range(5): 31 | self._coeffs = np.linalg.lstsq( 32 | 
featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]), 33 | featmat.T.dot(returns) 34 | )[0] 35 | if not np.any(np.isnan(self._coeffs)): 36 | break 37 | reg_coeff *= 10 38 | 39 | @overrides 40 | def predict(self, path): 41 | if self._coeffs is None: 42 | return np.zeros(len(path["rewards"])) 43 | return self._features(path).dot(self._coeffs) 44 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/zero_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.baselines.base import Baseline 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class ZeroBaseline(Baseline): 7 | 8 | def __init__(self, env_spec): 9 | pass 10 | 11 | @overrides 12 | def get_param_values(self, **kwargs): 13 | return None 14 | 15 | @overrides 16 | def set_param_values(self, val, **kwargs): 17 | pass 18 | 19 | @overrides 20 | def fit(self, paths): 21 | pass 22 | 23 | @overrides 24 | def predict(self, path): 25 | return np.zeros_like(path["rewards"]) 26 | -------------------------------------------------------------------------------- /rllab/rllab/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import os 3 | 4 | PROJECT_PATH = osp.abspath(osp.join(osp.dirname(__file__), '..')) 5 | 6 | LOG_DIR = PROJECT_PATH + "/data" 7 | 8 | USE_TF = False 9 | 10 | DOCKER_IMAGE = "DOCKER_IMAGE" 11 | 12 | DOCKERFILE_PATH = "/path/to/Dockerfile" 13 | 14 | KUBE_PREFIX = "rllab_" 15 | 16 | DOCKER_LOG_DIR = "/tmp/expt" 17 | 18 | POD_DIR = PROJECT_PATH + "/.pods" 19 | 20 | AWS_S3_PATH = None 21 | 22 | AWS_IMAGE_ID = None 23 | 24 | AWS_INSTANCE_TYPE = "m4.xlarge" 25 | 26 | AWS_KEY_NAME = "AWS_KEY_NAME" 27 | 28 | AWS_SPOT = True 29 | 30 | AWS_SPOT_PRICE = '1.0' 31 | 32 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", None) 33 | 34 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", None) 35 | 36 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab" 37 | 38 | AWS_SECURITY_GROUPS = ["rllab"] 39 | 40 | AWS_SECURITY_GROUP_IDS = [] 41 | 42 | AWS_NETWORK_INTERFACES = [] 43 | 44 | AWS_EXTRA_CONFIGS = dict() 45 | 46 | AWS_REGION_NAME = "us-east-1" 47 | 48 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*.pod/*"] 49 | 50 | DOCKER_CODE_DIR = "/root/code/rllab" 51 | 52 | AWS_CODE_SYNC_S3_PATH = "s3://to/be/overriden/in/personal" 53 | 54 | # whether to use fast code sync 55 | FAST_CODE_SYNC = True 56 | 57 | FAST_CODE_SYNC_IGNORES = [".git", "data", ".pods"] 58 | 59 | KUBE_DEFAULT_RESOURCES = { 60 | "requests": { 61 | "cpu": 0.8, 62 | } 63 | } 64 | 65 | KUBE_DEFAULT_NODE_SELECTOR = { 66 | "aws/type": "m4.xlarge", 67 | } 68 | 69 | MUJOCO_KEY_PATH = osp.expanduser("~/.mujoco") 70 | # MUJOCO_KEY_PATH = osp.join(osp.dirname(__file__), "../vendor/mujoco") 71 | 72 | ENV = {} 73 | 74 | EBS_OPTIMIZED = True 75 | 76 | if osp.exists(osp.join(osp.dirname(__file__), "config_personal.py")): 77 | from .config_personal import * 78 | else: 79 | print("Creating your personal config from template...") 80 | from shutil import copy 81 | copy(osp.join(PROJECT_PATH, "rllab/config_personal_template.py"), osp.join(PROJECT_PATH, "rllab/config_personal.py")) 82 | from .config_personal import * 83 | print("Personal config created, but you should probably edit it before further experiments " \ 84 | "are run") 85 | if 'CIRCLECI' not in os.environ: 86 | print("Exiting.") 87 | import sys; sys.exit(0) 88 | 89 | LABEL = "" 90 | 
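An aside on the baselines above: LinearFeatureBaseline.fit solves a ridge-regularized least-squares problem on hand-crafted features, retrying with a 10x larger regularization coefficient whenever the solution contains NaNs. Below is a minimal standalone sketch of that logic; it uses only NumPy, and the function name and the random data are illustrative, not part of the repo:

import numpy as np

def ridge_fit(featmat, returns, reg_coeff=1e-5, n_attempts=5):
    # Solve (X^T X + reg * I) w = X^T y, escalating reg until w is finite.
    coeffs = None
    for _ in range(n_attempts):
        coeffs = np.linalg.lstsq(
            featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]),
            featmat.T.dot(returns)
        )[0]
        if not np.any(np.isnan(coeffs)):
            break
        reg_coeff *= 10
    return coeffs

# Illustrative usage: 100 samples of a 6-dimensional feature matrix.
X = np.random.randn(100, 6)
y = X.dot(np.arange(6.0)) + 0.1 * np.random.randn(100)
w = ridge_fit(X, y)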
-------------------------------------------------------------------------------- /rllab/rllab/config_personal_template.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | USE_GPU = False 4 | 5 | DOCKER_IMAGE = "dementrock/rllab3-shared" 6 | 7 | KUBE_PREFIX = "template_" 8 | 9 | DOCKER_LOG_DIR = "/tmp/expt" 10 | 11 | AWS_IMAGE_ID = "ami-67c5d00d" 12 | 13 | if USE_GPU: 14 | AWS_INSTANCE_TYPE = "g2.2xlarge" 15 | else: 16 | AWS_INSTANCE_TYPE = "c4.2xlarge" 17 | 18 | AWS_KEY_NAME = "research_virginia" 19 | 20 | AWS_SPOT = True 21 | 22 | AWS_SPOT_PRICE = '10.0' 23 | 24 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab" 25 | 26 | AWS_SECURITY_GROUPS = ["rllab"] 27 | 28 | AWS_REGION_NAME = "us-west-2" 29 | 30 | AWS_CODE_SYNC_S3_PATH = "e" 31 | 32 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*src/*", 33 | "*.pods/*", "*tests/*", "*examples/*", "docs/*"] 34 | 35 | LOCAL_CODE_DIR = "" 36 | 37 | AWS_S3_PATH = "" 38 | 39 | LABEL = "template" 40 | 41 | DOCKER_CODE_DIR = "/root/code/rllab" 42 | 43 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", "") 44 | 45 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", "") 46 | -------------------------------------------------------------------------------- /rllab/rllab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/core/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/core/lasagne_powered.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.misc.overrides import overrides 3 | import lasagne.layers as L 4 | 5 | 6 | class LasagnePowered(Parameterized): 7 | def __init__(self, output_layers): 8 | self._output_layers = output_layers 9 | super(LasagnePowered, self).__init__() 10 | 11 | @property 12 | def output_layers(self): 13 | return self._output_layers 14 | 15 | @overrides 16 | def get_params_internal(self, **tags): # this gives ALL the vars (not the params values) 17 | return L.get_all_params( # this lasagne function also returns all var below the passed layers 18 | L.concat(self._output_layers), 19 | **tags 20 | ) 21 | -------------------------------------------------------------------------------- /rllab/rllab/core/serializable.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | 4 | 5 | class Serializable(object): 6 | 7 | def __init__(self, *args, **kwargs): 8 | self.__args = args 9 | self.__kwargs = kwargs 10 | 11 | def quick_init(self, locals_): 12 | if getattr(self, "_serializable_initialized", False): 13 | return 14 | if sys.version_info >= (3, 0): 15 | spec = inspect.getfullargspec(self.__init__) 16 | # Exclude the first "self" parameter 17 | if spec.varkw: 18 | kwargs = locals_[spec.varkw] 19 | else: 20 | kwargs = dict() 21 | else: 22 | spec = inspect.getargspec(self.__init__) 23 | if spec.keywords: 24 | kwargs = locals_[spec.keywords] 25 | else: 26 | kwargs = dict() 27 | if spec.varargs: 28 | varargs = locals_[spec.varargs] 29 | else: 30 | varargs = tuple() 31 | in_order_args = [locals_[arg] for arg in spec.args][1:] 32 | self.__args = tuple(in_order_args) + varargs 33 | self.__kwargs = kwargs 34 | setattr(self, "_serializable_initialized", True) 35 | 36 | def __getstate__(self): 37 | return {"__args": self.__args, 
"__kwargs": self.__kwargs} 38 | 39 | def __setstate__(self, d): 40 | out = type(self)(*d["__args"], **d["__kwargs"]) 41 | self.__dict__.update(out.__dict__) 42 | 43 | @classmethod 44 | def clone(cls, obj, **kwargs): 45 | assert isinstance(obj, Serializable) 46 | d = obj.__getstate__() 47 | 48 | # Split the entries in kwargs between positional and keyword arguments 49 | # and update d['__args'] and d['__kwargs'], respectively. 50 | if sys.version_info >= (3, 0): 51 | spec = inspect.getfullargspec(obj.__init__) 52 | else: 53 | spec = inspect.getargspec(obj.__init__) 54 | in_order_args = spec.args[1:] 55 | 56 | d["__args"] = list(d["__args"]) 57 | for kw, val in kwargs.items(): 58 | if kw in in_order_args: 59 | d["__args"][in_order_args.index(kw)] = val 60 | else: 61 | d["__kwargs"][kw] = val 62 | 63 | out = type(obj).__new__(type(obj)) 64 | out.__setstate__(d) 65 | return out 66 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/distributions/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/distributions/base.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | 3 | class Distribution(object): 4 | 5 | @property 6 | def dim(self): 7 | raise NotImplementedError 8 | 9 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 10 | """ 11 | Compute the symbolic KL divergence of two distributions 12 | """ 13 | raise NotImplementedError 14 | 15 | def kl(self, old_dist_info, new_dist_info): 16 | """ 17 | Compute the KL divergence of two distributions 18 | """ 19 | raise NotImplementedError 20 | 21 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 22 | raise NotImplementedError 23 | 24 | def entropy(self, dist_info): 25 | raise NotImplementedError 26 | 27 | def log_likelihood_sym(self, x_var, dist_info_vars): 28 | raise NotImplementedError 29 | 30 | def likelihood_sym(self, x_var, dist_info_vars): 31 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 32 | 33 | def log_likelihood(self, xs, dist_info): 34 | raise NotImplementedError 35 | 36 | @property 37 | def dist_info_keys(self): 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .base import Distribution 4 | import theano.tensor as TT 5 | import numpy as np 6 | 7 | TINY = 1e-8 8 | 9 | 10 | class Bernoulli(Distribution): 11 | def __init__(self, dim): 12 | self._dim = dim 13 | 14 | @property 15 | def dim(self): 16 | return self._dim 17 | 18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 19 | old_p = old_dist_info_vars["p"] 20 | new_p = new_dist_info_vars["p"] 21 | kl = old_p * (TT.log(old_p + TINY) - TT.log(new_p + TINY)) + \ 22 | (1 - old_p) * (TT.log(1 - old_p + TINY) - TT.log(1 - new_p + TINY)) 23 | return TT.sum(kl, axis=-1) 24 | 25 | def kl(self, old_dist_info, new_dist_info): 26 | old_p = old_dist_info["p"] 27 | new_p = new_dist_info["p"] 28 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \ 29 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY)) 30 | return np.sum(kl, axis=-1) 31 
| 32 | def sample(self, dist_info): 33 | p = np.asarray(dist_info["p"]) 34 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p) 35 | 36 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 37 | old_p = old_dist_info_vars["p"] 38 | new_p = new_dist_info_vars["p"] 39 | return TT.prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY), 40 | axis=-1) 41 | 42 | def log_likelihood_sym(self, x_var, dist_info_vars): 43 | p = dist_info_vars["p"] 44 | return TT.sum(x_var * TT.log(p + TINY) + (1 - x_var) * TT.log(1 - p + TINY), axis=-1) 45 | 46 | def log_likelihood(self, xs, dist_info): 47 | p = dist_info["p"] 48 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1) 49 | 50 | def entropy(self, dist_info): 51 | p = dist_info["p"] 52 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1) 53 | 54 | @property 55 | def dist_info_keys(self): 56 | return ["p"] 57 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/delta.py: -------------------------------------------------------------------------------- 1 | from rllab.distributions.base import Distribution 2 | 3 | class Delta(Distribution): 4 | @property 5 | def dim(self): 6 | return 0 7 | 8 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 9 | return None 10 | 11 | def kl(self, old_dist_info, new_dist_info): 12 | return None 13 | 14 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 15 | raise NotImplementedError 16 | 17 | def entropy(self, dist_info): 18 | raise NotImplementedError 19 | 20 | def log_likelihood_sym(self, x_var, dist_info_vars): 21 | raise NotImplementedError 22 | 23 | def likelihood_sym(self, x_var, dist_info_vars): 24 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 25 | 26 | def log_likelihood(self, xs, dist_info): 27 | return None 28 | 29 | @property 30 | def dist_info_keys(self): 31 | return None 32 | 33 | def entropy(self,dist_info): 34 | return 0 35 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/recurrent_diagonal_gaussian.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | import numpy as np 3 | from rllab.distributions.base import Distribution 4 | from rllab.distributions.diagonal_gaussian import DiagonalGaussian 5 | 6 | RecurrentDiagonalGaussian = DiagonalGaussian 7 | -------------------------------------------------------------------------------- /rllab/rllab/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/box2d/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/cartpole_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.envs.box2d.parser import find_body 3 | 4 | from rllab.core.serializable import Serializable 5 | from 
rllab.envs.box2d.box2d_env import Box2DEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | class CartpoleEnv(Box2DEnv, Serializable): 11 | 12 | @autoargs.inherit(Box2DEnv.__init__) 13 | def __init__(self, *args, **kwargs): 14 | self.max_pole_angle = .2 15 | self.max_cart_pos = 2.4 16 | self.max_cart_speed = 4. 17 | self.max_pole_speed = 4. 18 | self.reset_range = 0.05 19 | super(CartpoleEnv, self).__init__( 20 | self.model_path("cartpole.xml.mako"), 21 | *args, **kwargs 22 | ) 23 | self.cart = find_body(self.world, "cart") 24 | self.pole = find_body(self.world, "pole") 25 | Serializable.__init__(self, *args, **kwargs) 26 | 27 | @overrides 28 | def reset(self): 29 | self._set_state(self.initial_state) 30 | self._invalidate_state_caches() 31 | bounds = np.array([ 32 | self.max_cart_pos, 33 | self.max_cart_speed, 34 | self.max_pole_angle, 35 | self.max_pole_speed 36 | ]) 37 | low, high = -self.reset_range*bounds, self.reset_range*bounds 38 | xpos, xvel, apos, avel = np.random.uniform(low, high) 39 | self.cart.position = (xpos, self.cart.position[1]) 40 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1]) 41 | self.pole.angle = apos 42 | self.pole.angularVelocity = avel 43 | return self.get_current_obs() 44 | 45 | @overrides 46 | def compute_reward(self, action): 47 | yield 48 | notdone = 1 - int(self.is_current_done()) 49 | ucost = 1e-5*(action**2).sum() 50 | xcost = 1 - np.cos(self.pole.angle) 51 | yield notdone * 10 - notdone * xcost - notdone * ucost 52 | 53 | @overrides 54 | def is_current_done(self): 55 | return abs(self.cart.position[0]) > self.max_cart_pos or \ 56 | abs(self.pole.angle) > self.max_pole_angle 57 | 58 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/cartpole_swingup_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | from rllab.envs.box2d.parser import find_body 4 | 5 | from rllab.core.serializable import Serializable 6 | from rllab.envs.box2d.box2d_env import Box2DEnv 7 | from rllab.misc import autoargs 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | # Tornio, Matti, and Tapani Raiko. "Variational Bayesian approach for 12 | # nonlinear identification and control." Proc. of the IFAC Workshop on 13 | # Nonlinear Model Predictive Control for Fast Systems, NMPC FS06. 2006. 
14 | class CartpoleSwingupEnv(Box2DEnv, Serializable): 15 | 16 | @autoargs.inherit(Box2DEnv.__init__) 17 | def __init__(self, *args, **kwargs): 18 | super(CartpoleSwingupEnv, self).__init__( 19 | self.model_path("cartpole.xml.mako"), 20 | *args, **kwargs 21 | ) 22 | self.max_cart_pos = 3 23 | self.max_reward_cart_pos = 3 24 | self.cart = find_body(self.world, "cart") 25 | self.pole = find_body(self.world, "pole") 26 | Serializable.__init__(self, *args, **kwargs) 27 | 28 | @overrides 29 | def reset(self): 30 | self._set_state(self.initial_state) 31 | self._invalidate_state_caches() 32 | bounds = np.array([ 33 | [-1, -2, np.pi-1, -3], 34 | [1, 2, np.pi+1, 3], 35 | ]) 36 | low, high = bounds 37 | xpos, xvel, apos, avel = np.random.uniform(low, high) 38 | self.cart.position = (xpos, self.cart.position[1]) 39 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1]) 40 | self.pole.angle = apos 41 | self.pole.angularVelocity = avel 42 | return self.get_current_obs() 43 | 44 | @overrides 45 | def compute_reward(self, action): 46 | yield 47 | if self.is_current_done(): 48 | yield -100 49 | else: 50 | if abs(self.cart.position[0]) > self.max_reward_cart_pos: 51 | yield -1 52 | else: 53 | yield np.cos(self.pole.angle) 54 | 55 | @overrides 56 | def is_current_done(self): 57 | return abs(self.cart.position[0]) > self.max_cart_pos 58 | 59 | @overrides 60 | def action_from_keys(self, keys): 61 | if keys[pygame.K_LEFT]: 62 | return np.asarray([-10]) 63 | elif keys[pygame.K_RIGHT]: 64 | return np.asarray([+10]) 65 | else: 66 | return np.asarray([0]) 67 | 68 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.envs.box2d.parser import find_body 3 | 4 | from rllab.core.serializable import Serializable 5 | from rllab.envs.box2d.box2d_env import Box2DEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | # http://mlg.eng.cam.ac.uk/pilco/ 11 | class DoublePendulumEnv(Box2DEnv, Serializable): 12 | 13 | @autoargs.inherit(Box2DEnv.__init__) 14 | def __init__(self, *args, **kwargs): 15 | # make sure mdp-level step is 100ms long 16 | kwargs["frame_skip"] = kwargs.get("frame_skip", 2) 17 | if kwargs.get("template_args", {}).get("noise", False): 18 | self.link_len = (np.random.rand()-0.5) + 1 19 | else: 20 | self.link_len = 1 21 | kwargs["template_args"] = kwargs.get("template_args", {}) 22 | kwargs["template_args"]["link_len"] = self.link_len 23 | super(DoublePendulumEnv, self).__init__( 24 | self.model_path("double_pendulum.xml.mako"), 25 | *args, **kwargs 26 | ) 27 | self.link1 = find_body(self.world, "link1") 28 | self.link2 = find_body(self.world, "link2") 29 | Serializable.__init__(self, *args, **kwargs) 30 | 31 | @overrides 32 | def reset(self): 33 | self._set_state(self.initial_state) 34 | self._invalidate_state_caches() 35 | stds = np.array([0.1, 0.1, 0.01, 0.01]) 36 | pos1, pos2, v1, v2 = np.random.randn(*stds.shape) * stds 37 | self.link1.angle = pos1 38 | self.link2.angle = pos2 39 | self.link1.angularVelocity = v1 40 | self.link2.angularVelocity = v2 41 | return self.get_current_obs() 42 | 43 | def get_tip_pos(self): 44 | cur_center_pos = self.link2.position 45 | cur_angle = self.link2.angle 46 | cur_pos = ( 47 | cur_center_pos[0] - self.link_len*np.sin(cur_angle), 48 | cur_center_pos[1] - self.link_len*np.cos(cur_angle) 49 | ) 50 | return cur_pos 51 | 52 | 
@overrides 53 | def compute_reward(self, action): 54 | yield 55 | tgt_pos = np.asarray([0, self.link_len * 2]) 56 | cur_pos = self.get_tip_pos() 57 | dist = np.linalg.norm(cur_pos - tgt_pos) 58 | yield -dist 59 | 60 | def is_current_done(self): 61 | return False 62 | 63 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/car_parking.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/car_parking.xml.rb: -------------------------------------------------------------------------------- 1 | car_height = 1.0 2 | car_width = 0.6 3 | car_mass = 1 4 | car_density = car_mass / car_height / car_width 5 | 6 | wheel_height = 0.3 7 | wheel_width = 0.1 8 | wheel_mass = 0.1 9 | wheel_density = wheel_mass / wheel_height / wheel_width 10 | wheel_max_deg = 30 11 | 12 | phantom_group = -1 13 | common = { group: phantom_group } 14 | 15 | box2d { 16 | world(timestep: 0.05, gravity: [0, 0]) { 17 | body(name: :goal, type: :static, position: [0, 0]) { 18 | fixture(common.merge(shape: :circle, radius: 1)) 19 | } 20 | 21 | car_pos = [3, 4] 22 | body(name: :car, type: :dynamic, position: car_pos) { 23 | rect( 24 | box: [car_width / 2, car_height / 2], 25 | density: car_density, 26 | group: phantom_group, 27 | ) 28 | } 29 | [:left_front_wheel, :right_front_wheel, :left_rear_wheel, :right_rear_wheel].each do |wheel| 30 | x_pos = car_width / 2 31 | x_pos *= wheel =~ /left/ ? -1 : 1 32 | y_pos = wheel =~ /front/ ? 0.2 : -0.3 33 | body(name: wheel, type: :dynamic, position: [car_pos[0] + x_pos, car_pos[1] + y_pos]) { 34 | rect( 35 | box: [wheel_width / 2, wheel_height / 2], 36 | density: wheel_density, 37 | group: phantom_group, 38 | ) 39 | } 40 | # limit = wheel =~ /front/ ? 
[-wheel_max_deg, wheel_max_deg] : [0, 0] 41 | limit = [0, 0] 42 | joint( 43 | type: :revolute, 44 | name: "#{wheel}_joint", 45 | bodyA: :car, 46 | bodyB: wheel, 47 | localAnchorA: [x_pos, y_pos], 48 | localAnchorB: [0, 0], 49 | limit: limit, 50 | ) 51 | end 52 | control( 53 | type: :force, 54 | bodies: [:left_front_wheel, :right_front_wheel], 55 | anchor: [0, 0], 56 | direction: [0, 1], 57 | ctrllimit: [-10.N, 10.N], 58 | ) 59 | state body: :car, type: :xvel 60 | state body: :car, type: :yvel 61 | state body: :car, type: :dist, to: :goal 62 | state body: :car, type: :angle, to: :goal, transform: :cos 63 | state body: :car, type: :angle, to: :goal, transform: :sin 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/cartpole.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | from rllab.misc.mako_utils import compute_rect_vertices 3 | cart_width = 4.0 / (12 ** 0.5) 4 | cart_height = 3.0 / (12 ** 0.5) 5 | 6 | pole_width = 0.1 7 | pole_height = 1.0 8 | noise = opts.get("noise", False) 9 | if noise: 10 | import numpy as np 11 | pole_height += (np.random.rand()-0.5) * pole_height * 1 12 | 13 | cart_friction = 0.0005 14 | pole_friction = 0.000002 15 | %> 16 | 17 | 18 | 19 | 20 | 26 | 27 | 28 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/double_pendulum.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | from rllab.misc.mako_utils import compute_rect_vertices 3 | link_len = opts['link_len'] 4 | link_width = 0.1 5 | %> 6 | 7 | 8 | 9 | 10 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/mountain_car.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | noise = opts.get("noise", False) 3 | track_width = 4 4 | if noise: 5 | import numpy as np 6 | track_width += np.random.uniform(-1, 1) 7 | %> 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/mountain_car_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | from rllab.envs.box2d.parser import find_body 4 | 5 | from rllab.core.serializable import Serializable 6 | from rllab.envs.box2d.box2d_env import Box2DEnv 7 | from rllab.misc import autoargs 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | class MountainCarEnv(Box2DEnv, Serializable): 12 | 13 | @autoargs.inherit(Box2DEnv.__init__) 14 | @autoargs.arg("height_bonus_coeff", type=float, 15 | help="Height bonus added to each step's reward") 16 | @autoargs.arg("goal_cart_pos", type=float, 17 | help="Goal horizontal position") 18 | def __init__(self, 19 | height_bonus=1., 20 | goal_cart_pos=0.6, 21 | *args, **kwargs): 22 | super(MountainCarEnv, self).__init__( 23 | self.model_path("mountain_car.xml.mako"), 24 | *args, **kwargs 25 | ) 26 | self.max_cart_pos = 2 27 | self.goal_cart_pos = goal_cart_pos 28 | self.height_bonus = height_bonus 29 | self.cart = find_body(self.world, "cart") 30 | Serializable.quick_init(self, locals()) 31 | 32 | @overrides 33 | def 
compute_reward(self, action): 34 | yield 35 | yield (-1 + self.height_bonus * self.cart.position[1]) 36 | 37 | @overrides 38 | def is_current_done(self): 39 | return self.cart.position[0] >= self.goal_cart_pos \ 40 | or abs(self.cart.position[0]) >= self.max_cart_pos 41 | 42 | @overrides 43 | def reset(self): 44 | self._set_state(self.initial_state) 45 | self._invalidate_state_caches() 46 | bounds = np.array([ 47 | [-1], 48 | [1], 49 | ]) 50 | low, high = bounds 51 | xvel = np.random.uniform(low, high) 52 | self.cart.linearVelocity = (float(xvel), self.cart.linearVelocity[1]) 53 | return self.get_current_obs() 54 | 55 | @overrides 56 | def action_from_keys(self, keys): 57 | if keys[pygame.K_LEFT]: 58 | return np.asarray([-1]) 59 | elif keys[pygame.K_RIGHT]: 60 | return np.asarray([+1]) 61 | else: 62 | return np.asarray([0]) 63 | 64 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .xml_box2d import world_from_xml, find_body, find_joint 2 | -------------------------------------------------------------------------------- /rllab/rllab/envs/env_spec.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.base import Space 3 | 4 | 5 | class EnvSpec(Serializable): 6 | 7 | def __init__( 8 | self, 9 | observation_space, 10 | action_space): 11 | """ 12 | :type observation_space: Space 13 | :type action_space: Space 14 | """ 15 | Serializable.quick_init(self, locals()) 16 | self._observation_space = observation_space 17 | self._action_space = action_space 18 | 19 | @property 20 | def observation_space(self): 21 | return self._observation_space 22 | 23 | @property 24 | def action_space(self): 25 | return self._action_space 26 | -------------------------------------------------------------------------------- /rllab/rllab/envs/identification_env.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.envs.proxy_env import ProxyEnv 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class IdentificationEnv(ProxyEnv, Serializable): 7 | 8 | def __init__(self, mdp_cls, mdp_args): 9 | Serializable.quick_init(self, locals()) 10 | self.mdp_cls = mdp_cls 11 | self.mdp_args = dict(mdp_args) 12 | self.mdp_args["template_args"] = dict(noise=True) 13 | mdp = self.gen_mdp() 14 | super(IdentificationEnv, self).__init__(mdp) 15 | 16 | def gen_mdp(self): 17 | return self.mdp_cls(**self.mdp_args) 18 | 19 | @overrides 20 | def reset(self): 21 | if getattr(self, "_mdp", None): 22 | if hasattr(self._wrapped_env, "release"): 23 | self._wrapped_env.release() 24 | self._wrapped_env = self.gen_mdp() 25 | return super(IdentificationEnv, self).reset() 26 | 27 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/gather/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/ant_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | 4 | 5 | class AntGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = AntEnv 8 | ORI_IND = 6 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/point_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/swimmer_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/half_cheetah_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import logger 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | def smooth_abs(x, param): 11 | return np.sqrt(np.square(x) + np.square(param)) - param 12 | 13 | 14 | class HalfCheetahEnv(MujocoEnv, Serializable): 15 | 16 | FILE = 'half_cheetah.xml' 17 | 18 | def __init__(self, *args, **kwargs): 19 | super(HalfCheetahEnv, self).__init__(*args, **kwargs) 20 | Serializable.__init__(self, *args, **kwargs) 21 | 22 | def get_current_obs(self): 23 | return np.concatenate([ 24 | self.model.data.qpos.flatten()[1:], 25 | self.model.data.qvel.flat, 26 | self.get_body_com("torso").flat, 27 | ]) 28 | 29 | def get_body_xmat(self, body_name): 30 | idx = self.model.body_names.index(body_name) 31 | return self.model.data.xmat[idx].reshape((3, 3)) 32 | 33 | def get_body_com(self, body_name): 34 | idx = self.model.body_names.index(body_name) 35 | return self.model.data.com_subtree[idx] 36 | 37 | def step(self, action): 38 | self.forward_dynamics(action) 39 | next_obs = self.get_current_obs() 40 | action = np.clip(action, *self.action_bounds) 41 | ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action)) 42 | run_cost = -1 * self.get_body_comvel("torso")[0] 43 | cost = ctrl_cost + run_cost 44 | reward = -cost 45 | done = False 46 | return Step(next_obs, reward, done) 47 | 48 | @overrides 49 | def log_diagnostics(self, paths): 50 | progs = [ 51 | path["observations"][-1][-3] - path["observations"][0][-3] 52 | for path in paths 53 | ] 54 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 55 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 56 | logger.record_tabular('MinForwardProgress', np.min(progs)) 57 | logger.record_tabular('StdForwardProgress', np.std(progs)) 58 | 
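A minimal, self-contained sketch (not part of the repository) of the reward computed in HalfCheetahEnv.step above: a quadratic control cost plus a forward-progress term taken from the torso centre-of-mass velocity. The function name and arguments below are illustrative only.

import numpy as np

def half_cheetah_reward(action, torso_forward_velocity, action_low, action_high, ctrl_cost_coeff=1e-1):
    # actions are clipped to their bounds before being costed, as in step()
    action = np.clip(action, action_low, action_high)
    ctrl_cost = ctrl_cost_coeff * 0.5 * np.sum(np.square(action))
    run_cost = -1.0 * torso_forward_velocity  # negative cost rewards forward motion
    return -(ctrl_cost + run_cost)

# With zero control effort the reward reduces to the forward velocity, e.g.
# half_cheetah_reward(np.zeros(6), 1.5, -1.0, 1.0) == 1.5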
-------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/hill/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/ant_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.ant_env import AntEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class AntHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = AntEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([0.0, 0.0]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/half_cheetah_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class HalfCheetahHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = HalfCheetahEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/hopper_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.hopper_env import HopperEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class HopperHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = HopperEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-1.0, -1.0]), np.array([-0.5, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/swimmer3d_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.swimmer3d_env import Swimmer3DEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class Swimmer3DHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = Swimmer3DEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/walker2d_hill_env.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.walker2d_env import Walker2DEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class Walker2DHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = Walker2DEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([-0.5, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/humanoid_env.py: -------------------------------------------------------------------------------- 1 | from .simple_humanoid_env import SimpleHumanoidEnv 2 | 3 | 4 | # Taken from Wojciech's code 5 | class HumanoidEnv(SimpleHumanoidEnv): 6 | 7 | FILE = 'humanoid.xml' 8 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/inverted_double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | class InvertedDoublePendulumEnv(MujocoEnv, Serializable): 11 | FILE = 'inverted_double_pendulum.xml.mako' 12 | 13 | @autoargs.arg("random_start", type=bool, 14 | help="Randomized starting position by adjusting the angles" 15 | "When this is false, the double pendulum started out" 16 | "in balanced position") 17 | def __init__( 18 | self, 19 | *args, **kwargs): 20 | self.random_start = kwargs.get("random_start", True) 21 | super(InvertedDoublePendulumEnv, self).__init__(*args, **kwargs) 22 | Serializable.quick_init(self, locals()) 23 | 24 | @overrides 25 | def get_current_obs(self): 26 | return np.concatenate([ 27 | self.model.data.qpos[:1], # cart x pos 28 | np.sin(self.model.data.qpos[1:]), # link angles 29 | np.cos(self.model.data.qpos[1:]), 30 | np.clip(self.model.data.qvel, -10, 10), 31 | np.clip(self.model.data.qfrc_constraint, -10, 10) 32 | ]).reshape(-1) 33 | 34 | @overrides 35 | def step(self, action): 36 | self.forward_dynamics(action) 37 | next_obs = self.get_current_obs() 38 | x, _, y = self.model.data.site_xpos[0] 39 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 40 | v1, v2 = self.model.data.qvel[1:3] 41 | vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2 42 | alive_bonus = 10 43 | r = float(alive_bonus - dist_penalty - vel_penalty) 44 | done = y <= 1 45 | return Step(next_obs, r, done) 46 | 47 | @overrides 48 | def reset_mujoco(self, init_state=None): 49 | assert init_state is None 50 | qpos = np.copy(self.init_qpos) 51 | if self.random_start: 52 | qpos[1] = (np.random.rand() - 0.5) * 40 / 180. 
* np.pi 53 | self.model.data.qpos = qpos 54 | self.model.data.qvel = self.init_qvel 55 | self.model.data.qacc = self.init_qacc 56 | self.model.data.ctrl = self.init_ctrl 57 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/maze/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/ant_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | 4 | 5 | class AntMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = AntEnv 8 | ORI_IND = 6 9 | 10 | MAZE_HEIGHT = 2 11 | MAZE_SIZE_SCALING = 3.0 12 | 13 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/point_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 2 11 | MAZE_SIZE_SCALING = 3.0 12 | 13 | MANUAL_COLLISION = True 14 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/swimmer_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 0.5 11 | MAZE_SIZE_SCALING = 4 12 | MAZE_MAKE_CONTACTS = True 13 | 14 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/point_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Step 2 | from .mujoco_env import MujocoEnv 3 | from rllab.core.serializable import Serializable 4 | from rllab.misc.overrides import overrides 5 | import numpy as np 6 | import math 7 | from rllab.mujoco_py import glfw 8 | 9 | 10 | class PointEnv(MujocoEnv, Serializable): 11 | 12 | """ 13 | Use Left, Right, Up, Down, A (steer left), D (steer right) 14 | """ 15 | 16 | FILE = 'point.xml' 17 | 18 | def __init__(self, *args, **kwargs): 19 | super(PointEnv, self).__init__(*args, **kwargs) 20 | Serializable.quick_init(self, locals()) 21 | 22 | def step(self, action): 23 | qpos = np.copy(self.model.data.qpos) 24 | qpos[2, 0] += action[1] 25 | ori = qpos[2, 0] 26 | # compute increment in each direction 27 | dx = math.cos(ori) * action[0] 28 | dy = math.sin(ori) * action[0] 29 | # ensure that the robot is within reasonable range 30 | qpos[0, 0] = np.clip(qpos[0, 0] + dx, -7, 7) 31 | qpos[1, 0] = np.clip(qpos[1, 0] + dy, -7, 7) 32 | self.model.data.qpos = qpos 33 | self.model.forward() 34 | next_obs = self.get_current_obs() 35 | return Step(next_obs, 0, False) 36 | 37 | def get_xy(self): 38 | qpos = self.model.data.qpos 39 | return qpos[0, 0], qpos[1, 0] 40 | 41 | def set_xy(self, xy): 42 | qpos = np.copy(self.model.data.qpos) 43 | qpos[0, 0] = xy[0] 44 | qpos[1, 0] = xy[1] 45 | self.model.data.qpos = qpos 46 | 
self.model.forward() 47 | 48 | @overrides 49 | def action_from_key(self, key): 50 | lb, ub = self.action_bounds 51 | if key == glfw.KEY_LEFT: 52 | return np.array([0, ub[0]*0.3]) 53 | elif key == glfw.KEY_RIGHT: 54 | return np.array([0, lb[0]*0.3]) 55 | elif key == glfw.KEY_UP: 56 | return np.array([ub[1], 0]) 57 | elif key == glfw.KEY_DOWN: 58 | return np.array([lb[1], 0]) 59 | else: 60 | return np.array([0, 0]) 61 | 62 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/swimmer3d_env.py: -------------------------------------------------------------------------------- 1 | from .swimmer_env import SwimmerEnv 2 | 3 | class Swimmer3DEnv(SwimmerEnv): 4 | FILE = 'swimmer3d.xml' -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/swimmer_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Step 2 | from rllab.misc.overrides import overrides 3 | from .mujoco_env import MujocoEnv 4 | import numpy as np 5 | from rllab.core.serializable import Serializable 6 | from rllab.misc import logger 7 | from rllab.misc import autoargs 8 | 9 | 10 | class SwimmerEnv(MujocoEnv, Serializable): 11 | 12 | FILE = 'swimmer.xml' 13 | ORI_IND = 2 14 | 15 | @autoargs.arg('ctrl_cost_coeff', type=float, 16 | help='cost coefficient for controls') 17 | def __init__( 18 | self, 19 | ctrl_cost_coeff=1e-2, 20 | *args, **kwargs): 21 | self.ctrl_cost_coeff = ctrl_cost_coeff 22 | super(SwimmerEnv, self).__init__(*args, **kwargs) 23 | Serializable.quick_init(self, locals()) 24 | 25 | def get_current_obs(self): 26 | return np.concatenate([ 27 | self.model.data.qpos.flat, 28 | self.model.data.qvel.flat, 29 | self.get_body_com("torso").flat, 30 | ]).reshape(-1) 31 | 32 | def get_ori(self): 33 | return self.model.data.qpos[self.__class__.ORI_IND] 34 | 35 | def step(self, action): 36 | self.forward_dynamics(action) 37 | next_obs = self.get_current_obs() 38 | lb, ub = self.action_bounds 39 | scaling = (ub - lb) * 0.5 40 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum( 41 | np.square(action / scaling)) 42 | forward_reward = self.get_body_comvel("torso")[0] 43 | reward = forward_reward - ctrl_cost 44 | done = False 45 | return Step(next_obs, reward, done) 46 | 47 | @overrides 48 | def log_diagnostics(self, paths): 49 | if len(paths) > 0: 50 | progs = [ 51 | path["observations"][-1][-3] - path["observations"][0][-3] 52 | for path in paths 53 | ] 54 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 55 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 56 | logger.record_tabular('MinForwardProgress', np.min(progs)) 57 | logger.record_tabular('StdForwardProgress', np.std(progs)) 58 | else: 59 | logger.record_tabular('AverageForwardProgress', np.nan) 60 | logger.record_tabular('MaxForwardProgress', np.nan) 61 | logger.record_tabular('MinForwardProgress', np.nan) 62 | logger.record_tabular('StdForwardProgress', np.nan) 63 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/walker2d_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc import logger 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | def smooth_abs(x, 
param): 12 | return np.sqrt(np.square(x) + np.square(param)) - param 13 | 14 | 15 | class Walker2DEnv(MujocoEnv, Serializable): 16 | 17 | FILE = 'walker2d.xml' 18 | 19 | @autoargs.arg('ctrl_cost_coeff', type=float, 20 | help='cost coefficient for controls') 21 | def __init__( 22 | self, 23 | ctrl_cost_coeff=1e-2, 24 | *args, **kwargs): 25 | self.ctrl_cost_coeff = ctrl_cost_coeff 26 | super(Walker2DEnv, self).__init__(*args, **kwargs) 27 | Serializable.quick_init(self, locals()) 28 | 29 | def get_current_obs(self): 30 | return np.concatenate([ 31 | self.model.data.qpos.flat, 32 | self.model.data.qvel.flat, 33 | self.get_body_com("torso").flat, 34 | ]) 35 | 36 | def step(self, action): 37 | self.forward_dynamics(action) 38 | next_obs = self.get_current_obs() 39 | action = np.clip(action, *self.action_bounds) 40 | lb, ub = self.action_bounds 41 | scaling = (ub - lb) * 0.5 42 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * \ 43 | np.sum(np.square(action / scaling)) 44 | forward_reward = self.get_body_comvel("torso")[0] 45 | reward = forward_reward - ctrl_cost 46 | qpos = self.model.data.qpos 47 | done = not (qpos[0] > 0.8 and qpos[0] < 2.0 48 | and qpos[2] > -1.0 and qpos[2] < 1.0) 49 | return Step(next_obs, reward, done) 50 | 51 | @overrides 52 | def log_diagnostics(self, paths): 53 | progs = [ 54 | path["observations"][-1][-3] - path["observations"][0][-3] 55 | for path in paths 56 | ] 57 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 58 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 59 | logger.record_tabular('MinForwardProgress', np.min(progs)) 60 | logger.record_tabular('StdForwardProgress', np.std(progs)) 61 | 62 | -------------------------------------------------------------------------------- /rllab/rllab/envs/proxy_env.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from .base import Env 3 | 4 | 5 | class ProxyEnv(Env, Serializable): 6 | def __init__(self, wrapped_env): 7 | Serializable.quick_init(self, locals()) 8 | self._wrapped_env = wrapped_env 9 | 10 | @property 11 | def wrapped_env(self): 12 | return self._wrapped_env 13 | 14 | def reset(self, *args, **kwargs): 15 | return self._wrapped_env.reset(*args, **kwargs) 16 | 17 | @property 18 | def action_space(self): 19 | return self._wrapped_env.action_space 20 | 21 | @property 22 | def observation_space(self): 23 | return self._wrapped_env.observation_space 24 | 25 | def step(self, action): 26 | return self._wrapped_env.step(action) 27 | 28 | def render(self, *args, **kwargs): 29 | return self._wrapped_env.render(*args, **kwargs) 30 | 31 | def log_diagnostics(self, paths, *args, **kwargs): 32 | self._wrapped_env.log_diagnostics(paths, *args, **kwargs) 33 | 34 | @property 35 | def horizon(self): 36 | return self._wrapped_env.horizon 37 | 38 | def terminate(self): 39 | self._wrapped_env.terminate() 40 | 41 | def get_param_values(self): 42 | return self._wrapped_env.get_param_values() 43 | 44 | def set_param_values(self, params): 45 | self._wrapped_env.set_param_values(params) 46 | -------------------------------------------------------------------------------- /rllab/rllab/envs/sliding_mem_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.proxy_env import ProxyEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import 
overrides 8 | from rllab.spaces import Box 9 | 10 | 11 | class SlidingMemEnv(ProxyEnv, Serializable): 12 | 13 | def __init__( 14 | self, 15 | env, 16 | n_steps=4, 17 | axis=0, 18 | ): 19 | super().__init__(env) 20 | Serializable.quick_init(self, locals()) 21 | self.n_steps = n_steps 22 | self.axis = axis 23 | self.buffer = None 24 | 25 | def reset_buffer(self, new_): 26 | assert self.axis == 0 27 | self.buffer = np.zeros(self.observation_space.shape, dtype=np.float32) 28 | self.buffer[0:] = new_ 29 | 30 | def add_to_buffer(self, new_): 31 | assert self.axis == 0 32 | self.buffer[1:] = self.buffer[:-1] 33 | self.buffer[:1] = new_ 34 | 35 | @property 36 | def observation_space(self): 37 | origin = self._wrapped_env.observation_space 38 | return Box( 39 | *[ 40 | np.repeat(b, self.n_steps, axis=self.axis) 41 | for b in origin.bounds 42 | ] 43 | ) 44 | 45 | @overrides 46 | def reset(self): 47 | obs = self._wrapped_env.reset() 48 | self.reset_buffer(obs) 49 | return self.buffer 50 | 51 | @overrides 52 | def step(self, action): 53 | next_obs, reward, done, info = self._wrapped_env.step(action) 54 | self.add_to_buffer(next_obs) 55 | return Step(self.buffer, reward, done, **info) 56 | 57 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/exploration_strategies/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/base.py: -------------------------------------------------------------------------------- 1 | class ExplorationStrategy(object): 2 | def get_action(self, t, observation, policy, **kwargs): 3 | raise NotImplementedError 4 | 5 | def reset(self): 6 | pass 7 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/gaussian_strategy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.box import Box 3 | from rllab.exploration_strategies.base import ExplorationStrategy 4 | import numpy as np 5 | 6 | 7 | class GaussianStrategy(ExplorationStrategy, Serializable): 8 | """ 9 | This strategy adds Gaussian noise to the action taken by the deterministic policy. 
10 | """ 11 | 12 | def __init__(self, env_spec, max_sigma=1.0, min_sigma=0.1, decay_period=1000000): 13 | assert isinstance(env_spec.action_space, Box) 14 | assert len(env_spec.action_space.shape) == 1 15 | Serializable.quick_init(self, locals()) 16 | self._max_sigma = max_sigma 17 | self._min_sigma = min_sigma 18 | self._decay_period = decay_period 19 | self._action_space = env_spec.action_space 20 | 21 | def get_action(self, t, observation, policy, **kwargs): 22 | action, agent_info = policy.get_action(observation) 23 | sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(1.0, t * 1.0 / self._decay_period) 24 | return np.clip(action + np.random.normal(size=len(action)) * sigma, self._action_space.low, 25 | self._action_space.high) 26 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/ou_strategy.py: -------------------------------------------------------------------------------- 1 | from rllab.misc.overrides import overrides 2 | from rllab.misc.ext import AttrDict 3 | from rllab.core.serializable import Serializable 4 | from rllab.spaces.box import Box 5 | from rllab.exploration_strategies.base import ExplorationStrategy 6 | import numpy as np 7 | import numpy.random as nr 8 | 9 | 10 | class OUStrategy(ExplorationStrategy, Serializable): 11 | """ 12 | This strategy implements the Ornstein-Uhlenbeck process, which adds 13 | time-correlated noise to the actions taken by the deterministic policy. 14 | The OU process satisfies the following stochastic differential equation: 15 | dxt = theta*(mu - xt)*dt + sigma*dWt 16 | where Wt denotes the Wiener process 17 | """ 18 | 19 | def __init__(self, env_spec, mu=0, theta=0.15, sigma=0.3, **kwargs): 20 | assert isinstance(env_spec.action_space, Box) 21 | assert len(env_spec.action_space.shape) == 1 22 | Serializable.quick_init(self, locals()) 23 | self.mu = mu 24 | self.theta = theta 25 | self.sigma = sigma 26 | self.action_space = env_spec.action_space 27 | self.state = np.ones(self.action_space.flat_dim) * self.mu 28 | self.reset() 29 | 30 | def __getstate__(self): 31 | d = Serializable.__getstate__(self) 32 | d["state"] = self.state 33 | return d 34 | 35 | def __setstate__(self, d): 36 | Serializable.__setstate__(self, d) 37 | self.state = d["state"] 38 | 39 | @overrides 40 | def reset(self): 41 | self.state = np.ones(self.action_space.flat_dim) * self.mu 42 | 43 | def evolve_state(self): 44 | x = self.state 45 | dx = self.theta * (self.mu - x) + self.sigma * nr.randn(len(x)) 46 | self.state = x + dx 47 | return self.state 48 | 49 | @overrides 50 | def get_action(self, t, observation, policy, **kwargs): 51 | action, _ = policy.get_action(observation) 52 | ou_state = self.evolve_state() 53 | return np.clip(action + ou_state, self.action_space.low, self.action_space.high) 54 | 55 | 56 | if __name__ == "__main__": 57 | ou = OUStrategy(env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1,))), mu=0, theta=0.15, sigma=0.3) 58 | states = [] 59 | for i in range(1000): 60 | states.append(ou.evolve_state()[0]) 61 | import matplotlib.pyplot as plt 62 | 63 | plt.plot(states) 64 | plt.show() 65 | -------------------------------------------------------------------------------- /rllab/rllab/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/__init__.py 
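A short, self-contained sketch (not part of the repository) of the Euler-Maruyama discretization behind OUStrategy.evolve_state above (exploration_strategies/ou_strategy.py), which integrates dx_t = theta*(mu - x_t)*dt + sigma*dW_t with an implicit step size of dt = 1; the explicit dt argument and the function name here are assumptions made only to make that choice visible.

import numpy as np

def ou_step(x, mu=0.0, theta=0.15, sigma=0.3, dt=1.0):
    # dx_t = theta * (mu - x_t) * dt + sigma * dW_t, with dW_t ~ N(0, dt)
    noise = np.random.randn(*np.shape(x))
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * noise

# With dt = 1 this matches evolve_state(); the noise is time-correlated,
# which is why OUStrategy.reset() is called at the start of each episode.
x = np.zeros(3)
for _ in range(1000):
    x = ou_step(x)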
-------------------------------------------------------------------------------- /rllab/rllab/misc/mako_utils.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_rect_vertices(fromp, to, radius): 3 | x1, y1 = fromp 4 | x2, y2 = to 5 | if abs(y1 - y2) < 1e-6: 6 | dx = 0 7 | dy = radius 8 | else: 9 | dx = radius * 1.0 / (((x1 - x2) / (y1 - y2)) ** 2 + 1) ** 0.5 10 | # equivalently dx = radius * (y2-y1).to_f / ((x2-x1)**2 + (y2-y1)**2)**0.5 11 | dy = (radius**2 - dx**2) ** 0.5 12 | dy *= -1 if (x1 - x2) * (y1 - y2) > 0 else 1 13 | 14 | return ";".join([",".join(map(str, r)) for r in [ 15 | [x1 + dx, y1 + dy], 16 | [x2 + dx, y2 + dy], 17 | [x2 - dx, y2 - dy], 18 | [x1 - dx, y1 - dy], 19 | ]]) 20 | 21 | -------------------------------------------------------------------------------- /rllab/rllab/misc/meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/meta.py -------------------------------------------------------------------------------- /rllab/rllab/misc/resolve.py: -------------------------------------------------------------------------------- 1 | from pydoc import locate 2 | import types 3 | from rllab.misc.ext import iscanr 4 | 5 | 6 | def classesinmodule(module): 7 | md = module.__dict__ 8 | return [ 9 | md[c] for c in md if ( 10 | isinstance(md[c], type) and md[c].__module__ == module.__name__ 11 | ) 12 | ] 13 | 14 | 15 | def locate_with_hint(class_path, prefix_hints=[]): 16 | module_or_class = locate(class_path) 17 | if module_or_class is None: 18 | # for hint in iscanr(lambda x, y: x + "." + y, prefix_hints): 19 | # module_or_class = locate(hint + "." + class_path) 20 | # if module_or_class: 21 | # break 22 | hint = ".".join(prefix_hints) 23 | module_or_class = locate(hint + "." 
+ class_path) 24 | return module_or_class 25 | 26 | 27 | def load_class(class_path, superclass=None, prefix_hints=[]): 28 | module_or_class = locate_with_hint(class_path, prefix_hints) 29 | if module_or_class is None: 30 | raise ValueError("Cannot find module or class under path %s" % class_path) 31 | if type(module_or_class) == types.ModuleType: 32 | if superclass: 33 | classes = [x for x in classesinmodule(module_or_class) if issubclass(x, superclass)] 34 | if len(classes) == 0: 35 | if superclass: 36 | raise ValueError('Could not find any subclasses of %s defined in module %s' % (str(superclass), class_path)) 37 | else: 38 | raise ValueError('Could not find any classes defined in module %s' % (class_path)) 39 | elif len(classes) > 1: 40 | if superclass: 41 | raise ValueError('Multiple subclasses of %s are defined in the module %s' % (str(superclass), class_path)) 42 | else: 43 | raise ValueError('Multiple classes are defined in the module %s' % (class_path)) 44 | else: 45 | return classes[0] 46 | elif isinstance(module_or_class, type): 47 | if superclass is None or issubclass(module_or_class, superclass): 48 | return module_or_class 49 | else: 50 | raise ValueError('The class %s is not a subclass of %s' % (str(module_or_class), str(superclass))) 51 | else: 52 | raise ValueError('Unsupported object: %s' % str(module_or_class)) 53 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/.rvmrc: -------------------------------------------------------------------------------- 1 | rvm use 2.1.0@mjpy --create 2 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'pry' 4 | gem 'activesupport' 5 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (4.1.8) 5 | i18n (~> 0.6, >= 0.6.9) 6 | json (~> 1.7, >= 1.7.7) 7 | minitest (~> 5.1) 8 | thread_safe (~> 0.1) 9 | tzinfo (~> 1.1) 10 | coderay (1.1.0) 11 | i18n (0.7.0) 12 | json (1.8.1) 13 | method_source (0.8.2) 14 | minitest (5.5.1) 15 | pry (0.10.1) 16 | coderay (~> 1.1.0) 17 | method_source (~> 0.8.1) 18 | slop (~> 3.4) 19 | slop (3.6.0) 20 | thread_safe (0.3.4) 21 | tzinfo (1.2.2) 22 | thread_safe (~> 0.1) 23 | 24 | PLATFORMS 25 | ruby 26 | 27 | DEPENDENCIES 28 | activesupport 29 | pry 30 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .mjviewer import MjViewer 2 | from .mjcore import MjModel 3 | from .mjcore import register_license 4 | import os 5 | from .mjconstants import * 6 | 7 | register_license(os.path.join(os.path.dirname(__file__), 8 | '../../vendor/mujoco/mjkey.txt')) 9 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/gen_binding.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | parent_path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P ) 3 | mujoco_path=$parent_path/../../vendor/mujoco 4 | rm /tmp/code_gen_mujoco.h 5 | cat $mujoco_path/mjdata.h >> /tmp/code_gen_mujoco.h && \ 6 | cat $mujoco_path/mjmodel.h >> 
/tmp/code_gen_mujoco.h && \ 7 | cat $mujoco_path/mjrender.h >> /tmp/code_gen_mujoco.h && \ 8 | cat $mujoco_path/mjvisualize.h >> /tmp/code_gen_mujoco.h && \ 9 | ruby $parent_path/codegen.rb /tmp/code_gen_mujoco.h $mujoco_path/mjxmacro.h > $parent_path/mjtypes.py 10 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/mjconstants.py: -------------------------------------------------------------------------------- 1 | MOUSE_ROTATE_V = 1 2 | MOUSE_ROTATE_H = 2 3 | MOUSE_MOVE_V = 3 4 | MOUSE_MOVE_H = 4 5 | MOUSE_ZOOM = 5 6 | 7 | mjOBJ_BODY = 1 8 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/mjextra.py: -------------------------------------------------------------------------------- 1 | def append_objects(cur, extra): 2 | for i in range(cur.ngeom, cur.ngeom + extra.ngeom): 3 | cur.geoms[i] = extra.geoms[i - cur.ngeom] 4 | cur.ngeom = cur.ngeom + extra.ngeom 5 | if cur.ngeom > cur.maxgeom: 6 | raise ValueError("buffer limit exceeded!") 7 | -------------------------------------------------------------------------------- /rllab/rllab/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/optimizers/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/optimizers/minibatch_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class BatchDataset(object): 5 | 6 | def __init__(self, inputs, batch_size, extra_inputs=None): 7 | self._inputs = [ 8 | i for i in inputs 9 | ] 10 | if extra_inputs is None: 11 | extra_inputs = [] 12 | self._extra_inputs = extra_inputs 13 | self._batch_size = batch_size 14 | if batch_size is not None: 15 | self._ids = np.arange(self._inputs[0].shape[0]) 16 | self.update() 17 | 18 | @property 19 | def number_batches(self): 20 | if self._batch_size is None: 21 | return 1 22 | return int(np.ceil(self._inputs[0].shape[0] * 1.0 / self._batch_size)) 23 | 24 | def iterate(self, update=True): 25 | if self._batch_size is None: 26 | yield list(self._inputs) + list(self._extra_inputs) 27 | else: 28 | for itr in range(self.number_batches): 29 | batch_start = itr * self._batch_size 30 | batch_end = (itr + 1) * self._batch_size 31 | batch_ids = self._ids[batch_start:batch_end] 32 | batch = [d[batch_ids] for d in self._inputs] 33 | yield list(batch) + list(self._extra_inputs) 34 | if update: 35 | self.update() 36 | 37 | def update(self): 38 | np.random.shuffle(self._ids) 39 | -------------------------------------------------------------------------------- /rllab/rllab/plotter/__init__.py: -------------------------------------------------------------------------------- 1 | from .plotter import * 2 | -------------------------------------------------------------------------------- /rllab/rllab/plotter/plotter.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | from queue import Empty 3 | from multiprocessing import Process, Queue 4 | from rllab.sampler.utils import rollout 5 | import numpy as np 6 | 7 | __all__ = [ 8 | 'init_worker', 9 | 'init_plot', 10 | 'update_plot' 11 | ] 12 | 13 | process = None 14 | queue = None 15 | 16 | 17 | def _worker_start(): 18 | env = None 19 | policy = None 20 | max_length = None 21 | try: 22 | while True: 23 | msgs = {} 
24 | # Only fetch the last message of each type 25 | while True: 26 | try: 27 | msg = queue.get_nowait() 28 | msgs[msg[0]] = msg[1:] 29 | except Empty: 30 | break 31 | if 'stop' in msgs: 32 | break 33 | elif 'update' in msgs: 34 | env, policy = msgs['update'] 35 | # env.start_viewer() 36 | elif 'demo' in msgs: 37 | param_values, max_length = msgs['demo'] 38 | policy.set_param_values(param_values) 39 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5) 40 | else: 41 | if max_length: 42 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5) 43 | except KeyboardInterrupt: 44 | pass 45 | 46 | 47 | def _shutdown_worker(): 48 | if process: 49 | queue.put(['stop']) 50 | queue.close() 51 | process.join() 52 | 53 | 54 | def init_worker(): 55 | global process, queue 56 | queue = Queue() 57 | process = Process(target=_worker_start) 58 | process.start() 59 | atexit.register(_shutdown_worker) 60 | 61 | 62 | def init_plot(env, policy): 63 | queue.put(['update', env, policy]) 64 | 65 | 66 | def update_plot(policy, max_length=np.inf): 67 | queue.put(['demo', policy.get_param_values(), max_length]) 68 | -------------------------------------------------------------------------------- /rllab/rllab/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/policies/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/policies/base.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | 3 | 4 | class Policy(Parameterized): 5 | def __init__(self, env_spec): 6 | Parameterized.__init__(self) 7 | self._env_spec = env_spec 8 | 9 | # Should be implemented by all policies 10 | 11 | def get_action(self, observation): 12 | raise NotImplementedError 13 | 14 | def reset(self): 15 | pass 16 | 17 | @property 18 | def observation_space(self): 19 | return self._env_spec.observation_space 20 | 21 | @property 22 | def action_space(self): 23 | return self._env_spec.action_space 24 | 25 | @property 26 | def recurrent(self): 27 | """ 28 | Indicates whether the policy is recurrent. 29 | :return: 30 | """ 31 | return False 32 | 33 | def log_diagnostics(self, paths): 34 | """ 35 | Log extra information per iteration based on the collected paths 36 | """ 37 | pass 38 | 39 | @property 40 | def state_info_keys(self): 41 | """ 42 | Return keys for the information related to the policy's state when taking an action. 43 | :return: 44 | """ 45 | return list() 46 | 47 | def terminate(self): 48 | """ 49 | Clean up operation 50 | """ 51 | pass 52 | 53 | 54 | class StochasticPolicy(Policy): 55 | 56 | @property 57 | def distribution(self): 58 | """ 59 | :rtype Distribution 60 | """ 61 | raise NotImplementedError 62 | 63 | def dist_info_sym(self, obs_var, state_info_vars): 64 | """ 65 | Return the symbolic distribution information about the actions. 66 | :param obs_var: symbolic variable for observations 67 | :param state_info_vars: a dictionary whose values should contain information about the state of the policy at 68 | the time it received the observation 69 | :return: 70 | """ 71 | raise NotImplementedError 72 | 73 | def dist_info(self, obs, state_infos): 74 | """ 75 | Return the distribution information about the actions. 
76 | :param obs_var: observation values 77 | :param state_info_vars: a dictionary whose values should contain information about the state of the policy at 78 | the time it received the observation 79 | :return: 80 | """ 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /rllab/rllab/policies/uniform_control_policy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.core.serializable import Serializable 3 | from rllab.distributions.delta import Delta 4 | from rllab.policies.base import Policy 5 | from rllab.misc.overrides import overrides 6 | 7 | 8 | class UniformControlPolicy(Policy): 9 | def __init__( 10 | self, 11 | env_spec, 12 | ): 13 | Serializable.quick_init(self, locals()) 14 | super(UniformControlPolicy, self).__init__(env_spec=env_spec) 15 | 16 | @overrides 17 | def get_action(self, observation): 18 | return self.action_space.sample(), dict() 19 | 20 | def get_params_internal(self, **tags): 21 | return [] 22 | 23 | def get_actions(self, observations): 24 | return self.action_space.sample_n(len(observations)), dict() 25 | 26 | @property 27 | def vectorized(self): 28 | return True 29 | 30 | def reset(self, dones=None): 31 | pass 32 | 33 | @property 34 | def distribution(self): 35 | # Just a placeholder 36 | return Delta() 37 | -------------------------------------------------------------------------------- /rllab/rllab/q_functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/q_functions/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/q_functions/base.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | 3 | 4 | class QFunction(Parameterized): 5 | pass 6 | -------------------------------------------------------------------------------- /rllab/rllab/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /rllab/rllab/regressors/product_regressor.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import numpy as np 5 | from rllab.core.serializable import Serializable 6 | 7 | 8 | class ProductRegressor(Serializable): 9 | """ 10 | A class for performing MLE regression by fitting a product distribution to the outputs. A separate regressor will 11 | be trained for each individual input distribution. 
12 | """ 13 | 14 | def __init__(self, regressors): 15 | """ 16 | :param regressors: List of individual regressors 17 | """ 18 | Serializable.quick_init(self, locals()) 19 | self.regressors = regressors 20 | self.output_dims = [x.output_dim for x in regressors] 21 | 22 | def _split_ys(self, ys): 23 | ys = np.asarray(ys) 24 | split_ids = np.cumsum(self.output_dims)[:-1] 25 | return np.split(ys, split_ids, axis=1) 26 | 27 | def fit(self, xs, ys): 28 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys)): 29 | regressor.fit(xs, split_ys) 30 | 31 | def predict(self, xs): 32 | return np.concatenate([ 33 | regressor.predict(xs) for regressor in self.regressors 34 | ], axis=1) 35 | 36 | def sample_predict(self, xs): 37 | return np.concatenate([ 38 | regressor.sample_predict(xs) for regressor in self.regressors 39 | ], axis=1) 40 | 41 | def predict_log_likelihood(self, xs, ys): 42 | return np.sum([ 43 | regressor.predict_log_likelihood(xs, split_ys) 44 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys)) 45 | ], axis=0) 46 | 47 | def get_param_values(self, **tags): 48 | return np.concatenate( 49 | [regressor.get_param_values(**tags) for regressor in self.regressors] 50 | ) 51 | 52 | def set_param_values(self, flattened_params, **tags): 53 | param_dims = [ 54 | np.prod(regressor.get_param_shapes(**tags)) 55 | for regressor in self.regressors 56 | ] 57 | split_ids = np.cumsum(param_dims)[:-1] 58 | for regressor, split_param_values in zip(self.regressors, np.split(flattened_params, split_ids)): 59 | regressor.set_param_values(split_param_values) 60 | -------------------------------------------------------------------------------- /rllab/rllab/sampler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/sampler/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/sampler/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.misc import tensor_utils 3 | import time 4 | 5 | 6 | def rollout(env, agent, max_path_length=np.inf, animated=False, speedup=1, save_video=True, video_filename='sim_out.mp4', reset_args=None, policy_contexts=None): 7 | observations = [] 8 | actions = [] 9 | rewards = [] 10 | agent_infos = [] 11 | env_infos = [] 12 | images = [] 13 | o = env.reset(reset_args=reset_args, policy_contexts=policy_contexts) 14 | agent.reset() 15 | path_length = 0 16 | if animated: 17 | env.render() 18 | while path_length < max_path_length: 19 | a, agent_info = agent.get_action(o) 20 | next_o, r, d, env_info = env.step(a) 21 | observations.append(env.observation_space.flatten(o)) 22 | rewards.append(r) 23 | actions.append(env.action_space.flatten(a)) 24 | agent_infos.append(agent_info) 25 | env_infos.append(env_info) 26 | path_length += 1 27 | if d: # and not animated: # TODO testing 28 | break 29 | o = next_o 30 | if animated: 31 | env.render() 32 | timestep = 0.05 33 | time.sleep(timestep / speedup) 34 | if save_video: 35 | from PIL import Image 36 | image = env.wrapped_env.wrapped_env.get_viewer().get_image() 37 | pil_image = Image.frombytes('RGB', (image[1], image[2]), image[0]) 38 | images.append(np.flipud(np.array(pil_image))) 39 | 40 | if animated: 41 | if save_video and len(images) >= max_path_length: 42 | import moviepy.editor as mpy 43 | clip = mpy.ImageSequenceClip(images, fps=20*speedup) 44 | 
if video_filename[-3:] == 'gif': 45 | clip.write_gif(video_filename, fps=20*speedup) 46 | else: 47 | clip.write_videofile(video_filename, fps=20*speedup) 48 | #return 49 | 50 | return dict( 51 | observations=tensor_utils.stack_tensor_list(observations), 52 | actions=tensor_utils.stack_tensor_list(actions), 53 | rewards=tensor_utils.stack_tensor_list(rewards), 54 | agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos), 55 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos), 56 | ) -------------------------------------------------------------------------------- /rllab/rllab/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .product import Product 2 | from .discrete import Discrete 3 | from .box import Box 4 | 5 | __all__ = ["Product", "Discrete", "Box"] -------------------------------------------------------------------------------- /rllab/rllab/spaces/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Space(object): 5 | """ 6 | Provides a classification of state spaces and action spaces, 7 | so you can write generic code that applies to any Environment. 8 | E.g. to choose a random action. 9 | """ 10 | 11 | def sample(self, seed=0): 12 | """ 13 | Uniformly sample a random element of this space 14 | """ 15 | raise NotImplementedError 16 | 17 | def contains(self, x): 18 | """ 19 | Return boolean specifying if x is a valid 20 | member of this space 21 | """ 22 | raise NotImplementedError 23 | 24 | def flatten(self, x): 25 | raise NotImplementedError 26 | 27 | def unflatten(self, x): 28 | raise NotImplementedError 29 | 30 | def flatten_n(self, xs): 31 | raise NotImplementedError 32 | 33 | def unflatten_n(self, xs): 34 | raise NotImplementedError 35 | 36 | @property 37 | def flat_dim(self): 38 | """ 39 | The dimension of the flattened vector of the tensor representation 40 | """ 41 | raise NotImplementedError 42 | 43 | def new_tensor_variable(self, name, extra_dims): 44 | """ 45 | Create a Theano tensor variable given the name and extra dimensions prepended 46 | :param name: name of the variable 47 | :param extra_dims: extra dimensions in the front 48 | :return: the created tensor variable 49 | """ 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /rllab/rllab/spaces/box.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from .base import Space 3 | import numpy as np 4 | from rllab.misc import ext 5 | import theano 6 | 7 | 8 | class Box(Space): 9 | """ 10 | A box in R^n. 11 | I.e., each coordinate is bounded.
12 | """ 13 | 14 | def __init__(self, low, high, shape=None): 15 | """ 16 | Two kinds of valid input: 17 | Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided 18 | Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape 19 | """ 20 | if shape is None: 21 | assert low.shape == high.shape 22 | self.low = low 23 | self.high = high 24 | else: 25 | assert np.isscalar(low) and np.isscalar(high) 26 | self.low = low + np.zeros(shape) 27 | self.high = high + np.zeros(shape) 28 | 29 | def sample(self): 30 | return np.random.uniform(low=self.low, high=self.high, size=self.low.shape) 31 | 32 | def contains(self, x): 33 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all() 34 | 35 | @property 36 | def shape(self): 37 | return self.low.shape 38 | 39 | @property 40 | def flat_dim(self): 41 | return np.prod(self.low.shape) 42 | 43 | @property 44 | def bounds(self): 45 | return self.low, self.high 46 | 47 | def flatten(self, x): 48 | return np.asarray(x).flatten() 49 | 50 | def unflatten(self, x): 51 | return np.asarray(x).reshape(self.shape) 52 | 53 | def flatten_n(self, xs): 54 | xs = np.asarray(xs) 55 | return xs.reshape((xs.shape[0], -1)) 56 | 57 | def unflatten_n(self, xs): 58 | xs = np.asarray(xs) 59 | return xs.reshape((xs.shape[0],) + self.shape) 60 | 61 | def __repr__(self): 62 | return "Box" + str(self.shape) 63 | 64 | def __eq__(self, other): 65 | return isinstance(other, Box) and np.allclose(self.low, other.low) and \ 66 | np.allclose(self.high, other.high) 67 | 68 | def __hash__(self): 69 | return hash((self.low, self.high)) 70 | 71 | def new_tensor_variable(self, name, extra_dims): 72 | return ext.new_tensor( 73 | name=name, 74 | ndim=extra_dims+1, 75 | dtype=theano.config.floatX 76 | ) 77 | 78 | -------------------------------------------------------------------------------- /rllab/rllab/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | from .base import Space 2 | import numpy as np 3 | from rllab.misc import special 4 | from rllab.misc import ext 5 | 6 | 7 | class Discrete(Space): 8 | """ 9 | {0,1,...,n-1} 10 | """ 11 | 12 | def __init__(self, n): 13 | self._n = n 14 | 15 | @property 16 | def n(self): 17 | return self._n 18 | 19 | def sample(self): 20 | return np.random.randint(self.n) 21 | 22 | def contains(self, x): 23 | x = np.asarray(x) 24 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n 25 | 26 | def __repr__(self): 27 | return "Discrete(%d)" % self.n 28 | 29 | def __eq__(self, other): 30 | return self.n == other.n 31 | 32 | def flatten(self, x): 33 | return special.to_onehot(x, self.n) 34 | 35 | def unflatten(self, x): 36 | return special.from_onehot(x) 37 | 38 | def flatten_n(self, x): 39 | return special.to_onehot_n(x, self.n) 40 | 41 | def unflatten_n(self, x): 42 | return special.from_onehot_n(x) 43 | 44 | @property 45 | def flat_dim(self): 46 | return self.n 47 | 48 | def weighted_sample(self, weights): 49 | return special.weighted_sample(weights, range(self.n)) 50 | 51 | @property 52 | def default_value(self): 53 | return 0 54 | 55 | def new_tensor_variable(self, name, extra_dims): 56 | if self.n <= 2 ** 8: 57 | return ext.new_tensor( 58 | name=name, 59 | ndim=extra_dims+1, 60 | dtype='uint8' 61 | ) 62 | elif self.n <= 2 ** 16: 63 | return ext.new_tensor( 64 | name=name, 65 | ndim=extra_dims+1, 66 | dtype='uint16' 67 | ) 68 | else: 69 | return ext.new_tensor( 70 | name=name, 71 | ndim=extra_dims+1, 72 | 
dtype='uint32' 73 | ) 74 | 75 | def __eq__(self, other): 76 | if not isinstance(other, Discrete): 77 | return False 78 | return self.n == other.n 79 | 80 | def __hash__(self): 81 | return hash(self.n) -------------------------------------------------------------------------------- /rllab/rllab/viskit/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /rllab/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/npg.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from sandbox.rocky.tf.algos.npo import NPO 4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 5 | 6 | 7 | class TRPO(NPO): 8 | """ 9 | Trust Region Policy Optimization 10 | """ 11 | 12 | def __init__( 13 | self, 14 | optimizer=None, 15 | optimizer_args=None, 16 | **kwargs): 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/core/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/core/layers_powered.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.core.parameterized import Parameterized 2 | import sandbox.rocky.tf.core.layers as L 3 | import itertools 4 | 5 | 6 | class LayersPowered(Parameterized): 7 | 8 | def __init__(self, output_layers, input_layers=None): 9 | self._output_layers = output_layers 10 | self._input_layers = input_layers 11 | Parameterized.__init__(self) 12 | 13 | def get_params_internal(self, **tags): 14 | layers = L.get_all_layers(self._output_layers, treat_as_input=self._input_layers) 15 | params = itertools.chain.from_iterable(l.get_params(**tags) for l in 
layers) 16 | return L.unique(params) 17 | 18 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | class Distribution(object): 6 | @property 7 | def dim(self): 8 | raise NotImplementedError 9 | 10 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 11 | """ 12 | Compute the symbolic KL divergence of two distributions 13 | """ 14 | raise NotImplementedError 15 | 16 | def kl(self, old_dist_info, new_dist_info): 17 | """ 18 | Compute the KL divergence of two distributions 19 | """ 20 | raise NotImplementedError 21 | 22 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 23 | raise NotImplementedError 24 | 25 | def entropy(self, dist_info): 26 | raise NotImplementedError 27 | 28 | def log_likelihood_sym(self, x_var, dist_info_vars): 29 | raise NotImplementedError 30 | 31 | def log_likelihood(self, xs, dist_info): 32 | raise NotImplementedError 33 | 34 | @property 35 | def dist_info_specs(self): 36 | raise NotImplementedError 37 | 38 | @property 39 | def dist_info_keys(self): 40 | return [k for k, _ in self.dist_info_specs] 41 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .base import Distribution 4 | import tensorflow as tf 5 | import numpy as np 6 | 7 | TINY = 1e-8 8 | 9 | 10 | class Bernoulli(Distribution): 11 | def __init__(self, dim): 12 | self._dim = dim 13 | 14 | @property 15 | def dim(self): 16 | return self._dim 17 | 18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 19 | old_p = old_dist_info_vars["p"] 20 | new_p = new_dist_info_vars["p"] 21 | kl = old_p * (tf.log(old_p + TINY) - tf.log(new_p + TINY)) + \ 22 | (1 - old_p) * (tf.log(1 - old_p + TINY) - tf.log(1 - new_p + TINY)) 23 | ndims = kl.get_shape().ndims 24 | return tf.reduce_sum(kl, axis=ndims - 1) 25 | 26 | def kl(self, old_dist_info, new_dist_info): 27 | old_p = old_dist_info["p"] 28 | new_p = new_dist_info["p"] 29 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \ 30 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY)) 31 | return np.sum(kl, axis=-1) 32 | 33 | def sample(self, dist_info): 34 | p = np.asarray(dist_info["p"]) 35 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p) 36 | 37 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 38 | old_p = old_dist_info_vars["p"] 39 | new_p = new_dist_info_vars["p"] 40 | ndims = old_p.get_shape().ndims 41 | return tf.reduce_prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY), 42 | axis=ndims - 1) 43 | 44 | def log_likelihood_sym(self, x_var, dist_info_vars): 45 | p = dist_info_vars["p"] 46 | ndims = p.get_shape().ndims 47 | return tf.reduce_sum(x_var * tf.log(p + TINY) + (1 - x_var) * tf.log(1 - p + TINY), axis=ndims - 1) 48 | 49 | def log_likelihood(self, xs, dist_info): 50 | p = dist_info["p"] 51 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1) 52 | 53 | def entropy(self, dist_info): 54 | p = dist_info["p"] 
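        # Elementwise Bernoulli entropy, -p*log(p) - (1-p)*log(1-p), summed over the
        # event dimension in the return below; TINY keeps the logs finite when p hits 0 or 1.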
55 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1) 56 | 57 | @property 58 | def dist_info_keys(self): 59 | return ["p"] 60 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from sandbox.rocky.tf.distributions.diagonal_gaussian import DiagonalGaussian 5 | 6 | RecurrentDiagonalGaussian = DiagonalGaussian 7 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/envs/vec_env_executor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pickle 3 | from sandbox.rocky.tf.misc import tensor_utils 4 | 5 | 6 | class VecEnvExecutor(object): 7 | def __init__(self, envs, max_path_length): 8 | self.envs = envs 9 | self._action_space = envs[0].action_space 10 | self._observation_space = envs[0].observation_space 11 | self.ts = np.zeros(len(self.envs), dtype='int') 12 | self.max_path_length = max_path_length 13 | 14 | def step(self, action_n, reset_args=None, policy_contexts=None): 15 | if reset_args is None: 16 | policy_contexts = [None]*len(self.envs) 17 | reset_args = [None]*len(self.envs) 18 | all_results = [env.step(a) for (a, env) in zip(action_n, self.envs)] 19 | obs, rewards, dones, env_infos = list(map(list, list(zip(*all_results)))) 20 | dones = np.asarray(dones) 21 | rewards = np.asarray(rewards) 22 | self.ts += 1 23 | if self.max_path_length is not None: 24 | dones[self.ts >= self.max_path_length] = True 25 | for (i, done) in enumerate(dones): 26 | if done: 27 | obs[i] = self.envs[i].reset(reset_args=reset_args[i], policy_contexts=policy_contexts[i]) 28 | self.ts[i] = 0 29 | return obs, rewards, dones, tensor_utils.stack_tensor_dict_list(env_infos) 30 | 31 | def reset(self, reset_args=None, policy_contexts=None): 32 | if reset_args is not None: 33 | assert policy_contexts is not None 34 | results = [env.reset(reset_args=arg, policy_contexts=policy_context) for env, arg, policy_context in zip(self.envs, reset_args, policy_contexts)] 35 | else: 36 | results = [env.reset() for env in self.envs] 37 | self.ts[:] = 0 38 | return results 39 | 40 | @property 41 | def num_envs(self): 42 | return len(self.envs) 43 | 44 | @property 45 | def action_space(self): 46 | return self._action_space 47 | 48 | @property 49 | def observation_space(self): 50 | return self._observation_space 51 | 52 | def terminate(self): 53 | pass -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer 
import ConjugateGradientOptimizer 6 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp 7 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 8 | from sandbox.rocky.tf.envs.base import TfEnv 9 | from rllab.misc.instrument import stub, run_experiment_lite 10 | 11 | env = TfEnv(normalize(CartpoleEnv())) 12 | 13 | policy = GaussianMLPPolicy( 14 | name="policy", 15 | env_spec=env.spec, 16 | # The neural network policy should have two hidden layers, each with 32 hidden units. 17 | hidden_sizes=(32, 32) 18 | ) 19 | 20 | baseline = LinearFeatureBaseline(env_spec=env.spec) 21 | 22 | algo = TRPO( 23 | env=env, 24 | policy=policy, 25 | baseline=baseline, 26 | batch_size=4000, 27 | max_path_length=100, 28 | n_itr=40, 29 | discount=0.99, 30 | step_size=0.01, 31 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 32 | 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.policies.gaussian_gru_policy import GaussianGRUPolicy 6 | from sandbox.rocky.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy 7 | from sandbox.rocky.tf.envs.base import TfEnv 8 | import sandbox.rocky.tf.core.layers as L 9 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp 10 | from rllab.misc.instrument import stub, run_experiment_lite 11 | 12 | env = TfEnv(normalize(CartpoleEnv())) 13 | 14 | policy = GaussianLSTMPolicy( 15 | name="policy", 16 | env_spec=env.spec, 17 | lstm_layer_cls=L.TfBasicLSTMLayer, 18 | # gru_layer_cls=L.GRULayer, 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = TRPO( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=100, 29 | n_itr=10, 30 | discount=0.99, 31 | step_size=0.01, 32 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/vpg_cartpole.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.vpg import VPG 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from rllab.misc.instrument import stub, run_experiment_lite 8 | 9 | env = TfEnv(normalize(CartpoleEnv())) 10 | 11 | policy = GaussianMLPPolicy( 12 | name="policy", 13 | env_spec=env.spec, 14 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
15 | hidden_sizes=(32, 32) 16 | ) 17 | 18 | baseline = LinearFeatureBaseline(env_spec=env.spec) 19 | 20 | algo = VPG( 21 | env=env, 22 | policy=policy, 23 | baseline=baseline, 24 | batch_size=10000, 25 | max_path_length=100, 26 | n_itr=40, 27 | discount=0.99, 28 | optimizer_args=dict( 29 | tf_optimizer_args=dict( 30 | learning_rate=0.01, 31 | ) 32 | ) 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/misc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/policies/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/policies/uniform_control_policy.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.policies.base import Policy 2 | from rllab.core.serializable import Serializable 3 | 4 | 5 | class UniformControlPolicy(Policy, Serializable): 6 | def __init__( 7 | self, 8 | env_spec, 9 | ): 10 | Serializable.quick_init(self, locals()) 11 | super(UniformControlPolicy, self).__init__(env_spec=env_spec) 12 | 13 | @property 14 | def vectorized(self): 15 | return True 16 | 17 | def get_action(self, observation): 18 | return self.action_space.sample(), dict() 19 | 20 | def get_actions(self, observations): 21 | return self.action_space.sample_n(len(observations)), dict() 22 | 23 | def get_params_internal(self, **tags): 24 | return [] 25 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/q_functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/q_functions/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/q_functions/base.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.core.parameterized import Parameterized 2 | 3 | class QFunction(Parameterized): 4 | pass 5 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .product import Product 2 | from .discrete import Discrete 3 | from .box import Box 4 | 5 | __all__ = ["Product", "Discrete", "Box"] 6 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/box.py: 
-------------------------------------------------------------------------------- 1 | from rllab.spaces.box import Box as TheanoBox 2 | import tensorflow as tf 3 | 4 | 5 | class Box(TheanoBox): 6 | def new_tensor_variable(self, name, extra_dims, flatten=True): 7 | if flatten: 8 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + [self.flat_dim], name=name) 9 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + list(self.shape), name=name) 10 | 11 | @property 12 | def dtype(self): 13 | return tf.float32 14 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | from rllab.spaces.base import Space 2 | import numpy as np 3 | from rllab.misc import special 4 | from rllab.misc import ext 5 | import tensorflow as tf 6 | 7 | 8 | class Discrete(Space): 9 | """ 10 | {0,1,...,n-1} 11 | """ 12 | 13 | def __init__(self, n): 14 | self._n = n 15 | 16 | @property 17 | def n(self): 18 | return self._n 19 | 20 | def sample(self): 21 | return np.random.randint(self.n) 22 | 23 | def sample_n(self, n): 24 | return np.random.randint(low=0, high=self.n, size=n) 25 | 26 | def contains(self, x): 27 | x = np.asarray(x) 28 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return self.n == other.n 35 | 36 | def flatten(self, x): 37 | return special.to_onehot(x, self.n) 38 | 39 | def unflatten(self, x): 40 | return special.from_onehot(x) 41 | 42 | def flatten_n(self, x): 43 | return special.to_onehot_n(x, self.n) 44 | 45 | def unflatten_n(self, x): 46 | return special.from_onehot_n(x) 47 | 48 | @property 49 | def default_value(self): 50 | return 0 51 | 52 | @property 53 | def flat_dim(self): 54 | return self.n 55 | 56 | def weighted_sample(self, weights): 57 | return special.weighted_sample(weights, range(self.n)) 58 | 59 | def new_tensor_variable(self, name, extra_dims): 60 | # needed for safe conversion to float32 61 | return tf.placeholder(dtype=tf.uint8, shape=[None] * extra_dims + [self.flat_dim], name=name) 62 | 63 | @property 64 | def dtype(self): 65 | return tf.uint8 66 | 67 | def __eq__(self, other): 68 | if not isinstance(other, Discrete): 69 | return False 70 | return self.n == other.n 71 | 72 | def __hash__(self): 73 | return hash(self.n) 74 | 75 | -------------------------------------------------------------------------------- /rllab/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/scripts/__init__.py -------------------------------------------------------------------------------- /rllab/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from rllab.sampler.utils import rollout 5 | from rllab.algos.batch_polopt import BatchPolopt 6 | import argparse 7 | import joblib 8 | import uuid 9 | import os 10 | import random 11 | import numpy as np 12 | import json 13 | import subprocess 14 | from rllab.misc import logger 15 | from rllab.misc.instrument import to_local_command 16 | 17 | filename = str(uuid.uuid4()) 18 | 19 | if __name__ == "__main__": 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('file', type=str, 23 | help='path to the snapshot file') 24 | 
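    # Optional: redirect the resumed run's logs to a fresh directory; if omitted,
    # whatever log settings were recorded in params.json are reused.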
parser.add_argument('--log_dir', type=str, default=None, 25 | help='path to the new log directory') 26 | # Look for params.json file 27 | args = parser.parse_args() 28 | parent_dir = os.path.dirname(os.path.realpath(args.file)) 29 | json_file_path = os.path.join(parent_dir, "params.json") 30 | logger.log("Looking for params.json at %s..." % json_file_path) 31 | try: 32 | with open(json_file_path, "r") as f: 33 | params = json.load(f) 34 | # exclude certain parameters 35 | excluded = ['json_args'] 36 | for k in excluded: 37 | if k in params: 38 | del params[k] 39 | for k, v in list(params.items()): 40 | if v is None: 41 | del params[k] 42 | if args.log_dir is not None: 43 | params['log_dir'] = args.log_dir 44 | params['resume_from'] = args.file 45 | command = to_local_command(params, script='scripts/run_experiment_lite.py') 46 | print(command) 47 | try: 48 | subprocess.call(command, shell=True, env=os.environ) 49 | except Exception as e: 50 | print(e) 51 | if isinstance(e, KeyboardInterrupt): 52 | raise 53 | except IOError as e: 54 | logger.log("Failed to find json file. Continuing in non-stub mode...") 55 | data = joblib.load(args.file) 56 | assert 'algo' in data 57 | algo = data['algo'] 58 | assert isinstance(algo, BatchPolopt) 59 | algo.train() 60 | -------------------------------------------------------------------------------- /rllab/scripts/setup_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that conda is available 3 | 4 | hash conda 2>/dev/null || { 5 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 6 | exit 0 7 | } 8 | 9 | echo "Installing system dependencies" 10 | echo "You will probably be asked for your sudo password." 11 | sudo apt-get update 12 | sudo apt-get install -y python-pip python-dev swig cmake build-essential zlib1g-dev 13 | sudo apt-get build-dep -y python-pygame 14 | sudo apt-get build-dep -y python-scipy 15 | 16 | # Make sure that we're under the directory of the project 17 | cd "$(dirname "$0")/.." 18 | 19 | echo "Creating conda environment..." 20 | conda env create -f environment.yml 21 | conda env update 22 | 23 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 24 | -------------------------------------------------------------------------------- /rllab/scripts/setup_mujoco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$(uname)" == "Darwin" ]; then 4 | mujoco_file="libmujoco131.dylib" 5 | glfw_file="libglfw.3.dylib" 6 | zip_file="mjpro131_osx.zip" 7 | mktemp_cmd="mktemp -d /tmp/mujoco" 8 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 9 | mujoco_file="libmujoco131.so" 10 | glfw_file="libglfw.so.3" 11 | zip_file="mjpro131_linux.zip" 12 | mktemp_cmd="mktemp -d" 13 | fi 14 | 15 | if [ ! -f vendor/mujoco/$mujoco_file ]; then 16 | read -e -p "Please enter the path to the mujoco zip file [$zip_file]:" path 17 | path=${path:-$zip_file} 18 | eval path=\"$path\" 19 | if [ ! -f $path ]; then 20 | echo "No file found at $path" 21 | exit 0 22 | fi 23 | rm -r /tmp/mujoco 24 | dir=`$mktemp_cmd` 25 | unzip $path -d $dir 26 | if [ ! -f $dir/mjpro131/bin/$mujoco_file ]; then 27 | echo "mjpro/$mujoco_file not found. Make sure you have the correct file (most likely named $zip_file)" 28 | exit 0 29 | fi 30 | if [ ! 
-f $dir/mjpro131/bin/$glfw_file ]; then 31 | echo "mjpro/$glfw_file not found. Make sure you have the correct file (most likely named $zip_file)" 32 | exit 0 33 | fi 34 | 35 | mkdir -p vendor/mujoco 36 | cp $dir/mjpro131/bin/$mujoco_file vendor/mujoco/ 37 | cp $dir/mjpro131/bin/$glfw_file vendor/mujoco/ 38 | fi 39 | 40 | if [ ! -f vendor/mujoco/mjkey.txt ]; then 41 | read -e -p "Please enter the path to the mujoco license file [mjkey.txt]:" path 42 | path=${path:-mjkey.txt} 43 | eval path=$path 44 | if [ ! -f $path ]; then 45 | echo "No file found at $path" 46 | exit 0 47 | fi 48 | cp $path vendor/mujoco/mjkey.txt 49 | fi 50 | 51 | echo "Mujoco has been set up!" 52 | -------------------------------------------------------------------------------- /rllab/scripts/setup_osx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that pip is available 3 | hash brew 2>/dev/null || { 4 | echo "Please install homebrew before continuing. You can use the following command to install:" 5 | echo "/usr/bin/ruby -e \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)\"" 6 | exit 0 7 | } 8 | 9 | hash conda 2>/dev/null || { 10 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 11 | exit 0 12 | } 13 | 14 | 15 | echo "Installing system dependencies" 16 | echo "You will probably be asked for your sudo password." 17 | 18 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi 19 | 20 | # Make sure that we're under the directory of the project 21 | cd "$(dirname "$0")/.." 22 | echo "Creating conda environment..." 23 | conda env create -f environment.yml 24 | conda env update 25 | 26 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 
27 | -------------------------------------------------------------------------------- /rllab/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import joblib 4 | import tensorflow as tf 5 | 6 | from rllab.misc.console import query_yes_no 7 | from rllab.sampler.utils import rollout 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('file', type=str, 13 | help='path to the snapshot file') 14 | parser.add_argument('--max_path_length', type=int, default=1000, 15 | help='Max length of rollout') 16 | parser.add_argument('--speedup', type=float, default=1, 17 | help='Speedup') 18 | args = parser.parse_args() 19 | 20 | # If the snapshot file use tensorflow, do: 21 | # import tensorflow as tf 22 | # with tf.Session(): 23 | # [rest of the code] 24 | with tf.Session() as sess: 25 | data = joblib.load(args.file) 26 | policy = data['policy'] 27 | env = data['env'] 28 | while True: 29 | path = rollout(env, policy, max_path_length=args.max_path_length, 30 | animated=True, speedup=args.speedup) 31 | if not query_yes_no('Continue simulation?'): 32 | break 33 | -------------------------------------------------------------------------------- /rllab/scripts/submit_gym.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import gym 7 | from rllab.viskit.core import load_params 8 | 9 | if __name__ == "__main__": 10 | # rl_gym.api_key = 'g8JOpnNVmcjMShBiFtyji2VWX3P2uCzc' 11 | if 'OPENAI_GYM_API_KEY' not in os.environ: 12 | raise ValueError("OpenAi Gym API key not configured. Please register an account on https://gym.openai.com and" 13 | " set the OPENAI_GYM_API_KEY environment variable, and try the script again.") 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('log_dir', type=str, 17 | help='path to the logging directory') 18 | parser.add_argument('--algorithm_id', type=str, default=None, help='Algorithm ID') 19 | args = parser.parse_args() 20 | snapshot_dir = osp.abspath(osp.join(args.log_dir, "..")) 21 | params_file_path = osp.join(snapshot_dir, "params.json") 22 | gym.upload(args.log_dir, algorithm_id=args.algorithm_id) 23 | -------------------------------------------------------------------------------- /rllab/scripts/sync_s3.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | from rllab import config 4 | import os 5 | import argparse 6 | import ast 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('folder', type=str, default=None, nargs='?') 11 | parser.add_argument('--dry', action='store_true', default=False) 12 | parser.add_argument('--bare', action='store_true', default=False) 13 | args = parser.parse_args() 14 | remote_dir = config.AWS_S3_PATH 15 | local_dir = os.path.join(config.LOG_DIR, "s3") 16 | if args.folder: 17 | remote_dir = os.path.join(remote_dir, args.folder) 18 | local_dir = os.path.join(local_dir, args.folder) 19 | if args.bare: 20 | command = (""" 21 | aws s3 sync {remote_dir} {local_dir} --exclude '*' --include '*.csv' --include '*.json' --content-type "UTF-8" 22 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 23 | else: 24 | command = (""" 25 | aws s3 sync {remote_dir} {local_dir} --exclude '*stdout.log' --exclude '*stdouterr.log' --content-type "UTF-8" 26 | """.format(local_dir=local_dir, 
remote_dir=remote_dir)) 27 | if args.dry: 28 | print(command) 29 | else: 30 | os.system(command) -------------------------------------------------------------------------------- /rllab/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup,find_packages 3 | 4 | setup( 5 | name='rllab', 6 | packages=[package for package in find_packages() 7 | if package.startswith('rllab')], 8 | version='0.1.0', 9 | ) 10 | -------------------------------------------------------------------------------- /rllab/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/__init__.py -------------------------------------------------------------------------------- /rllab/tests/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/tests/algos/test_trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rllab.envs.base import Env, Step 4 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 5 | from rllab.baselines.zero_baseline import ZeroBaseline 6 | from rllab.algos.trpo import TRPO 7 | from rllab.spaces.box import Box 8 | import lasagne.nonlinearities 9 | import numpy as np 10 | import theano.tensor as TT 11 | 12 | 13 | class DummyEnv(Env): 14 | @property 15 | def observation_space(self): 16 | return Box(low=-np.inf, high=np.inf, shape=(1,)) 17 | 18 | @property 19 | def action_space(self): 20 | return Box(low=-5.0, high=5.0, shape=(1,)) 21 | 22 | def reset(self): 23 | return np.zeros(1) 24 | 25 | def step(self, action): 26 | return Step(observation=np.zeros(1), reward=np.random.normal(), done=True) 27 | 28 | 29 | def naive_relu(x): 30 | return TT.max(x, 0) 31 | 32 | 33 | def test_trpo_relu_nan(): 34 | env = DummyEnv() 35 | policy = GaussianMLPPolicy( 36 | env_spec=env.spec, 37 | hidden_nonlinearity=naive_relu, 38 | hidden_sizes=(1,)) 39 | baseline = ZeroBaseline(env_spec=env.spec) 40 | algo = TRPO( 41 | env=env, policy=policy, baseline=baseline, n_itr=1, batch_size=1000, max_path_length=100, 42 | step_size=0.001 43 | ) 44 | algo.train() 45 | assert not np.isnan(np.sum(policy.get_param_values())) 46 | 47 | 48 | def test_trpo_deterministic_nan(): 49 | env = DummyEnv() 50 | policy = GaussianMLPPolicy( 51 | env_spec=env.spec, 52 | hidden_sizes=(1,)) 53 | policy._l_log_std.param.set_value([np.float32(np.log(1e-8))]) 54 | baseline = ZeroBaseline(env_spec=env.spec) 55 | algo = TRPO( 56 | env=env, policy=policy, baseline=baseline, n_itr=10, batch_size=1000, max_path_length=100, 57 | step_size=0.01 58 | ) 59 | algo.train() 60 | assert not np.isnan(np.sum(policy.get_param_values())) 61 | -------------------------------------------------------------------------------- /rllab/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/envs/__init__.py -------------------------------------------------------------------------------- /rllab/tests/envs/test_maze_env.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from rllab.envs.mujoco.maze.maze_env_utils import line_intersect, 
ray_segment_intersect 4 | 5 | 6 | def test_line_intersect(): 7 | assert line_intersect((0, 0), (0, 1), (0, 0), (1, 0))[:2] == (0, 0) 8 | assert line_intersect((0, 0), (0, 1), (0, 0), (0, 1))[2] == 0 9 | assert ray_segment_intersect(ray=((0, 0), 0), segment=((1, -1), (1, 1))) == (1, 0) 10 | assert ray_segment_intersect(ray=((0, 0), math.pi), segment=((1, -1), (1, 1))) is None 11 | -------------------------------------------------------------------------------- /rllab/tests/regression_tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/tests/regression_tests/test_issue_3.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from nose2.tools import such 5 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | from rllab.algos.trpo import TRPO 8 | from rllab.baselines.zero_baseline import ZeroBaseline 9 | 10 | with such.A("Issue #3") as it: 11 | @it.should("be fixed") 12 | def test_issue_3(): 13 | """ 14 | As reported in https://github.com/rllab/rllab/issues/3, the adaptive_std parameter was not functioning properly 15 | """ 16 | env = CartpoleEnv() 17 | policy = GaussianMLPPolicy( 18 | env_spec=env, 19 | adaptive_std=True 20 | ) 21 | baseline = ZeroBaseline(env_spec=env.spec) 22 | algo = TRPO( 23 | env=env, 24 | policy=policy, 25 | baseline=baseline, 26 | batch_size=100, 27 | n_itr=1 28 | ) 29 | algo.train() 30 | 31 | it.createTests(globals()) 32 | -------------------------------------------------------------------------------- /rllab/tests/test_baselines.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['THEANO_FLAGS'] = 'mode=FAST_COMPILE,optimizer=None' 4 | 5 | from rllab.algos.vpg import VPG 6 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 7 | from rllab.baselines.zero_baseline import ZeroBaseline 8 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 9 | from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline 10 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 11 | from nose2 import tools 12 | 13 | 14 | baselines = [ZeroBaseline, LinearFeatureBaseline, GaussianMLPBaseline] 15 | 16 | 17 | @tools.params(*baselines) 18 | def test_baseline(baseline_cls): 19 | env = CartpoleEnv() 20 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6,)) 21 | baseline = baseline_cls(env_spec=env.spec) 22 | algo = VPG( 23 | env=env, policy=policy, baseline=baseline, 24 | n_itr=1, batch_size=1000, max_path_length=100 25 | ) 26 | algo.train() 27 | -------------------------------------------------------------------------------- /rllab/tests/test_instrument.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from rllab.misc import instrument 5 | from nose2.tools import such 6 | 7 | 8 | class TestClass(object): 9 | @property 10 | def arr(self): 11 | return [1, 2, 3] 12 | 13 | @property 14 | def compound_arr(self): 15 | return [dict(a=1)] 16 | 17 | 18 | with such.A("instrument") as it: 19 | @it.should 20 | def test_concretize(): 21 | it.assertEqual(instrument.concretize([5]), [5]) 22 | it.assertEqual(instrument.concretize((5,)), (5,)) 23 | fake_globals = dict(TestClass=TestClass) 24 | instrument.stub(fake_globals) 25 | modified = fake_globals["TestClass"] 26 | 
it.assertIsInstance(modified, instrument.StubClass) 27 | it.assertIsInstance(modified(), instrument.StubObject) 28 | it.assertEqual(instrument.concretize((5,)), (5,)) 29 | it.assertIsInstance(instrument.concretize(modified()), TestClass) 30 | 31 | 32 | @it.should 33 | def test_chained_call(): 34 | fake_globals = dict(TestClass=TestClass) 35 | instrument.stub(fake_globals) 36 | modified = fake_globals["TestClass"] 37 | it.assertIsInstance(modified().arr[0], instrument.StubMethodCall) 38 | it.assertIsInstance(modified().compound_arr[0]["a"], instrument.StubMethodCall) 39 | it.assertEqual(instrument.concretize(modified().arr[0]), 1) 40 | 41 | 42 | @it.should 43 | def test_variant_generator(): 44 | 45 | vg = instrument.VariantGenerator() 46 | vg.add("key1", [1, 2, 3]) 47 | vg.add("key2", [True, False]) 48 | vg.add("key3", lambda key2: [1] if key2 else [1, 2]) 49 | it.assertEqual(len(vg.variants()), 9) 50 | 51 | class VG(instrument.VariantGenerator): 52 | 53 | @instrument.variant 54 | def key1(self): 55 | return [1, 2, 3] 56 | 57 | @instrument.variant 58 | def key2(self): 59 | yield True 60 | yield False 61 | 62 | @instrument.variant 63 | def key3(self, key2): 64 | if key2: 65 | yield 1 66 | else: 67 | yield 1 68 | yield 2 69 | 70 | it.assertEqual(len(VG().variants()), 9) 71 | 72 | it.createTests(globals()) 73 | -------------------------------------------------------------------------------- /rllab/tests/test_networks.py: -------------------------------------------------------------------------------- 1 | def test_gru_network(): 2 | from rllab.core.network import GRUNetwork 3 | import lasagne.layers as L 4 | from rllab.misc import ext 5 | import numpy as np 6 | network = GRUNetwork( 7 | input_shape=(2, 3), 8 | output_dim=5, 9 | hidden_dim=4, 10 | ) 11 | f_output = ext.compile_function( 12 | inputs=[network.input_layer.input_var], 13 | outputs=L.get_output(network.output_layer) 14 | ) 15 | assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5) 16 | -------------------------------------------------------------------------------- /rllab/tests/test_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | 6 | def test_truncate_paths(): 7 | from rllab.sampler.parallel_sampler import truncate_paths 8 | 9 | paths = [ 10 | dict( 11 | observations=np.zeros((100, 1)), 12 | actions=np.zeros((100, 1)), 13 | rewards=np.zeros(100), 14 | env_infos=dict(), 15 | agent_infos=dict(lala=np.zeros(100)), 16 | ), 17 | dict( 18 | observations=np.zeros((50, 1)), 19 | actions=np.zeros((50, 1)), 20 | rewards=np.zeros(50), 21 | env_infos=dict(), 22 | agent_infos=dict(lala=np.zeros(50)), 23 | ), 24 | ] 25 | 26 | truncated = truncate_paths(paths, 130) 27 | assert len(truncated) == 2 28 | assert len(truncated[-1]["observations"]) == 30 29 | assert len(truncated[0]["observations"]) == 100 30 | # make sure not to change the original one 31 | assert len(paths) == 2 32 | assert len(paths[-1]["observations"]) == 50 33 | -------------------------------------------------------------------------------- /rllab/tests/test_serializable.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from rllab.core.serializable import Serializable 4 | from sandbox.rocky.tf.core.parameterized import Parameterized, suppress_params_loading 5 | 6 | 7 | class Simple(Parameterized, Serializable): 8 | def __init__(self, name): 9 | Serializable.quick_init(self, locals()) 10 | with tf.variable_scope(name): 11 | 
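            # Variables created under this scope get the "<name>/" prefix, which
            # test_serializable() below checks via w.name.startswith('obj/') / ('obj1/').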
self.w = tf.get_variable("w", [10, 10]) 12 | 13 | def get_params_internal(self, **tags): 14 | return [self.w] 15 | 16 | 17 | class AllArgs(Serializable): 18 | def __init__(self, vararg, *args, **kwargs): 19 | Serializable.quick_init(self, locals()) 20 | self.vararg = vararg 21 | self.args = args 22 | self.kwargs = kwargs 23 | 24 | 25 | def test_serializable(): 26 | with suppress_params_loading(): 27 | obj = Simple(name="obj") 28 | obj1 = Serializable.clone(obj, name="obj1") 29 | assert obj.w.name.startswith('obj/') 30 | assert obj1.w.name.startswith('obj1/') 31 | 32 | obj2 = AllArgs(0, *(1,), **{'kwarg': 2}) 33 | obj3 = Serializable.clone(obj2) 34 | assert obj3.vararg == 0 35 | assert len(obj3.args) == 1 and obj3.args[0] == 1 36 | assert len(obj3.kwargs) == 1 and obj3.kwargs['kwarg'] == 2 37 | 38 | 39 | if __name__ == "__main__": 40 | test_serializable() 41 | -------------------------------------------------------------------------------- /rllab/tests/test_spaces.py: -------------------------------------------------------------------------------- 1 | 2 | from rllab.spaces import Product, Discrete, Box 3 | import numpy as np 4 | 5 | 6 | def test_product_space(): 7 | _ = Product([Discrete(3), Discrete(2)]) 8 | product_space = Product(Discrete(3), Discrete(2)) 9 | sample = product_space.sample() 10 | assert product_space.contains(sample) 11 | 12 | 13 | def test_product_space_unflatten_n(): 14 | space = Product([Discrete(3), Discrete(3)]) 15 | np.testing.assert_array_equal(space.flatten((2, 2)), space.flatten_n([(2, 2)])[0]) 16 | np.testing.assert_array_equal( 17 | space.unflatten(space.flatten((2, 2))), 18 | space.unflatten_n(space.flatten_n([(2, 2)]))[0] 19 | ) 20 | 21 | 22 | def test_box(): 23 | space = Box(low=-1, high=1, shape=(2, 2)) 24 | np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]), [1, 2, 3, 4]) 25 | np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]), [[1, 2, 3, 4]]) 26 | np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]), [[1, 2], [3, 4]]) 27 | np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]), [[[1, 2], [3, 4]]]) 28 | -------------------------------------------------------------------------------- /rllab/tests/test_stateful_pool.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | def _worker_collect_once(_): 6 | return 'a', 1 7 | 8 | 9 | def test_stateful_pool(): 10 | from rllab.sampler import stateful_pool 11 | stateful_pool.singleton_pool.initialize(n_parallel=3) 12 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False) 13 | assert tuple(results) == ('a', 'a', 'a') 14 | 15 | 16 | def test_stateful_pool_over_capacity(): 17 | from rllab.sampler import stateful_pool 18 | stateful_pool.singleton_pool.initialize(n_parallel=4) 19 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False) 20 | assert len(results) >= 3 21 | -------------------------------------------------------------------------------- /rllab/vendor/mujoco_models/green_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /rllab/vendor/mujoco_models/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- 
/rllab/vendor/mujoco_models/red_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/maze_data_collect.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from inverse_rl.algos.trpo import TRPO 4 | from inverse_rl.models.tf_util import get_session_config 5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 8 | 9 | from inverse_rl.envs.env_utils import CustomGymEnv 10 | from inverse_rl.utils.log_utils import rllab_logdir 11 | from inverse_rl.utils.hyper_sweep import run_sweep_parallel, run_sweep_serial 12 | 13 | 14 | def main(exp_name, ent_wt=1.0, discrete=True): 15 | tf.reset_default_graph() 16 | if discrete: 17 | env = TfEnv(CustomGymEnv('PointMazeLeft-v0', record_video=False, record_log=False)) 18 | else: 19 | env = TfEnv(CustomGymEnv('PointMazeLeftCont-v0', record_video=False, record_log=False)) 20 | 21 | policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32)) 22 | with tf.Session(config=get_session_config()) as sess: 23 | algo = TRPO( 24 | env=env, 25 | sess=sess, 26 | policy=policy, 27 | n_itr=2000, 28 | batch_size=20000, 29 | max_path_length=500, 30 | discount=0.99, 31 | store_paths=True, 32 | entropy_weight=ent_wt, 33 | baseline=LinearFeatureBaseline(env_spec=env.spec), 34 | exp_name=exp_name, 35 | ) 36 | if discrete: 37 | output = 'data/maze_left_data_collect_discrete-15/%s' % exp_name 38 | else: 39 | output = 'data/maze_left_data_collect/%s' % exp_name 40 | with rllab_logdir(algo=algo, dirname=output): 41 | algo.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | params_dict = { 46 | 'ent_wt': [0.1], 47 | 'discrete': True # Setting discrete to 'True' to get training data, 'False' to get test data (test unseen positions) 48 | } 49 | run_sweep_parallel(main, params_dict, repeat=4) 50 | --------------------------------------------------------------------------------
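For a quick single-configuration run of the collection script above, the sweep at the bottom of scripts/maze_data_collect.py can be swapped for a direct call to main(). The sketch below assumes it replaces that `if __name__ == "__main__":` block inside the same file, so the existing imports apply; the experiment name maze_left_single is illustrative rather than one used in the repository.

if __name__ == "__main__":
    # One configuration, one seed: a single cell of the params_dict sweep above.
    # 'maze_left_single' is a hypothetical experiment name chosen for this sketch.
    main("maze_left_single", ent_wt=0.1, discrete=True)
    # With discrete=True, logs (and, given store_paths=True, presumably the sampled
    # rollouts stored in the per-iteration snapshots) land under
    # data/maze_left_data_collect_discrete-15/maze_left_single

For the data actually consumed downstream, the parallel sweep with repeat=4 remains the intended entry point; run_sweep_serial, which the script also imports, presumably runs the same configurations one at a time in a single process, which can be easier to step through when debugging.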