├── .gitignore ├── README.md ├── data_fusion_discrete └── maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32 │ └── 2019_05_14_02_33_17_0 │ └── itr_2800.pkl ├── inverse_rl ├── __init__.py ├── __pycache__ │ └── __init__.cpython-35.pyc ├── algos │ ├── __pycache__ │ │ ├── batch_polopt.cpython-35.pyc │ │ ├── irl_batch_polopt.cpython-35.pyc │ │ ├── irl_npo.cpython-35.pyc │ │ ├── irl_trpo.cpython-35.pyc │ │ ├── meta_irl_batch_polopt.cpython-35.pyc │ │ ├── meta_irl_npo.cpython-35.pyc │ │ ├── meta_irl_trpo.cpython-35.pyc │ │ ├── npo.cpython-35.pyc │ │ ├── penalty_lbfgs_optimizer.cpython-35.pyc │ │ └── trpo.cpython-35.pyc │ ├── batch_polopt.py │ ├── irl_batch_polopt.py │ ├── irl_npo.py │ ├── irl_trpo.py │ ├── meta_irl_batch_polopt.py │ ├── meta_irl_npo.py │ ├── meta_irl_trpo.py │ ├── npo.py │ ├── penalty_lbfgs_optimizer.py │ └── trpo.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── env_utils.cpython-35.pyc │ │ └── point_maze_env.cpython-35.pyc │ ├── assets │ │ └── twod_maze.xml │ ├── dynamic_mjc │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── mjc_models.cpython-35.pyc │ │ │ └── model_builder.cpython-35.pyc │ │ ├── mjc_models.py │ │ └── model_builder.py │ ├── env_utils.py │ ├── point_maze_env.py │ └── utils.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── airl_state.cpython-35.pyc │ │ ├── architectures.cpython-35.pyc │ │ ├── fusion_manager.cpython-35.pyc │ │ ├── imitation_learning.cpython-35.pyc │ │ ├── old_imitation_learning.cpython-35.pyc │ │ ├── pretrain.cpython-35.pyc │ │ └── tf_util.cpython-35.pyc │ ├── airl_state.py │ ├── architectures.py │ ├── fusion_manager.py │ ├── imitation_learning.py │ ├── info_airl_state_test.py │ ├── info_airl_state_train.py │ ├── old_imitation_learning.py │ ├── pretrain.py │ └── tf_util.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── general.cpython-35.pyc │ ├── hyper_sweep.cpython-35.pyc │ ├── hyperparametrized.cpython-35.pyc │ ├── log_utils.cpython-35.pyc │ └── math_utils.cpython-35.pyc │ ├── general.py │ ├── hyper_sweep.py │ ├── hyperparametrized.py │ ├── log_utils.py │ └── math_utils.py ├── rllab ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── circle.yml ├── contrib │ ├── __init__.py │ ├── alexbeloi │ │ ├── __init__.py │ │ ├── examples │ │ │ ├── __init__.py │ │ │ ├── trpois_cartpole.py │ │ │ └── vpgis_cartpole.py │ │ └── is_sampler.py │ ├── bichengcao │ │ ├── __init__.py │ │ └── examples │ │ │ ├── __init__.py │ │ │ ├── trpo_gym_Acrobot-v1.py │ │ │ ├── trpo_gym_CartPole-v0.py │ │ │ ├── trpo_gym_CartPole-v1.py │ │ │ ├── trpo_gym_MountainCar-v0.py │ │ │ └── trpo_gym_Pendulum-v0.py │ └── rllab_hyperopt │ │ ├── __init__.py │ │ ├── core.py │ │ ├── example │ │ ├── __init__.py │ │ ├── main.py │ │ ├── score.py │ │ └── task.py │ │ └── visualize_hyperopt_results.ipynb ├── docker │ ├── Dockerfile │ ├── gpu_Dockerfile │ ├── gpu_tf_Dockerfile │ └── tester_Dockerfile ├── docs │ ├── Makefile │ ├── conf.py │ ├── index.rst │ └── user │ │ ├── cluster.rst │ │ ├── cluster_1.png │ │ ├── cluster_2.png │ │ ├── cluster_3.png │ │ ├── experiments.rst │ │ ├── gym_integration.rst │ │ ├── implement_algo_advanced.rst │ │ ├── implement_algo_basic.rst │ │ ├── implement_env.rst │ │ └── installation.rst ├── environment.yml ├── examples │ ├── __init__.py │ ├── cluster_demo.py │ ├── cluster_gym_mujoco_demo.py │ ├── ddpg_cartpole.py │ ├── nop_cartpole.py │ ├── point_env.py │ 
├── trpo_cartpole.py │ ├── trpo_cartpole_pickled.py │ ├── trpo_cartpole_recurrent.py │ ├── trpo_gym_cartpole.py │ ├── trpo_gym_pendulum.py │ ├── trpo_gym_tf_cartpole.py │ ├── trpo_point.py │ ├── trpo_swimmer.py │ ├── vpg_1.py │ └── vpg_2.py ├── rllab │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batch_polopt.py │ │ ├── cem.py │ │ ├── cma_es.py │ │ ├── cma_es_lib.py │ │ ├── ddpg.py │ │ ├── erwr.py │ │ ├── nop.py │ │ ├── npo.py │ │ ├── ppo.py │ │ ├── reps.py │ │ ├── tnpg.py │ │ ├── trpo.py │ │ ├── util.py │ │ └── vpg.py │ ├── baselines │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_conv_baseline.py │ │ ├── gaussian_mlp_baseline.py │ │ ├── linear_feature_baseline.py │ │ └── zero_baseline.py │ ├── config.py │ ├── config_personal_template.py │ ├── core │ │ ├── __init__.py │ │ ├── lasagne_helpers.py │ │ ├── lasagne_layers.py │ │ ├── lasagne_powered.py │ │ ├── network.py │ │ ├── parameterized.py │ │ └── serializable.py │ ├── distributions │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bernoulli.py │ │ ├── categorical.py │ │ ├── delta.py │ │ ├── diagonal_gaussian.py │ │ ├── recurrent_categorical.py │ │ └── recurrent_diagonal_gaussian.py │ ├── envs │ │ ├── __init__.py │ │ ├── base.py │ │ ├── box2d │ │ │ ├── __init__.py │ │ │ ├── box2d_env.py │ │ │ ├── box2d_viewer.py │ │ │ ├── car_parking_env.py │ │ │ ├── cartpole_env.py │ │ │ ├── cartpole_swingup_env.py │ │ │ ├── double_pendulum_env.py │ │ │ ├── models │ │ │ │ ├── car_parking.xml │ │ │ │ ├── car_parking.xml.rb │ │ │ │ ├── cartpole.xml.mako │ │ │ │ ├── double_pendulum.xml.mako │ │ │ │ └── mountain_car.xml.mako │ │ │ ├── mountain_car_env.py │ │ │ └── parser │ │ │ │ ├── __init__.py │ │ │ │ ├── xml_attr_types.py │ │ │ │ ├── xml_box2d.py │ │ │ │ └── xml_types.py │ │ ├── env_spec.py │ │ ├── grid_world_env.py │ │ ├── gym_env.py │ │ ├── identification_env.py │ │ ├── mujoco │ │ │ ├── __init__.py │ │ │ ├── ant_env.py │ │ │ ├── gather │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_gather_env.py │ │ │ │ ├── embedded_viewer.py │ │ │ │ ├── gather_env.py │ │ │ │ ├── point_gather_env.py │ │ │ │ └── swimmer_gather_env.py │ │ │ ├── half_cheetah_env.py │ │ │ ├── hill │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_hill_env.py │ │ │ │ ├── half_cheetah_hill_env.py │ │ │ │ ├── hill_env.py │ │ │ │ ├── hopper_hill_env.py │ │ │ │ ├── swimmer3d_hill_env.py │ │ │ │ ├── terrain.py │ │ │ │ └── walker2d_hill_env.py │ │ │ ├── hopper_env.py │ │ │ ├── humanoid_env.py │ │ │ ├── inverted_double_pendulum_env.py │ │ │ ├── maze │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_maze_env.py │ │ │ │ ├── maze_env.py │ │ │ │ ├── maze_env_utils.py │ │ │ │ ├── point_maze_env.py │ │ │ │ └── swimmer_maze_env.py │ │ │ ├── mujoco_env.py │ │ │ ├── point_env.py │ │ │ ├── simple_humanoid_env.py │ │ │ ├── swimmer3d_env.py │ │ │ ├── swimmer_env.py │ │ │ └── walker2d_env.py │ │ ├── noisy_env.py │ │ ├── normalized_env.py │ │ ├── occlusion_env.py │ │ ├── proxy_env.py │ │ └── sliding_mem_env.py │ ├── exploration_strategies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_strategy.py │ │ └── ou_strategy.py │ ├── misc │ │ ├── __init__.py │ │ ├── autoargs.py │ │ ├── console.py │ │ ├── ext.py │ │ ├── instrument.py │ │ ├── krylov.py │ │ ├── logger.py │ │ ├── mako_utils.py │ │ ├── meta.py │ │ ├── nb_utils.py │ │ ├── overrides.py │ │ ├── resolve.py │ │ ├── special.py │ │ ├── tabulate.py │ │ ├── tensor_utils.py │ │ └── viewer2d.py │ ├── mujoco_py │ │ ├── .rvmrc │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── __init__.py │ │ ├── codegen.rb │ │ ├── gen_binding.sh │ │ ├── glfw.py │ │ ├── mjconstants.py │ │ 
├── mjcore.py │ │ ├── mjextra.py │ │ ├── mjlib.py │ │ ├── mjtypes.py │ │ ├── mjviewer.py │ │ └── util.py │ ├── optimizers │ │ ├── __init__.py │ │ ├── conjugate_gradient_optimizer.py │ │ ├── first_order_optimizer.py │ │ ├── hessian_free_optimizer.py │ │ ├── hf.py │ │ ├── lbfgs_optimizer.py │ │ ├── minibatch_dataset.py │ │ └── penalty_lbfgs_optimizer.py │ ├── plotter │ │ ├── __init__.py │ │ └── plotter.py │ ├── policies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical_conv_policy.py │ │ ├── categorical_gru_policy.py │ │ ├── categorical_mlp_policy.py │ │ ├── deterministic_mlp_policy.py │ │ ├── gaussian_gru_policy.py │ │ ├── gaussian_mlp_policy.py │ │ └── uniform_control_policy.py │ ├── q_functions │ │ ├── __init__.py │ │ ├── base.py │ │ └── continuous_mlp_q_function.py │ ├── regressors │ │ ├── __init__.py │ │ ├── categorical_mlp_regressor.py │ │ ├── gaussian_conv_regressor.py │ │ ├── gaussian_mlp_regressor.py │ │ └── product_regressor.py │ ├── sampler │ │ ├── __init__.py │ │ ├── base.py │ │ ├── parallel_sampler.py │ │ ├── stateful_pool.py │ │ └── utils.py │ ├── spaces │ │ ├── __init__.py │ │ ├── base.py │ │ ├── box.py │ │ ├── discrete.py │ │ └── product.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html ├── sandbox │ ├── __init__.py │ └── rocky │ │ ├── __init__.py │ │ └── tf │ │ ├── __init__.py │ │ ├── algos │ │ ├── __init__.py │ │ ├── batch_polopt.py │ │ ├── npg.py │ │ ├── npo.py │ │ ├── trpo.py │ │ └── vpg.py │ │ ├── core │ │ ├── __init__.py │ │ ├── layers.py │ │ ├── layers_powered.py │ │ ├── network.py │ │ └── parameterized.py │ │ ├── distributions │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bernoulli.py │ │ ├── categorical.py │ │ ├── diagonal_gaussian.py │ │ ├── recurrent_categorical.py │ │ └── recurrent_diagonal_gaussian.py │ │ ├── envs │ │ ├── __init__.py │ │ ├── base.py │ │ ├── parallel_vec_env_executor.py │ │ └── vec_env_executor.py │ │ ├── launchers │ │ ├── __init__.py │ │ ├── trpo_cartpole.py │ │ ├── trpo_cartpole_recurrent.py │ │ └── vpg_cartpole.py │ │ ├── misc │ │ ├── __init__.py │ │ └── tensor_utils.py │ │ ├── optimizers │ │ ├── __init__.py │ │ ├── conjugate_gradient_optimizer.py │ │ ├── first_order_optimizer.py │ │ ├── lbfgs_optimizer.py │ │ └── penalty_lbfgs_optimizer.py │ │ ├── policies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical_conv_policy.py │ │ ├── categorical_gru_policy.py │ │ ├── categorical_lstm_policy.py │ │ ├── categorical_mlp_policy.py │ │ ├── deterministic_mlp_policy.py │ │ ├── gaussian_gru_policy.py │ │ ├── gaussian_lstm_policy.py │ │ ├── gaussian_mlp_policy.py │ │ ├── latent_gaussian_mlp_policy.py │ │ └── uniform_control_policy.py │ │ ├── q_functions │ │ ├── __init__.py │ │ ├── base.py │ │ └── continuous_mlp_q_function.py │ │ ├── regressors │ │ ├── __init__.py │ │ ├── bernoulli_mlp_regressor.py │ │ ├── categorical_mlp_regressor.py │ │ ├── deterministic_mlp_regressor.py │ │ └── gaussian_mlp_regressor.py │ │ ├── samplers │ │ ├── __init__.py │ │ ├── batch_sampler.py │ │ └── vectorized_sampler.py │ │ └── spaces │ │ ├── __init__.py │ │ ├── box.py │ │ ├── discrete.py │ │ └── product.py ├── scripts │ ├── __init__.py │ ├── resume_training.py │ ├── run_experiment_lite.py │ ├── setup_ec2_for_rllab.py │ ├── setup_linux.sh │ ├── 
setup_mujoco.sh │ ├── setup_osx.sh │ ├── sim_env.py │ ├── sim_policy.py │ ├── submit_gym.py │ └── sync_s3.py ├── setup.py ├── tests │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ └── test_trpo.py │ ├── envs │ │ ├── __init__.py │ │ ├── test_envs.py │ │ └── test_maze_env.py │ ├── regression_tests │ │ ├── __init__.py │ │ └── test_issue_3.py │ ├── test_algos.py │ ├── test_baselines.py │ ├── test_instrument.py │ ├── test_networks.py │ ├── test_sampler.py │ ├── test_serializable.py │ ├── test_spaces.py │ └── test_stateful_pool.py └── vendor │ └── mujoco_models │ ├── ant.xml │ ├── green_ball.xml │ ├── half_cheetah.xml │ ├── hill_ant_env.xml.mako │ ├── hill_half_cheetah_env.xml.mako │ ├── hill_hopper_env.xml.mako │ ├── hill_swimmer3d_env.xml.mako │ ├── hill_walker2d_env.xml.mako │ ├── hopper.xml │ ├── humanoid.xml │ ├── inverted_double_pendulum.xml │ ├── inverted_double_pendulum.xml.mako │ ├── point.xml │ ├── red_ball.xml │ ├── simple_humanoid.xml │ ├── swimmer.xml │ ├── swimmer3d.xml │ ├── utils.mako │ └── walker2d.xml └── scripts ├── maze_data_collect.py ├── maze_visualize_reward.py ├── maze_wall_meta_irl.py └── maze_wall_meta_irl_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Meta-Inverse Reinforcement Learning with Probabilistic Context Variables
2 | Lantao Yu*, Tianhe Yu*, Chelsea Finn, Stefano Ermon.
3 | The 33rd Conference on Neural Information Processing Systems (NeurIPS 2019).
4 | [[Paper]](https://arxiv.org/pdf/1909.09314.pdf) [[Website]](https://sites.google.com/view/pemirl) 5 | 6 | ### Usage 7 | Requirement: this project relies on the rllab package provided [here](https://github.com/ermongroup/MetaIRL/tree/master/rllab). 8 | 9 | To collect expert trajectories for the downstream tasks: 10 | ``` 11 | python scripts/maze_data_collect.py 12 | ``` 13 | 14 | After collecting expert trajectories, run Meta-Inverse RL to learn context-dependent reward functions: 15 | ``` 16 | python scripts/maze_wall_meta_irl.py 17 | ``` 18 | We provide a pretrained IRL model [here](https://github.com/ermongroup/MetaIRL/tree/master/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0), which the scripts below load by default. 19 | 20 | To visualize the context-dependent reward function (Figure 2 in the paper): 21 | ``` 22 | python scripts/maze_visualize_reward.py 23 | ``` 24 | 25 | To use the context-dependent reward function to train a new policy under new dynamics: 26 | ``` 27 | python scripts/maze_wall_meta_irl_test.py 28 | ``` -------------------------------------------------------------------------------- /data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/data_fusion_discrete/maze_wall_meta_irl_imitcoeff-0.01_infocoeff-0.1_mbs-50_bs-16_itr-20_preepoch-1000_entropy-1.0_RandomPol_Rew-2-32/2019_05_14_02_33_17_0/itr_2800.pkl -------------------------------------------------------------------------------- /inverse_rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__init__.py -------------------------------------------------------------------------------- /inverse_rl/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_npo.cpython-35.pyc
-------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/irl_trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_batch_polopt.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_npo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/meta_irl_trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/npo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/npo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/penalty_lbfgs_optimizer.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/__pycache__/trpo.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/algos/__pycache__/trpo.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/algos/irl_trpo.py: -------------------------------------------------------------------------------- 1 | from inverse_rl.algos.irl_npo import IRLNPO 2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | 4 | 5 | class IRLTRPO(IRLNPO): 6 | """ 7 | Trust Region Policy Optimization 8 | """ 9 | 10 | def __init__( 11 | self, 12 | optimizer=None, 13 | optimizer_args=None, 14 | **kwargs): 15 | if optimizer is None: 16 | if optimizer_args is None: 17 | optimizer_args = dict() 18 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 19 | super(IRLTRPO, self).__init__(optimizer=optimizer, **kwargs) 20 | -------------------------------------------------------------------------------- /inverse_rl/algos/meta_irl_trpo.py: -------------------------------------------------------------------------------- 1 | 
from inverse_rl.algos.meta_irl_npo import MetaIRLNPO 2 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | 4 | 5 | class MetaIRLTRPO(MetaIRLNPO): 6 | """ 7 | Trust Region Policy Optimization 8 | """ 9 | 10 | def __init__( 11 | self, 12 | optimizer=None, 13 | optimizer_args=None, 14 | **kwargs): 15 | if optimizer is None: 16 | if optimizer_args is None: 17 | optimizer_args = dict() 18 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 19 | super(MetaIRLTRPO, self).__init__(optimizer=optimizer, **kwargs) 20 | -------------------------------------------------------------------------------- /inverse_rl/algos/trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from inverse_rl.algos.npo import NPO 4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 5 | 6 | 7 | class TRPO(NPO): 8 | """ 9 | Trust Region Policy Optimization 10 | """ 11 | 12 | def __init__( 13 | self, 14 | optimizer=None, 15 | optimizer_args=None, 16 | **kwargs): 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /inverse_rl/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from gym.envs import register 4 | 5 | LOGGER = logging.getLogger(__name__) 6 | 7 | _REGISTERED = False 8 | def register_custom_envs(): 9 | global _REGISTERED 10 | if _REGISTERED: 11 | return 12 | _REGISTERED = True 13 | 14 | LOGGER.info("Registering custom gym environments") 15 | register(id='PointMazeRight-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 16 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': True}) 17 | register(id='PointMazeLeft-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 18 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': True}) 19 | register(id='PointMazeRightCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 20 | kwargs={'sparse_reward': False, 'direction': 1, 'discrete': False}) 21 | register(id='PointMazeLeftCont-v0', entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv', 22 | kwargs={'sparse_reward': False, 'direction': 0, 'discrete': False}) 23 | 24 | -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/env_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/__pycache__/point_maze_env.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/assets/twod_maze.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__init__.py -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/mjc_models.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/envs/dynamic_mjc/__pycache__/model_builder.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/envs/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def flat_to_one_hot(val, ndim): 4 | """ 5 | 6 | >>> flat_to_one_hot(2, ndim=4) 7 | array([ 0., 0., 1., 0.]) 8 | >>> flat_to_one_hot(4, ndim=5) 9 | array([ 0., 0., 0., 0., 1.]) 10 | >>> flat_to_one_hot(np.array([2, 4, 3]), ndim=5) 11 | array([[ 0., 0., 1., 0., 0.], 12 | [ 0., 0., 0., 0., 1.], 13 | [ 0., 0., 0., 1., 0.]]) 14 | """ 15 | shape =np.array(val).shape 16 | v = np.zeros(shape + (ndim,)) 17 | if len(shape) == 1: 18 | v[np.arange(shape[0]), val] = 1.0 19 | else: 20 | v[val] = 1.0 21 | return v 22 | 23 | def one_hot_to_flat(val): 24 | """ 25 | >>> one_hot_to_flat(np.array([0,0,0,0,1])) 26 | 4 27 | >>> one_hot_to_flat(np.array([0,0,1,0])) 28 | 2 29 | >>> one_hot_to_flat(np.array([[0,0,1,0], [1,0,0,0], [0,1,0,0]])) 30 | array([2, 0, 1]) 31 | """ 32 | idxs = np.array(np.where(val == 1.0))[-1] 33 | if len(val.shape) == 1: 34 | return int(idxs) 35 | return idxs -------------------------------------------------------------------------------- /inverse_rl/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__init__.py -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/airl_state.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/airl_state.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/architectures.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/architectures.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/fusion_manager.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/imitation_learning.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/old_imitation_learning.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/pretrain.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/pretrain.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/__pycache__/tf_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/models/__pycache__/tf_util.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/models/architectures.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from inverse_rl.models.tf_util import relu_layer, linear 3 | 4 | 5 | def make_relu_net(layers=2, dout=1, d_hidden=32): 6 | def relu_net(x, last_layer_bias=True): 7 | out = x 8 | for i in range(layers): 9 | out = relu_layer(out, dout=d_hidden, name='l%d'%i) 10 | out = linear(out, dout=dout, name='lfinal', bias=last_layer_bias) 11 | return out 12 | return relu_net 13 | 14 | 15 | def relu_net(x, layers=2, dout=1, d_hidden=32): 16 | out = x 17 | for i in range(layers): 18 | out = relu_layer(out, dout=d_hidden, name='l%d'%i) 19 | out = 
linear(out, dout=dout, name='lfinal') 20 | return out 21 | 22 | 23 | def linear_net(x, dout=1): 24 | out = x 25 | out = linear(out, dout=dout, name='lfinal') 26 | return out 27 | 28 | 29 | def feedforward_energy(obs_act, ff_arch=relu_net): 30 | # for trajectories, using feedforward nets rather than RNNs 31 | dimOU = int(obs_act.get_shape()[2]) 32 | orig_shape = tf.shape(obs_act) 33 | 34 | obs_act = tf.reshape(obs_act, [-1, dimOU]) 35 | outputs = ff_arch(obs_act) 36 | dOut = int(outputs.get_shape()[-1]) 37 | 38 | new_shape = tf.stack([orig_shape[0],orig_shape[1], dOut]) 39 | outputs = tf.reshape(outputs, new_shape) 40 | return outputs 41 | 42 | 43 | def rnn_trajectory_energy(obs_act): 44 | """ 45 | Operates on trajectories 46 | """ 47 | # for trajectories 48 | dimOU = int(obs_act.get_shape()[2]) 49 | 50 | cell = tf.contrib.rnn.GRUCell(num_units=dimOU) 51 | cell_out = tf.contrib.rnn.OutputProjectionWrapper(cell, 1) 52 | outputs, hidden = tf.nn.dynamic_rnn(cell_out, obs_act, time_major=False, dtype=tf.float32) 53 | return outputs 54 | 55 | -------------------------------------------------------------------------------- /inverse_rl/models/tf_util.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | REG_VARS = 'reg_vars' 5 | 6 | def linear(X, dout, name, bias=True): 7 | with tf.variable_scope(name): 8 | dX = int(X.get_shape()[-1]) 9 | W = tf.get_variable('W', shape=(dX, dout)) 10 | tf.add_to_collection(REG_VARS, W) 11 | if bias: 12 | b = tf.get_variable('b', initializer=tf.constant(np.zeros(dout).astype(np.float32))) 13 | else: 14 | b = 0 15 | return tf.matmul(X, W)+b 16 | 17 | def discounted_reduce_sum(X, discount, axis=-1): 18 | if discount != 1.0: 19 | disc = tf.cumprod(discount*tf.ones_like(X), axis=axis) 20 | else: 21 | disc = 1.0 22 | return tf.reduce_sum(X*disc, axis=axis) 23 | 24 | def assert_shape(tens, shape): 25 | assert tens.get_shape().is_compatible_with(shape) 26 | 27 | def relu_layer(X, dout, name): 28 | return tf.nn.relu(linear(X, dout, name)) 29 | 30 | def softplus_layer(X, dout, name): 31 | return tf.nn.softplus(linear(X, dout, name)) 32 | 33 | def tanh_layer(X, dout, name): 34 | return tf.nn.tanh(linear(X, dout, name)) 35 | 36 | def get_session_config(): 37 | session_config = tf.ConfigProto() 38 | session_config.gpu_options.allow_growth = True 39 | #session_config.gpu_options.per_process_gpu_memory_fraction = 0.2 40 | return session_config 41 | 42 | 43 | def load_prior_params(pkl_fname, key='irl_params'): 44 | import joblib 45 | with tf.Session(config=get_session_config()): 46 | params = joblib.load(pkl_fname) 47 | 48 | tf.reset_default_graph() 49 | #joblib.dump(params, file_name, compress=3) 50 | params = params[key] 51 | #print(params) 52 | assert params is not None 53 | return params 54 | -------------------------------------------------------------------------------- /inverse_rl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from inverse_rl.utils.general import * 2 | -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- 
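As a quick illustration of how the reward-network builders in `inverse_rl/models/architectures.py` and the helpers in `inverse_rl/models/tf_util.py` above fit together, here is a minimal, hypothetical sketch (assuming a TensorFlow 1.x environment, which the `tf.contrib` usage above implies); the observation size and the zero-filled batch are placeholders, not taken from the repository's own scripts:
```
# Hypothetical sketch: scoring a batch of states with relu_net (not part of the repository).
import numpy as np
import tensorflow as tf
from inverse_rl.models.architectures import relu_net
from inverse_rl.models.tf_util import get_session_config

obs_dim = 4                                            # assumed observation size
obs_ph = tf.placeholder(tf.float32, [None, obs_dim])   # batch of states
with tf.variable_scope('reward'):
    # two ReLU layers of width 32, then a linear head producing one score per state
    reward = relu_net(obs_ph, layers=2, dout=1, d_hidden=32)

with tf.Session(config=get_session_config()) as sess:
    sess.run(tf.global_variables_initializer())
    scores = sess.run(reward, feed_dict={obs_ph: np.zeros((8, obs_dim), dtype=np.float32)})
    print(scores.shape)  # (8, 1)
```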
/inverse_rl/utils/__pycache__/general.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/general.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyper_sweep.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/hyperparametrized.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/log_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/inverse_rl/utils/__pycache__/math_utils.cpython-35.pyc -------------------------------------------------------------------------------- /inverse_rl/utils/general.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | def flatten_list(lol): 5 | return [ a for b in lol for a in b ] 6 | 7 | class TrainingIterator(object): 8 | def __init__(self, itrs, heartbeat=float('inf')): 9 | self.itrs = itrs 10 | self.heartbeat_time = heartbeat 11 | self.__vals = {} 12 | 13 | def random_idx(self, N, size): 14 | return np.random.randint(0, N, size=size) 15 | 16 | @property 17 | def itr(self): 18 | return self.__itr 19 | 20 | @property 21 | def heartbeat(self): 22 | return self.__heartbeat 23 | 24 | @property 25 | def elapsed(self): 26 | assert self.heartbeat, 'elapsed is only valid when heartbeat=True' 27 | return self.__elapsed 28 | 29 | def itr_message(self): 30 | return '==> Itr %d/%d (elapsed:%.2f)' % (self.itr+1, self.itrs, self.elapsed) 31 | 32 | def record(self, key, value): 33 | if key in self.__vals: 34 | self.__vals[key].append(value) 35 | else: 36 | self.__vals[key] = [value] 37 | 38 | def pop(self, key): 39 | vals = self.__vals.get(key, []) 40 | del self.__vals[key] 41 | return vals 42 | 43 | def pop_mean(self, key): 44 | return np.mean(self.pop(key)) 45 | 46 | def __iter__(self): 47 | prev_time = time.time() 48 | self.__heartbeat = False 49 | for i in range(self.itrs): 50 | self.__itr = i 51 | cur_time = time.time() 52 | if (cur_time-prev_time) > self.heartbeat_time or i==(self.itrs-1): 53 | self.__heartbeat = True 54 | self.__elapsed = cur_time-prev_time 55 | prev_time = cur_time 56 | yield self 57 | self.__heartbeat = False -------------------------------------------------------------------------------- 
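For orientation, a minimal sketch of how the `TrainingIterator` class in `inverse_rl/utils/general.py` above is meant to be driven; the loop body and the random loss values are hypothetical placeholders, not code from the repository:
```
# Hypothetical usage sketch for TrainingIterator (not part of the repository).
import numpy as np
from inverse_rl.utils.general import TrainingIterator

losses = np.random.rand(100)  # stand-in for per-iteration training losses

for it in TrainingIterator(100, heartbeat=5.0):
    it.record('loss', float(losses[it.itr]))      # accumulate a value under a key
    if it.heartbeat:                              # True once `heartbeat` seconds elapse, and on the last iteration
        print(it.itr_message())                   # e.g. "==> Itr 100/100 (elapsed:0.01)"
        print('mean loss:', it.pop_mean('loss'))  # average and clear everything recorded under 'loss'
```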
/inverse_rl/utils/hyperparametrized.py: -------------------------------------------------------------------------------- 1 | CLSNAME = '__clsname__' 2 | _HYPER_ = '__hyper__' 3 | _HYPERNAME_ = '__hyper_clsname__' 4 | 5 | 6 | def extract_hyperparams(obj): 7 | if any([isinstance(obj, type_) for type_ in (int, float, str)]): 8 | return obj 9 | elif isinstance(type(obj), Hyperparametrized): 10 | hypers = getattr(obj, _HYPER_) 11 | hypers[CLSNAME] = getattr(obj, _HYPERNAME_) 12 | for attr in hypers: 13 | hypers[attr] = extract_hyperparams(hypers[attr]) 14 | return hypers 15 | return type(obj).__name__ 16 | 17 | class Hyperparametrized(type): 18 | def __new__(self, clsname, bases, clsdict): 19 | old_init = clsdict.get('__init__', bases[0].__init__) 20 | def init_wrapper(inst, *args, **kwargs): 21 | hyper = getattr(inst, _HYPER_, {}) 22 | hyper.update(kwargs) 23 | setattr(inst, _HYPER_, hyper) 24 | 25 | if getattr(inst, _HYPERNAME_, None) is None: 26 | setattr(inst, _HYPERNAME_, clsname) 27 | return old_init(inst, *args, **kwargs) 28 | clsdict['__init__'] = init_wrapper 29 | 30 | cls = super(Hyperparametrized, self).__new__(self, clsname, bases, clsdict) 31 | return cls 32 | 33 | 34 | class HyperparamWrapper(object, metaclass=Hyperparametrized): 35 | def __init__(self, **hyper_kwargs): 36 | pass 37 | 38 | if __name__ == "__main__": 39 | class Algo1(object, metaclass=Hyperparametrized): 40 | def __init__(self, hyper1=1.0, hyper2=2.0, model1=None): 41 | pass 42 | 43 | 44 | class Algo2(Algo1): 45 | def __init__(self, hyper3=5.0, **kwargs): 46 | super(Algo2, self).__init__(**kwargs) 47 | 48 | 49 | class Model1(object, metaclass=Hyperparametrized): 50 | def __init__(self, hyper1=None): 51 | pass 52 | 53 | 54 | def get_params_json(**kwargs): 55 | hyper_dict = extract_hyperparams(HyperparamWrapper(**kwargs)) 56 | del hyper_dict[CLSNAME] 57 | return hyper_dict 58 | 59 | m1 = Model1(hyper1='Test') 60 | a1 = Algo2(hyper1=1.0, hyper2=5.0, hyper3=10.0, model1=m1) 61 | 62 | print( isinstance(type(a1), Hyperparametrized)) 63 | print(get_params_json(a1=a1)) 64 | -------------------------------------------------------------------------------- /inverse_rl/utils/math_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import scipy.stats 4 | 5 | def rle(inarray): 6 | """ run length encoding. Partial credit to R rle function. 
7 | Multi datatype arrays catered for including non Numpy 8 | returns: tuple (runlengths, startpositions, values) """ 9 | ia = np.array(inarray) # force numpy 10 | n = len(ia) 11 | if n == 0: 12 | return (None, None, None) 13 | else: 14 | y = np.array(ia[1:] != ia[:-1]) # pairwise unequal (string safe) 15 | i = np.append(np.where(y), n - 1) # must include last element posi 16 | z = np.diff(np.append(-1, i)) # run lengths 17 | p = np.cumsum(np.append(0, z))[:-1] # positions 18 | return(z, p, ia[i]) 19 | 20 | def split_list_by_lengths(values, lengths): 21 | """ 22 | 23 | >>> split_list_by_lengths([0,0,0,1,1,1,2,2,2], [2,2,5]) 24 | [[0, 0], [0, 1], [1, 1, 2, 2, 2]] 25 | """ 26 | assert np.sum(lengths) == len(values) 27 | idxs = np.cumsum(lengths) 28 | idxs = np.insert(idxs, 0, 0) 29 | return [ values[idxs[i]:idxs[i+1] ] for i in range(len(idxs)-1)] 30 | 31 | def clip_sing(X, clip_val=1): 32 | U, E, V = np.linalg.svd(X, full_matrices=False) 33 | E = np.clip(E, -clip_val, clip_val) 34 | return U.dot(np.diag(E)).dot(V) 35 | 36 | def gauss_log_pdf(params, x): 37 | mean, log_diag_std = params 38 | N, d = mean.shape 39 | cov = np.square(np.exp(log_diag_std)) 40 | diff = x-mean 41 | exp_term = -0.5 * np.sum(np.square(diff)/cov, axis=1) 42 | norm_term = -0.5*d*np.log(2*np.pi) 43 | var_term = -0.5 * np.sum(np.log(cov), axis=1) 44 | log_probs = norm_term + var_term + exp_term 45 | return log_probs #sp.stats.multivariate_normal.logpdf(x, mean=mean, cov=cov) 46 | 47 | def categorical_log_pdf(params, x, one_hot=True): 48 | if not one_hot: 49 | raise NotImplementedError() 50 | probs = params[0] 51 | return np.log(np.max(probs * x, axis=1)) 52 | 53 | -------------------------------------------------------------------------------- /rllab/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | *.pyc 3 | *-checkpoint.ipynb 4 | .DS_Store 5 | *.h5 6 | *.log 7 | *.npz 8 | secrets.py 9 | *.avi 10 | *.mp4 11 | build 12 | build_linux 13 | .idea 14 | .sublime-project 15 | run_experiment.sh 16 | scratch-notebooks 17 | launch_scripts 18 | *.sh.e* 19 | *.sh.o* 20 | MUJOCO_LOG.TXT 21 | vendor/mujoco 22 | .project 23 | .pydevproject 24 | *.pdf 25 | .env 26 | snippets 27 | private 28 | lua 29 | iterate.dat 30 | .env 31 | src/ 32 | .settings 33 | .pods 34 | docs/_build 35 | blackbox.zip 36 | blackbox 37 | rllab/config_personal.py 38 | *.swp 39 | -------------------------------------------------------------------------------- /rllab/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 rllab contributors 4 | 5 | rllab uses a shared copyright model: each contributor holds copyright over 6 | their contributions to rllab. The project versioning records all such 7 | contribution and copyright details. 8 | By contributing to the rllab repository through pull-request, comment, 9 | or otherwise, the contributor releases their content to the license and 10 | copyright terms herein. 
11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | -------------------------------------------------------------------------------- /rllab/circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | services: 3 | - docker 4 | 5 | dependencies: 6 | cache_directories: 7 | - "~/docker" 8 | override: 9 | - docker info 10 | - if [[ -e ~/docker/image.tar ]]; then docker load -i ~/docker/image.tar; fi 11 | - docker build -t tester -f docker/tester_Dockerfile . 12 | - mkdir -p ~/docker; docker save tester > ~/docker/image.tar 13 | 14 | test: 15 | override: 16 | - docker run tester /bin/bash -li -c "CIRCLECI=true nose2" 17 | -------------------------------------------------------------------------------- /rllab/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/alexbeloi/examples/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/trpois_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.algos.tnpg import TNPG 3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 4 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 5 | from rllab.envs.normalized_env import normalize 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | from contrib.alexbeloi.is_sampler import ISSampler 8 | 9 | """ 10 | Example using VPG with ISSampler, iterations alternate between live and 11 | importance sampled iterations. 
12 | """ 13 | 14 | env = normalize(CartpoleEnv()) 15 | 16 | policy = GaussianMLPPolicy( 17 | env_spec=env.spec, 18 | # The neural network policy should have two hidden layers, each with 32 hidden units. 19 | hidden_sizes=(32, 32) 20 | ) 21 | 22 | baseline = LinearFeatureBaseline(env_spec=env.spec) 23 | 24 | optimizer_args = dict( 25 | # debug_nan=True, 26 | # reg_coeff=0.1, 27 | # cg_iters=2 28 | ) 29 | 30 | algo = TRPO( 31 | env=env, 32 | policy=policy, 33 | baseline=baseline, 34 | batch_size=4000, 35 | max_path_length=100, 36 | n_itr=200, 37 | discount=0.99, 38 | step_size=0.01, 39 | sampler_cls=ISSampler, 40 | sampler_args=dict(n_backtrack=1), 41 | optimizer_args=optimizer_args 42 | ) 43 | algo.train() 44 | -------------------------------------------------------------------------------- /rllab/contrib/alexbeloi/examples/vpgis_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from contrib.alexbeloi.is_sampler import ISSampler 7 | 8 | """ 9 | Example using VPG with ISSampler, iterations alternate between live and 10 | importance sampled iterations. 11 | """ 12 | 13 | env = normalize(CartpoleEnv()) 14 | 15 | policy = GaussianMLPPolicy( 16 | env_spec=env.spec, 17 | # The neural network policy should have two hidden layers, each with 32 hidden units. 18 | hidden_sizes=(32, 32) 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = VPG( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=100, 29 | n_itr=40, 30 | discount=0.99, 31 | step_size=0.01, 32 | sampler_cls=ISSampler, 33 | sampler_args=dict(n_backtrack=1), 34 | ) 35 | algo.train() 36 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/bichengcao/examples/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("Acrobot-v1")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | 
batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("CartPole-v0")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("CartPole-v1")) 11 | 12 | policy = CategoricalMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py: -------------------------------------------------------------------------------- 1 | # This doesn't work. After 150 iterations still didn't learn anything. 
2 | 3 | from rllab.algos.trpo import TRPO 4 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 5 | from rllab.envs.gym_env import GymEnv 6 | from rllab.envs.normalized_env import normalize 7 | from rllab.misc.instrument import run_experiment_lite 8 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 9 | 10 | 11 | def run_task(*_): 12 | env = normalize(GymEnv("MountainCar-v0")) 13 | 14 | policy = CategoricalMLPPolicy( 15 | env_spec=env.spec, 16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | baseline = LinearFeatureBaseline(env_spec=env.spec) 20 | 21 | algo = TRPO( 22 | env=env, 23 | policy=policy, 24 | baseline=baseline, 25 | batch_size=4000, 26 | max_path_length=env.horizon, 27 | n_itr=150, 28 | discount=0.99, 29 | step_size=0.1, 30 | plot=True, 31 | ) 32 | algo.train() 33 | 34 | 35 | run_experiment_lite( 36 | run_task, 37 | n_parallel=1, 38 | snapshot_mode="last", 39 | plot=True, 40 | ) 41 | -------------------------------------------------------------------------------- /rllab/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(GymEnv("Pendulum-v0")) 11 | 12 | policy = GaussianMLPPolicy( 13 | env_spec=env.spec, 14 | hidden_sizes=(32, 32) 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=env.horizon, 25 | n_itr=50, 26 | discount=0.99, 27 | step_size=0.01, 28 | plot=True, 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | snapshot_mode="last", 37 | plot=True, 38 | ) 39 | -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/contrib/rllab_hyperopt/example/__init__.py -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main module to launch an example hyperopt search on EC2. 3 | 4 | Launch this from outside the rllab main dir. Otherwise, rllab will try to ship the logfiles being written by this process, 5 | which will fail because tar doesn't want to tar files that are being written to. Alternatively, disable the packaging of 6 | log files by rllab, but I couldn't quickly find how to do this. 7 | 8 | You can use Jupyter notebook visualize_hyperopt_results.ipynb to inspect results. 
9 | ''' 10 | from hyperopt import hp 11 | 12 | from contrib.rllab_hyperopt.core import launch_hyperopt_search 13 | # the functions to run the task and process result do not need to be in separate files. They do need to be separate from 14 | # the main file though. Also, anything you import in the module that contains run_task needs to be on the Rllab AMI. 15 | # Therefore, since I use pandas to process results, I have put them in separate files here. 16 | from contrib.rllab_hyperopt.example.score import process_result 17 | from contrib.rllab_hyperopt.example.task import run_task 18 | 19 | # define a search space. See https://github.com/hyperopt/hyperopt/wiki/FMin, sect 2 for more detail 20 | param_space = {'step_size': hp.uniform('step_size', 0.01, 0.1), 21 | 'seed': hp.choice('seed',[0, 1, 2])} 22 | 23 | # just by way of example, pass a different config to run_experiment_lite 24 | run_experiment_kwargs = dict( 25 | n_parallel=16, 26 | aws_config=dict(instance_type="c4.4xlarge",spot_price='0.7') 27 | ) 28 | 29 | launch_hyperopt_search( 30 | run_task, # the task to run 31 | process_result, # the function that will process results and return a score 32 | param_space, # param search space 33 | hyperopt_experiment_key='test12', # key for hyperopt DB, and also exp_prefix for run_experiment_lite 34 | n_hyperopt_workers=3, # nr of local workers AND nr of EC2 instances that will be started in parallel 35 | hyperopt_max_evals=5, # nr of parameter values to eval 36 | result_timeout=600, # wait this long for results from S3 before timing out 37 | run_experiment_kwargs=run_experiment_kwargs) # additional kwargs to pass to run_experiment_lite -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/score.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | from rllab import config 5 | 6 | def process_result(exp_prefix, exp_name): 7 | # Open the default rllab path for storing results 8 | result_path = os.path.join(config.LOG_DIR, "s3", exp_prefix, exp_name, 'progress.csv') 9 | print("Processing result from",result_path) 10 | 11 | # This example uses pandas to easily read in results and create a simple smoothed learning curve 12 | df = pd.read_csv(result_path) 13 | curve = df['AverageReturn'].rolling(window=max(1,int(0.05*df.shape[0])), min_periods=1, center=True).mean().values.flatten() 14 | max_ix = curve.argmax() 15 | max_score = curve.max() 16 | 17 | # The result dict can contain arbitrary values, but ALWAYS needs to have a "loss" entry. 18 | return dict( 19 | max_score=max_score, 20 | max_iter=max_ix, 21 | scores=curve, # returning the curve allows you to plot best, worst etc curve later 22 | loss=-max_score 23 | ) -------------------------------------------------------------------------------- /rllab/contrib/rllab_hyperopt/example/task.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | def run_task(v): 8 | env = normalize(CartpoleEnv()) 9 | 10 | policy = GaussianMLPPolicy( 11 | env_spec=env.spec, 12 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
13 | hidden_sizes=(32, 32) 14 | ) 15 | 16 | baseline = LinearFeatureBaseline(env_spec=env.spec) 17 | 18 | algo = TRPO( 19 | env=env, 20 | policy=policy, 21 | baseline=baseline, 22 | batch_size=4000, 23 | max_path_length=100, 24 | n_itr=40, 25 | discount=0.99, 26 | step_size=v["step_size"], 27 | # Uncomment both lines (this and the plot parameter below) to enable plotting 28 | # plot=True, 29 | ) 30 | algo.train() -------------------------------------------------------------------------------- /rllab/docker/tester_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neocxi/rllab_exp_gpu_tf:py3 2 | 3 | RUN bash -c 'source activate rllab3 && conda install -y nomkl && conda uninstall -y scipy && conda install -y scipy' 4 | 5 | ADD . /root/code/rllab 6 | WORKDIR /root/code/rllab 7 | -------------------------------------------------------------------------------- /rllab/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. rllab documentation master file, created by 2 | sphinx-quickstart on Mon Feb 15 20:07:12 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to rllab 7 | ================ 8 | 9 | rllab is a framework for developing and evaluating reinforcement learning algorithms. 10 | 11 | rllab is a work in progress; input is welcome. The available documentation is limited for now. 12 | 13 | User Guide 14 | ========== 15 | 16 | The rllab user guide explains how to install rllab, how to run experiments, and how to implement new MDPs and new algorithms. 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | user/installation 22 | user/experiments 23 | user/gym_integration 24 | user/implement_env 25 | user/implement_algo_basic 26 | user/implement_algo_advanced 27 | user/cluster 28 | 29 | 30 | Citing rllab 31 | ============ 32 | 33 | If you use rllab for academic research, you are highly encouraged to cite the following paper: 34 | 35 | - Yan Duan, Xi Chen, Rein Houthooft, John Schulman, Pieter Abbeel. "`Benchmarking Deep Reinforcement Learning for Continuous Control <https://arxiv.org/abs/1604.06778>`_.
*Proceedings of the 33rd International Conference on Machine Learning (ICML), 2016.* 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | * :ref:`genindex` 42 | * :ref:`modindex` 43 | * :ref:`search` 44 | 45 | -------------------------------------------------------------------------------- /rllab/docs/user/cluster_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_1.png -------------------------------------------------------------------------------- /rllab/docs/user/cluster_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_2.png -------------------------------------------------------------------------------- /rllab/docs/user/cluster_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/docs/user/cluster_3.png -------------------------------------------------------------------------------- /rllab/docs/user/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | 4 | ============ 5 | Installation 6 | ============ 7 | 8 | Preparation 9 | =========== 10 | 11 | You need to edit your :code:`PYTHONPATH` to include the rllab directory: 12 | 13 | .. code-block:: bash 14 | 15 | export PYTHONPATH=path_to_rllab:$PYTHONPATH 16 | 17 | Express Install 18 | =============== 19 | 20 | The fastest way to set up dependencies for rllab is to run the setup script. 21 | 22 | - On Linux, run the following: 23 | 24 | .. code-block:: bash 25 | 26 | ./scripts/setup_linux.sh 27 | 28 | - On Mac OS X, run the following: 29 | 30 | .. code-block:: bash 31 | 32 | ./scripts/setup_osx.sh 33 | 34 | The script sets up a conda environment, which is similar to :code:`virtualenv`. To start using it, run the following: 35 | 36 | .. code-block:: bash 37 | 38 | source activate rllab3 39 | 40 | 41 | Optionally, if you would like to run experiments that depend on the Mujoco environment, you can set it up by running the following command: 42 | 43 | .. code-block:: bash 44 | 45 | ./scripts/setup_mujoco.sh 46 | 47 | and follow the instructions. You need to have the zip file for Mujoco v1.31 and the license file ready. 48 | 49 | 50 | 51 | Manual Install 52 | ============== 53 | 54 | Anaconda 55 | ------------ 56 | 57 | :code:`rllab` assumes that you are using the Anaconda Python distribution. You can download it from `https://www.continuum.io/downloads`. Make sure to download the installer for Python 2.7. 58 | 59 | 60 | System dependencies for pygame 61 | ------------------------------ 62 | 63 | A few environments in rllab are implemented using Box2D, which uses pygame for visualization. 64 | It requires a few system dependencies to be installed first. 65 | 66 | On Linux, run the following: 67 | 68 | .. code-block:: bash 69 | 70 | sudo apt-get install swig 71 | sudo apt-get build-dep python-pygame 72 | 73 | On Mac OS X, run the following: 74 | 75 | .. code-block:: bash 76 | 77 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi 78 | 79 | System dependencies for scipy 80 | ----------------------------- 81 | 82 | This step is only needed under Linux: 83 | 84 | ..
code-block:: bash 85 | 86 | sudo apt-get build-dep python-scipy 87 | 88 | Install Python modules 89 | ---------------------- 90 | 91 | .. code-block:: bash 92 | 93 | conda env create -f environment.yml 94 | -------------------------------------------------------------------------------- /rllab/environment.yml: -------------------------------------------------------------------------------- 1 | name: rllab3 2 | channels: 3 | - https://conda.anaconda.org/kne 4 | - https://conda.anaconda.org/tlatorre 5 | - https://conda.anaconda.org/cjs14 6 | - https://conda.anaconda.org/menpo 7 | - jjhelmus 8 | - soumith 9 | dependencies: 10 | - python==3.5.2 11 | - numpy==1.12.0 12 | - scipy 13 | - path.py 14 | - python-dateutil 15 | - joblib==0.10.3 16 | - mako 17 | - ipywidgets 18 | - numba 19 | - flask 20 | - pybox2d 21 | - pygame 22 | - h5py 23 | - matplotlib 24 | - opencv3=3.1.0 25 | - scikit-learn 26 | - pytorch==0.1.9 27 | - torchvision==0.1.6 28 | - mpi4py 29 | - pandas 30 | - pip: 31 | - Pillow 32 | - atari-py 33 | - pyprind 34 | - ipdb 35 | - boto3 36 | - PyOpenGL 37 | - nose2 38 | - pyzmq 39 | - tqdm 40 | - msgpack-python 41 | - git+https://github.com/inksci/mujoco-py-v0.5.7.git 42 | # - mujoco-py==1.50.1.68 43 | - cached_property 44 | - line_profiler 45 | - cloudpickle 46 | - Cython 47 | - redis 48 | - keras==1.2.1 49 | - git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano 50 | - git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne 51 | - git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly 52 | - awscli 53 | - git+https://github.com/openai/gym.git@v0.7.4#egg=gym 54 | - pyglet 55 | - git+https://github.com/neocxi/prettytensor.git 56 | - jupyter 57 | - progressbar2 58 | - chainer==1.18.0 59 | - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.7.0-cp35-cp35m-linux_x86_64.whl; 'linux' in sys_platform 60 | - https://storage.googleapis.com/tensorflow/mac/gpu/tensorflow_gpu-1.7.0-py3-none-any.whl; sys_platform == 'darwin' 61 | - numpy-stl==2.2.0 62 | - nibabel==2.1.0 63 | - pylru==1.0.9 64 | - hyperopt 65 | - polling 66 | -------------------------------------------------------------------------------- /rllab/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/examples/__init__.py -------------------------------------------------------------------------------- /rllab/examples/cluster_demo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import stub, run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | import sys 8 | 9 | 10 | def run_task(v): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = GaussianMLPPolicy( 14 | env_spec=env.spec, 15 | # The neural network policy should have two hidden layers, each with 32 hidden units.
16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | baseline = LinearFeatureBaseline(env_spec=env.spec) 20 | 21 | algo = TRPO( 22 | env=env, 23 | policy=policy, 24 | baseline=baseline, 25 | batch_size=4000, 26 | max_path_length=100, 27 | n_itr=40, 28 | discount=0.99, 29 | step_size=v["step_size"], 30 | # Uncomment both lines (this and the plot parameter below) to enable plotting 31 | # plot=True, 32 | ) 33 | algo.train() 34 | 35 | 36 | for step_size in [0.01, 0.05, 0.1]: 37 | for seed in [1, 11, 21, 31, 41]: 38 | run_experiment_lite( 39 | run_task, 40 | exp_prefix="first_exp", 41 | # Number of parallel workers for sampling 42 | n_parallel=1, 43 | # Only keep the snapshot parameters for the last iteration 44 | snapshot_mode="last", 45 | # Specifies the seed for the experiment. If this is not provided, a random seed 46 | # will be used 47 | seed=seed, 48 | # mode="local", 49 | mode="ec2", 50 | variant=dict(step_size=step_size, seed=seed) 51 | # plot=True, 52 | # terminate_machine=False, 53 | ) 54 | sys.exit() 55 | -------------------------------------------------------------------------------- /rllab/examples/cluster_gym_mujoco_demo.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 2 | from rllab.envs.normalized_env import normalize 3 | from sandbox.rocky.tf.envs.base import TfEnv 4 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 5 | from sandbox.rocky.tf.algos.trpo import TRPO 6 | from rllab.misc.instrument import run_experiment_lite 7 | from rllab.envs.gym_env import GymEnv 8 | import sys 9 | 10 | from rllab.misc.instrument import VariantGenerator, variant 11 | 12 | 13 | class VG(VariantGenerator): 14 | 15 | @variant 16 | def step_size(self): 17 | return [0.01, 0.05, 0.1] 18 | 19 | @variant 20 | def seed(self): 21 | return [1, 11, 21, 31, 41] 22 | 23 | 24 | def run_task(vv): 25 | 26 | env = TfEnv(normalize(GymEnv('HalfCheetah-v1', record_video=False, record_log=False))) 27 | 28 | policy = GaussianMLPPolicy( 29 | env_spec=env.spec, 30 | # The neural network policy should have two hidden layers, each with 32 hidden units. 31 | hidden_sizes=(32, 32), 32 | name="policy" 33 | ) 34 | 35 | baseline = LinearFeatureBaseline(env_spec=env.spec) 36 | 37 | algo = TRPO( 38 | env=env, 39 | policy=policy, 40 | baseline=baseline, 41 | batch_size=4000, 42 | max_path_length=100, 43 | n_itr=40, 44 | discount=0.99, 45 | step_size=vv["step_size"], 46 | # Uncomment both lines (this and the plot parameter below) to enable plotting 47 | # plot=True, 48 | ) 49 | algo.train() 50 | 51 | 52 | variants = VG().variants() 53 | 54 | for v in variants: 55 | 56 | run_experiment_lite( 57 | run_task, 58 | exp_prefix="first_exp", 59 | # Number of parallel workers for sampling 60 | n_parallel=1, 61 | # Only keep the snapshot parameters for the last iteration 62 | snapshot_mode="last", 63 | # Specifies the seed for the experiment. 
If this is not provided, a random seed 64 | # will be used 65 | seed=v["seed"], 66 | # mode="local", 67 | mode="ec2", 68 | variant=v, 69 | # plot=True, 70 | # terminate_machine=False, 71 | ) 72 | sys.exit() 73 | -------------------------------------------------------------------------------- /rllab/examples/ddpg_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.ddpg import DDPG 2 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 3 | from rllab.envs.normalized_env import normalize 4 | from rllab.misc.instrument import run_experiment_lite 5 | from rllab.exploration_strategies.ou_strategy import OUStrategy 6 | from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy 7 | from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction 8 | 9 | 10 | def run_task(*_): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = DeterministicMLPPolicy( 14 | env_spec=env.spec, 15 | # The neural network policy should have two hidden layers, each with 32 hidden units. 16 | hidden_sizes=(32, 32) 17 | ) 18 | 19 | es = OUStrategy(env_spec=env.spec) 20 | 21 | qf = ContinuousMLPQFunction(env_spec=env.spec) 22 | 23 | algo = DDPG( 24 | env=env, 25 | policy=policy, 26 | es=es, 27 | qf=qf, 28 | batch_size=32, 29 | max_path_length=100, 30 | epoch_length=1000, 31 | min_pool_size=10000, 32 | n_epochs=1000, 33 | discount=0.99, 34 | scale_reward=0.01, 35 | qf_learning_rate=1e-3, 36 | policy_learning_rate=1e-4, 37 | # Uncomment both lines (this and the plot parameter below) to enable plotting 38 | # plot=True, 39 | ) 40 | algo.train() 41 | 42 | run_experiment_lite( 43 | run_task, 44 | # Number of parallel workers for sampling 45 | n_parallel=1, 46 | # Only keep the snapshot parameters for the last iteration 47 | snapshot_mode="last", 48 | # Specifies the seed for the experiment. If this is not provided, a random seed 49 | # will be used 50 | seed=1, 51 | # plot=True, 52 | ) 53 | -------------------------------------------------------------------------------- /rllab/examples/nop_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.nop import NOP 2 | from rllab.baselines.zero_baseline import ZeroBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.uniform_control_policy import UniformControlPolicy 6 | 7 | env = normalize(CartpoleEnv()) 8 | 9 | policy = UniformControlPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
12 | ) 13 | 14 | baseline = ZeroBaseline(env_spec=env.spec) 15 | 16 | algo = NOP( 17 | env=env, 18 | policy=policy, 19 | baseline=baseline, 20 | batch_size=4000, 21 | max_path_length=100, 22 | n_itr=40, 23 | discount=0.99, 24 | step_size=0.01, 25 | ) 26 | algo.train() 27 | -------------------------------------------------------------------------------- /rllab/examples/point_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Env 2 | from rllab.spaces import Box 3 | from rllab.envs.base import Step 4 | import numpy as np 5 | 6 | 7 | class PointEnv(Env): 8 | @property 9 | def observation_space(self): 10 | return Box(low=-np.inf, high=np.inf, shape=(2,)) 11 | 12 | @property 13 | def action_space(self): 14 | return Box(low=-0.1, high=0.1, shape=(2,)) 15 | 16 | def reset(self): 17 | self._state = np.random.uniform(-1, 1, size=(2,)) 18 | observation = np.copy(self._state) 19 | return observation 20 | 21 | def step(self, action): 22 | self._state = self._state + action 23 | x, y = self._state 24 | reward = - (x ** 2 + y ** 2) ** 0.5 25 | done = abs(x) < 0.01 and abs(y) < 0.01 26 | next_observation = np.copy(self._state) 27 | return Step(observation=next_observation, reward=reward, done=done) 28 | 29 | def render(self): 30 | print('current state:', self._state) 31 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(CartpoleEnv()) 8 | 9 | policy = GaussianMLPPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 12 | hidden_sizes=(32, 32) 13 | ) 14 | 15 | baseline = LinearFeatureBaseline(env_spec=env.spec) 16 | 17 | algo = TRPO( 18 | env=env, 19 | policy=policy, 20 | baseline=baseline, 21 | batch_size=4000, 22 | max_path_length=100, 23 | n_itr=40, 24 | discount=0.99, 25 | step_size=0.01, 26 | ) 27 | algo.train() 28 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole_pickled.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | env = normalize(CartpoleEnv()) 11 | 12 | policy = GaussianMLPPolicy( 13 | env_spec=env.spec, 14 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
15 | hidden_sizes=(32, 32) 16 | ) 17 | 18 | baseline = LinearFeatureBaseline(env_spec=env.spec) 19 | 20 | algo = TRPO( 21 | env=env, 22 | policy=policy, 23 | baseline=baseline, 24 | batch_size=4000, 25 | max_path_length=100, 26 | n_itr=1000, 27 | discount=0.99, 28 | step_size=0.01, 29 | # Uncomment both lines (this and the plot parameter below) to enable plotting 30 | #plot=True 31 | ) 32 | algo.train() 33 | 34 | 35 | run_experiment_lite( 36 | run_task, 37 | # Number of parallel workers for sampling 38 | n_parallel=2, 39 | # Only keep the snapshot parameters for the last iteration 40 | snapshot_mode="last", 41 | # Specifies the seed for the experiment. If this is not provided, a random seed 42 | # will be used 43 | seed=1, 44 | #plot=True 45 | ) 46 | -------------------------------------------------------------------------------- /rllab/examples/trpo_cartpole_recurrent.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_gru_policy import GaussianGRUPolicy 6 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp 7 | from rllab.misc.instrument import run_experiment_lite 8 | 9 | 10 | def run_task(*_): 11 | env = normalize(CartpoleEnv()) 12 | 13 | policy = GaussianGRUPolicy( 14 | env_spec=env.spec, 15 | ) 16 | 17 | baseline = LinearFeatureBaseline(env_spec=env.spec) 18 | 19 | algo = TRPO( 20 | env=env, 21 | policy=policy, 22 | baseline=baseline, 23 | batch_size=4000, 24 | max_path_length=100, 25 | n_itr=10, 26 | discount=0.99, 27 | step_size=0.01, 28 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 29 | ) 30 | algo.train() 31 | 32 | 33 | run_experiment_lite( 34 | run_task, 35 | n_parallel=1, 36 | seed=1, 37 | ) 38 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.categorical_mlp_policy import CategoricalMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | # Please note that different environments with different action spaces may 11 | # require different policies. For example with a Discrete action space, a 12 | # CategoricalMLPPolicy works, but for a Box action space may need to use 13 | # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example) 14 | env = normalize(GymEnv("CartPole-v0")) 15 | 16 | policy = CategoricalMLPPolicy( 17 | env_spec=env.spec, 18 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
19 | hidden_sizes=(32, 32) 20 | ) 21 | 22 | baseline = LinearFeatureBaseline(env_spec=env.spec) 23 | 24 | algo = TRPO( 25 | env=env, 26 | policy=policy, 27 | baseline=baseline, 28 | batch_size=4000, 29 | max_path_length=env.horizon, 30 | n_itr=50, 31 | discount=0.99, 32 | step_size=0.01, 33 | # Uncomment both lines (this and the plot parameter below) to enable plotting 34 | # plot=True, 35 | ) 36 | algo.train() 37 | 38 | 39 | run_experiment_lite( 40 | run_task, 41 | # Number of parallel workers for sampling 42 | n_parallel=1, 43 | # Only keep the snapshot parameters for the last iteration 44 | snapshot_mode="last", 45 | # Specifies the seed for the experiment. If this is not provided, a random seed 46 | # will be used 47 | seed=1, 48 | # plot=True, 49 | ) 50 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_pendulum.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.gym_env import GymEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.misc.instrument import run_experiment_lite 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | 8 | 9 | def run_task(*_): 10 | # Please note that different environments with different action spaces may require different 11 | # policies. For example with a Box action space, a GaussianMLPPolicy works, but for a Discrete 12 | # action space may need to use a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example) 13 | env = normalize(GymEnv("Pendulum-v0")) 14 | 15 | policy = GaussianMLPPolicy( 16 | env_spec=env.spec, 17 | # The neural network policy should have two hidden layers, each with 32 hidden units. 18 | hidden_sizes=(32, 32) 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = TRPO( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=env.horizon, 29 | n_itr=50, 30 | discount=0.99, 31 | step_size=0.01, 32 | # Uncomment both lines (this and the plot parameter below) to enable plotting 33 | # plot=True, 34 | ) 35 | algo.train() 36 | 37 | 38 | run_experiment_lite( 39 | run_task, 40 | # Number of parallel workers for sampling 41 | n_parallel=1, 42 | # Only keep the snapshot parameters for the last iteration 43 | snapshot_mode="last", 44 | # Specifies the seed for the experiment. 
If this is not provided, a random seed 45 | # will be used 46 | seed=1, 47 | # plot=True, 48 | ) 49 | -------------------------------------------------------------------------------- /rllab/examples/trpo_gym_tf_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 2 | from rllab.envs.gym_env import GymEnv 3 | from rllab.envs.normalized_env import normalize 4 | from rllab.misc.instrument import stub, run_experiment_lite 5 | 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from sandbox.rocky.tf.policies.categorical_mlp_policy import CategoricalMLPPolicy 8 | from sandbox.rocky.tf.algos.trpo import TRPO 9 | 10 | stub(globals()) 11 | 12 | # Need to wrap in a tf environment and force_reset to true 13 | # see https://github.com/openai/rllab/issues/87#issuecomment-282519288 14 | env = TfEnv(normalize(GymEnv("CartPole-v0", force_reset=True))) 15 | 16 | policy = CategoricalMLPPolicy( 17 | name="policy", 18 | env_spec=env.spec, 19 | # The neural network policy should have two hidden layers, each with 32 hidden units. 20 | hidden_sizes=(32, 32) 21 | ) 22 | 23 | baseline = LinearFeatureBaseline(env_spec=env.spec) 24 | 25 | algo = TRPO( 26 | env=env, 27 | policy=policy, 28 | baseline=baseline, 29 | batch_size=4000, 30 | max_path_length=200, 31 | n_itr=120, 32 | discount=0.99, 33 | step_size=0.01, 34 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 35 | ) 36 | 37 | run_experiment_lite( 38 | algo.train(), 39 | n_parallel=1, 40 | snapshot_mode="last", 41 | seed=1 42 | ) 43 | -------------------------------------------------------------------------------- /rllab/examples/trpo_point.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from examples.point_env import PointEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(PointEnv()) 8 | policy = GaussianMLPPolicy( 9 | env_spec=env.spec, 10 | ) 11 | baseline = LinearFeatureBaseline(env_spec=env.spec) 12 | algo = TRPO( 13 | env=env, 14 | policy=policy, 15 | baseline=baseline, 16 | ) 17 | algo.train() 18 | -------------------------------------------------------------------------------- /rllab/examples/trpo_swimmer.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(SwimmerEnv()) 8 | 9 | policy = GaussianMLPPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
12 | hidden_sizes=(32, 32) 13 | ) 14 | 15 | baseline = LinearFeatureBaseline(env_spec=env.spec) 16 | 17 | algo = TRPO( 18 | env=env, 19 | policy=policy, 20 | baseline=baseline, 21 | batch_size=4000, 22 | max_path_length=500, 23 | n_itr=40, 24 | discount=0.99, 25 | step_size=0.01, 26 | ) 27 | algo.train() 28 | -------------------------------------------------------------------------------- /rllab/rllab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/algos/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/algos/base.py: -------------------------------------------------------------------------------- 1 | class Algorithm(object): 2 | pass 3 | 4 | 5 | class RLAlgorithm(Algorithm): 6 | 7 | def train(self): 8 | raise NotImplementedError 9 | -------------------------------------------------------------------------------- /rllab/rllab/algos/erwr.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.optimizers.lbfgs_optimizer import LbfgsOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class ERWR(VPG, Serializable): 7 | """ 8 | Episodic Reward Weighted Regression [1]_ 9 | 10 | Notes 11 | ----- 12 | This does not implement the original RwR [2]_ that deals with "immediate reward problems" since 13 | it doesn't find solutions that optimize for temporally delayed rewards. 14 | 15 | .. [1] Kober, Jens, and Jan R. Peters. "Policy search for motor primitives in robotics." Advances in neural information processing systems. 2009. 16 | .. [2] Peters, Jan, and Stefan Schaal. "Using reward-weighted regression for reinforcement learning of task space control." Approximate Dynamic Programming and Reinforcement Learning, 2007. ADPRL 2007. IEEE International Symposium on. IEEE, 2007. 
17 | """ 18 | 19 | def __init__( 20 | self, 21 | optimizer=None, 22 | optimizer_args=None, 23 | positive_adv=None, 24 | **kwargs): 25 | Serializable.quick_init(self, locals()) 26 | if optimizer is None: 27 | if optimizer_args is None: 28 | optimizer_args = dict() 29 | optimizer = LbfgsOptimizer(**optimizer_args) 30 | super(ERWR, self).__init__( 31 | optimizer=optimizer, 32 | positive_adv=True if positive_adv is None else positive_adv, 33 | **kwargs 34 | ) 35 | 36 | -------------------------------------------------------------------------------- /rllab/rllab/algos/nop.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.batch_polopt import BatchPolopt 2 | from rllab.misc.overrides import overrides 3 | 4 | 5 | class NOP(BatchPolopt): 6 | """ 7 | NOP (no optimization performed) policy search algorithm 8 | """ 9 | 10 | def __init__( 11 | self, 12 | **kwargs): 13 | super(NOP, self).__init__(**kwargs) 14 | 15 | @overrides 16 | def init_opt(self): 17 | pass 18 | 19 | @overrides 20 | def optimize_policy(self, itr, samples_data): 21 | pass 22 | 23 | @overrides 24 | def get_itr_snapshot(self, itr, samples_data): 25 | return dict() 26 | -------------------------------------------------------------------------------- /rllab/rllab/algos/ppo.py: -------------------------------------------------------------------------------- 1 | from rllab.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer 2 | from rllab.algos.npo import NPO 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class PPO(NPO, Serializable): 7 | """ 8 | Penalized Policy Optimization. 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | Serializable.quick_init(self, locals()) 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = PenaltyLbfgsOptimizer(**optimizer_args) 21 | super(PPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /rllab/rllab/algos/tnpg.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.misc import ext 4 | 5 | 6 | class TNPG(NPO): 7 | """ 8 | Truncated Natural Policy Gradient. 
9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | default_args = dict(max_backtracks=1) 18 | if optimizer_args is None: 19 | optimizer_args = default_args 20 | else: 21 | optimizer_args = dict(default_args, **optimizer_args) 22 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 23 | super(TNPG, self).__init__(optimizer=optimizer, **kwargs) 24 | -------------------------------------------------------------------------------- /rllab/rllab/algos/trpo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class TRPO(NPO): 7 | """ 8 | Trust Region Policy Optimization 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | if optimizer_args is None: 18 | optimizer_args = dict() 19 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 20 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 21 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/baselines/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/baselines/base.py: -------------------------------------------------------------------------------- 1 | from rllab.misc import autoargs 2 | 3 | 4 | class Baseline(object): 5 | 6 | def __init__(self, env_spec): 7 | self._mdp_spec = env_spec 8 | 9 | @property 10 | def algorithm_parallelized(self): 11 | return False 12 | 13 | def get_param_values(self): 14 | raise NotImplementedError 15 | 16 | def set_param_values(self, val): 17 | raise NotImplementedError 18 | 19 | def fit(self, paths): 20 | raise NotImplementedError 21 | 22 | def predict(self, path): 23 | raise NotImplementedError 24 | 25 | @classmethod 26 | @autoargs.add_args 27 | def add_args(cls, parser): 28 | pass 29 | 30 | @classmethod 31 | @autoargs.new_from_args 32 | def new_from_args(cls, args, mdp): 33 | pass 34 | 35 | def log_diagnostics(self, paths): 36 | """ 37 | Log extra information per iteration based on the collected paths 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/gaussian_conv_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.misc.overrides import overrides 5 | from rllab.core.parameterized import Parameterized 6 | from rllab.baselines.base import Baseline 7 | from rllab.regressors.gaussian_conv_regressor import GaussianConvRegressor 8 | 9 | 10 | class GaussianConvBaseline(Baseline, Parameterized): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | regressor_args=None, 17 | ): 18 | Serializable.quick_init(self, locals()) 19 | super(GaussianConvBaseline, self).__init__(env_spec) 20 | if regressor_args is None: 21 | regressor_args = dict() 22 | 23 | self._regressor = GaussianConvRegressor( 24 | input_shape=env_spec.observation_space.shape, 25 | output_dim=1, 26 | name="vf", 27 | 
**regressor_args 28 | ) 29 | 30 | @overrides 31 | def fit(self, paths): 32 | observations = np.concatenate([p["observations"] for p in paths]) 33 | returns = np.concatenate([p["returns"] for p in paths]) 34 | self._regressor.fit(observations, returns.reshape((-1, 1))) 35 | 36 | @overrides 37 | def predict(self, path): 38 | return self._regressor.predict(path["observations"]).flatten() 39 | 40 | @overrides 41 | def get_param_values(self, **tags): 42 | return self._regressor.get_param_values(**tags) 43 | 44 | @overrides 45 | def set_param_values(self, flattened_params, **tags): 46 | self._regressor.set_param_values(flattened_params, **tags) 47 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/gaussian_mlp_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.core.parameterized import Parameterized 5 | from rllab.baselines.base import Baseline 6 | from rllab.misc.overrides import overrides 7 | from rllab.regressors.gaussian_mlp_regressor import GaussianMLPRegressor 8 | 9 | 10 | class GaussianMLPBaseline(Baseline, Parameterized): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | num_seq_inputs=1, 17 | regressor_args=None, 18 | ): 19 | Serializable.quick_init(self, locals()) 20 | super(GaussianMLPBaseline, self).__init__(env_spec) 21 | if regressor_args is None: 22 | regressor_args = dict() 23 | 24 | self._regressor = GaussianMLPRegressor( 25 | input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,), 26 | output_dim=1, 27 | name="vf", 28 | **regressor_args 29 | ) 30 | 31 | @overrides 32 | def fit(self, paths): 33 | observations = np.concatenate([p["observations"] for p in paths]) 34 | returns = np.concatenate([p["returns"] for p in paths]) 35 | self._regressor.fit(observations, returns.reshape((-1, 1))) 36 | 37 | @overrides 38 | def predict(self, path): 39 | return self._regressor.predict(path["observations"]).flatten() 40 | 41 | @overrides 42 | def get_param_values(self, **tags): 43 | return self._regressor.get_param_values(**tags) 44 | 45 | @overrides 46 | def set_param_values(self, flattened_params, **tags): 47 | self._regressor.set_param_values(flattened_params, **tags) 48 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/linear_feature_baseline.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.base import Baseline 2 | from rllab.misc.overrides import overrides 3 | import numpy as np 4 | 5 | 6 | class LinearFeatureBaseline(Baseline): 7 | def __init__(self, env_spec, reg_coeff=1e-5): 8 | self._coeffs = None 9 | self._reg_coeff = reg_coeff 10 | 11 | @overrides 12 | def get_param_values(self, **tags): 13 | return self._coeffs 14 | 15 | @overrides 16 | def set_param_values(self, val, **tags): 17 | self._coeffs = val 18 | 19 | def _features(self, path): 20 | o = np.clip(path["observations"], -10, 10) 21 | l = len(path["rewards"]) 22 | al = np.arange(l).reshape(-1, 1) / 100.0 23 | return np.concatenate([o, o ** 2, al, al ** 2, al ** 3, np.ones((l, 1))], axis=1) 24 | 25 | @overrides 26 | def fit(self, paths): 27 | featmat = np.concatenate([self._features(path) for path in paths]) 28 | returns = np.concatenate([path["returns"] for path in paths]) 29 | reg_coeff = self._reg_coeff 30 | for _ in range(5): 31 | self._coeffs = np.linalg.lstsq( 32 | 
featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]), 33 | featmat.T.dot(returns) 34 | )[0] 35 | if not np.any(np.isnan(self._coeffs)): 36 | break 37 | reg_coeff *= 10 38 | 39 | @overrides 40 | def predict(self, path): 41 | if self._coeffs is None: 42 | return np.zeros(len(path["rewards"])) 43 | return self._features(path).dot(self._coeffs) 44 | -------------------------------------------------------------------------------- /rllab/rllab/baselines/zero_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.baselines.base import Baseline 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class ZeroBaseline(Baseline): 7 | 8 | def __init__(self, env_spec): 9 | pass 10 | 11 | @overrides 12 | def get_param_values(self, **kwargs): 13 | return None 14 | 15 | @overrides 16 | def set_param_values(self, val, **kwargs): 17 | pass 18 | 19 | @overrides 20 | def fit(self, paths): 21 | pass 22 | 23 | @overrides 24 | def predict(self, path): 25 | return np.zeros_like(path["rewards"]) 26 | -------------------------------------------------------------------------------- /rllab/rllab/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import os 3 | 4 | PROJECT_PATH = osp.abspath(osp.join(osp.dirname(__file__), '..')) 5 | 6 | LOG_DIR = PROJECT_PATH + "/data" 7 | 8 | USE_TF = False 9 | 10 | DOCKER_IMAGE = "DOCKER_IMAGE" 11 | 12 | DOCKERFILE_PATH = "/path/to/Dockerfile" 13 | 14 | KUBE_PREFIX = "rllab_" 15 | 16 | DOCKER_LOG_DIR = "/tmp/expt" 17 | 18 | POD_DIR = PROJECT_PATH + "/.pods" 19 | 20 | AWS_S3_PATH = None 21 | 22 | AWS_IMAGE_ID = None 23 | 24 | AWS_INSTANCE_TYPE = "m4.xlarge" 25 | 26 | AWS_KEY_NAME = "AWS_KEY_NAME" 27 | 28 | AWS_SPOT = True 29 | 30 | AWS_SPOT_PRICE = '1.0' 31 | 32 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", None) 33 | 34 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", None) 35 | 36 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab" 37 | 38 | AWS_SECURITY_GROUPS = ["rllab"] 39 | 40 | AWS_SECURITY_GROUP_IDS = [] 41 | 42 | AWS_NETWORK_INTERFACES = [] 43 | 44 | AWS_EXTRA_CONFIGS = dict() 45 | 46 | AWS_REGION_NAME = "us-east-1" 47 | 48 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*.pod/*"] 49 | 50 | DOCKER_CODE_DIR = "/root/code/rllab" 51 | 52 | AWS_CODE_SYNC_S3_PATH = "s3://to/be/overriden/in/personal" 53 | 54 | # whether to use fast code sync 55 | FAST_CODE_SYNC = True 56 | 57 | FAST_CODE_SYNC_IGNORES = [".git", "data", ".pods"] 58 | 59 | KUBE_DEFAULT_RESOURCES = { 60 | "requests": { 61 | "cpu": 0.8, 62 | } 63 | } 64 | 65 | KUBE_DEFAULT_NODE_SELECTOR = { 66 | "aws/type": "m4.xlarge", 67 | } 68 | 69 | MUJOCO_KEY_PATH = osp.expanduser("~/.mujoco") 70 | # MUJOCO_KEY_PATH = osp.join(osp.dirname(__file__), "../vendor/mujoco") 71 | 72 | ENV = {} 73 | 74 | EBS_OPTIMIZED = True 75 | 76 | if osp.exists(osp.join(osp.dirname(__file__), "config_personal.py")): 77 | from .config_personal import * 78 | else: 79 | print("Creating your personal config from template...") 80 | from shutil import copy 81 | copy(osp.join(PROJECT_PATH, "rllab/config_personal_template.py"), osp.join(PROJECT_PATH, "rllab/config_personal.py")) 82 | from .config_personal import * 83 | print("Personal config created, but you should probably edit it before further experiments " \ 84 | "are run") 85 | if 'CIRCLECI' not in os.environ: 86 | print("Exiting.") 87 | import sys; sys.exit(0) 88 | 89 | LABEL = "" 90 | 
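An aside on the baselines above: LinearFeatureBaseline.fit solves a ridge-regularized least-squares problem on hand-crafted features, retrying with a 10x larger regularization coefficient whenever the solution contains NaNs. Below is a minimal standalone sketch of that logic; it uses only NumPy, and the function name and the random data are illustrative, not part of the repo:

import numpy as np

def ridge_fit(featmat, returns, reg_coeff=1e-5, n_attempts=5):
    # Solve (X^T X + reg * I) w = X^T y, escalating reg until w is finite.
    coeffs = None
    for _ in range(n_attempts):
        coeffs = np.linalg.lstsq(
            featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]),
            featmat.T.dot(returns)
        )[0]
        if not np.any(np.isnan(coeffs)):
            break
        reg_coeff *= 10
    return coeffs

# Illustrative usage: 100 samples of a 6-dimensional feature matrix.
X = np.random.randn(100, 6)
y = X.dot(np.arange(6.0)) + 0.1 * np.random.randn(100)
w = ridge_fit(X, y)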
-------------------------------------------------------------------------------- /rllab/rllab/config_personal_template.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | USE_GPU = False 4 | 5 | DOCKER_IMAGE = "dementrock/rllab3-shared" 6 | 7 | KUBE_PREFIX = "template_" 8 | 9 | DOCKER_LOG_DIR = "/tmp/expt" 10 | 11 | AWS_IMAGE_ID = "ami-67c5d00d" 12 | 13 | if USE_GPU: 14 | AWS_INSTANCE_TYPE = "g2.2xlarge" 15 | else: 16 | AWS_INSTANCE_TYPE = "c4.2xlarge" 17 | 18 | AWS_KEY_NAME = "research_virginia" 19 | 20 | AWS_SPOT = True 21 | 22 | AWS_SPOT_PRICE = '10.0' 23 | 24 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab" 25 | 26 | AWS_SECURITY_GROUPS = ["rllab"] 27 | 28 | AWS_REGION_NAME = "us-west-2" 29 | 30 | AWS_CODE_SYNC_S3_PATH = "e" 31 | 32 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*src/*", 33 | "*.pods/*", "*tests/*", "*examples/*", "docs/*"] 34 | 35 | LOCAL_CODE_DIR = "" 36 | 37 | AWS_S3_PATH = "" 38 | 39 | LABEL = "template" 40 | 41 | DOCKER_CODE_DIR = "/root/code/rllab" 42 | 43 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", "") 44 | 45 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", "") 46 | -------------------------------------------------------------------------------- /rllab/rllab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/core/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/core/lasagne_powered.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.misc.overrides import overrides 3 | import lasagne.layers as L 4 | 5 | 6 | class LasagnePowered(Parameterized): 7 | def __init__(self, output_layers): 8 | self._output_layers = output_layers 9 | super(LasagnePowered, self).__init__() 10 | 11 | @property 12 | def output_layers(self): 13 | return self._output_layers 14 | 15 | @overrides 16 | def get_params_internal(self, **tags): # this gives ALL the vars (not the params values) 17 | return L.get_all_params( # this lasagne function also returns all var below the passed layers 18 | L.concat(self._output_layers), 19 | **tags 20 | ) 21 | -------------------------------------------------------------------------------- /rllab/rllab/core/serializable.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | 4 | 5 | class Serializable(object): 6 | 7 | def __init__(self, *args, **kwargs): 8 | self.__args = args 9 | self.__kwargs = kwargs 10 | 11 | def quick_init(self, locals_): 12 | if getattr(self, "_serializable_initialized", False): 13 | return 14 | if sys.version_info >= (3, 0): 15 | spec = inspect.getfullargspec(self.__init__) 16 | # Exclude the first "self" parameter 17 | if spec.varkw: 18 | kwargs = locals_[spec.varkw] 19 | else: 20 | kwargs = dict() 21 | else: 22 | spec = inspect.getargspec(self.__init__) 23 | if spec.keywords: 24 | kwargs = locals_[spec.keywords] 25 | else: 26 | kwargs = dict() 27 | if spec.varargs: 28 | varargs = locals_[spec.varargs] 29 | else: 30 | varargs = tuple() 31 | in_order_args = [locals_[arg] for arg in spec.args][1:] 32 | self.__args = tuple(in_order_args) + varargs 33 | self.__kwargs = kwargs 34 | setattr(self, "_serializable_initialized", True) 35 | 36 | def __getstate__(self): 37 | return {"__args": self.__args, 
"__kwargs": self.__kwargs} 38 | 39 | def __setstate__(self, d): 40 | out = type(self)(*d["__args"], **d["__kwargs"]) 41 | self.__dict__.update(out.__dict__) 42 | 43 | @classmethod 44 | def clone(cls, obj, **kwargs): 45 | assert isinstance(obj, Serializable) 46 | d = obj.__getstate__() 47 | 48 | # Split the entries in kwargs between positional and keyword arguments 49 | # and update d['__args'] and d['__kwargs'], respectively. 50 | if sys.version_info >= (3, 0): 51 | spec = inspect.getfullargspec(obj.__init__) 52 | else: 53 | spec = inspect.getargspec(obj.__init__) 54 | in_order_args = spec.args[1:] 55 | 56 | d["__args"] = list(d["__args"]) 57 | for kw, val in kwargs.items(): 58 | if kw in in_order_args: 59 | d["__args"][in_order_args.index(kw)] = val 60 | else: 61 | d["__kwargs"][kw] = val 62 | 63 | out = type(obj).__new__(type(obj)) 64 | out.__setstate__(d) 65 | return out 66 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/distributions/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/distributions/base.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | 3 | class Distribution(object): 4 | 5 | @property 6 | def dim(self): 7 | raise NotImplementedError 8 | 9 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 10 | """ 11 | Compute the symbolic KL divergence of two distributions 12 | """ 13 | raise NotImplementedError 14 | 15 | def kl(self, old_dist_info, new_dist_info): 16 | """ 17 | Compute the KL divergence of two distributions 18 | """ 19 | raise NotImplementedError 20 | 21 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 22 | raise NotImplementedError 23 | 24 | def entropy(self, dist_info): 25 | raise NotImplementedError 26 | 27 | def log_likelihood_sym(self, x_var, dist_info_vars): 28 | raise NotImplementedError 29 | 30 | def likelihood_sym(self, x_var, dist_info_vars): 31 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 32 | 33 | def log_likelihood(self, xs, dist_info): 34 | raise NotImplementedError 35 | 36 | @property 37 | def dist_info_keys(self): 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .base import Distribution 4 | import theano.tensor as TT 5 | import numpy as np 6 | 7 | TINY = 1e-8 8 | 9 | 10 | class Bernoulli(Distribution): 11 | def __init__(self, dim): 12 | self._dim = dim 13 | 14 | @property 15 | def dim(self): 16 | return self._dim 17 | 18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 19 | old_p = old_dist_info_vars["p"] 20 | new_p = new_dist_info_vars["p"] 21 | kl = old_p * (TT.log(old_p + TINY) - TT.log(new_p + TINY)) + \ 22 | (1 - old_p) * (TT.log(1 - old_p + TINY) - TT.log(1 - new_p + TINY)) 23 | return TT.sum(kl, axis=-1) 24 | 25 | def kl(self, old_dist_info, new_dist_info): 26 | old_p = old_dist_info["p"] 27 | new_p = new_dist_info["p"] 28 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \ 29 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY)) 30 | return np.sum(kl, axis=-1) 31 
| 32 | def sample(self, dist_info): 33 | p = np.asarray(dist_info["p"]) 34 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p) 35 | 36 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 37 | old_p = old_dist_info_vars["p"] 38 | new_p = new_dist_info_vars["p"] 39 | return TT.prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY), 40 | axis=-1) 41 | 42 | def log_likelihood_sym(self, x_var, dist_info_vars): 43 | p = dist_info_vars["p"] 44 | return TT.sum(x_var * TT.log(p + TINY) + (1 - x_var) * TT.log(1 - p + TINY), axis=-1) 45 | 46 | def log_likelihood(self, xs, dist_info): 47 | p = dist_info["p"] 48 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1) 49 | 50 | def entropy(self, dist_info): 51 | p = dist_info["p"] 52 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1) 53 | 54 | @property 55 | def dist_info_keys(self): 56 | return ["p"] 57 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/delta.py: -------------------------------------------------------------------------------- 1 | from rllab.distributions.base import Distribution 2 | 3 | class Delta(Distribution): 4 | @property 5 | def dim(self): 6 | return 0 7 | 8 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 9 | return None 10 | 11 | def kl(self, old_dist_info, new_dist_info): 12 | return None 13 | 14 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 15 | raise NotImplementedError 16 | 17 | def entropy(self, dist_info): 18 | raise NotImplementedError 19 | 20 | def log_likelihood_sym(self, x_var, dist_info_vars): 21 | raise NotImplementedError 22 | 23 | def likelihood_sym(self, x_var, dist_info_vars): 24 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 25 | 26 | def log_likelihood(self, xs, dist_info): 27 | return None 28 | 29 | @property 30 | def dist_info_keys(self): 31 | return None 32 | 33 | def entropy(self,dist_info): 34 | return 0 35 | -------------------------------------------------------------------------------- /rllab/rllab/distributions/recurrent_diagonal_gaussian.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | import numpy as np 3 | from rllab.distributions.base import Distribution 4 | from rllab.distributions.diagonal_gaussian import DiagonalGaussian 5 | 6 | RecurrentDiagonalGaussian = DiagonalGaussian 7 | -------------------------------------------------------------------------------- /rllab/rllab/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/box2d/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/cartpole_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.envs.box2d.parser import find_body 3 | 4 | from rllab.core.serializable import Serializable 5 | from 
rllab.envs.box2d.box2d_env import Box2DEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | class CartpoleEnv(Box2DEnv, Serializable): 11 | 12 | @autoargs.inherit(Box2DEnv.__init__) 13 | def __init__(self, *args, **kwargs): 14 | self.max_pole_angle = .2 15 | self.max_cart_pos = 2.4 16 | self.max_cart_speed = 4. 17 | self.max_pole_speed = 4. 18 | self.reset_range = 0.05 19 | super(CartpoleEnv, self).__init__( 20 | self.model_path("cartpole.xml.mako"), 21 | *args, **kwargs 22 | ) 23 | self.cart = find_body(self.world, "cart") 24 | self.pole = find_body(self.world, "pole") 25 | Serializable.__init__(self, *args, **kwargs) 26 | 27 | @overrides 28 | def reset(self): 29 | self._set_state(self.initial_state) 30 | self._invalidate_state_caches() 31 | bounds = np.array([ 32 | self.max_cart_pos, 33 | self.max_cart_speed, 34 | self.max_pole_angle, 35 | self.max_pole_speed 36 | ]) 37 | low, high = -self.reset_range*bounds, self.reset_range*bounds 38 | xpos, xvel, apos, avel = np.random.uniform(low, high) 39 | self.cart.position = (xpos, self.cart.position[1]) 40 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1]) 41 | self.pole.angle = apos 42 | self.pole.angularVelocity = avel 43 | return self.get_current_obs() 44 | 45 | @overrides 46 | def compute_reward(self, action): 47 | yield 48 | notdone = 1 - int(self.is_current_done()) 49 | ucost = 1e-5*(action**2).sum() 50 | xcost = 1 - np.cos(self.pole.angle) 51 | yield notdone * 10 - notdone * xcost - notdone * ucost 52 | 53 | @overrides 54 | def is_current_done(self): 55 | return abs(self.cart.position[0]) > self.max_cart_pos or \ 56 | abs(self.pole.angle) > self.max_pole_angle 57 | 58 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/cartpole_swingup_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | from rllab.envs.box2d.parser import find_body 4 | 5 | from rllab.core.serializable import Serializable 6 | from rllab.envs.box2d.box2d_env import Box2DEnv 7 | from rllab.misc import autoargs 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | # Tornio, Matti, and Tapani Raiko. "Variational Bayesian approach for 12 | # nonlinear identification and control." Proc. of the IFAC Workshop on 13 | # Nonlinear Model Predictive Control for Fast Systems, NMPC FS06. 2006. 
14 | class CartpoleSwingupEnv(Box2DEnv, Serializable): 15 | 16 | @autoargs.inherit(Box2DEnv.__init__) 17 | def __init__(self, *args, **kwargs): 18 | super(CartpoleSwingupEnv, self).__init__( 19 | self.model_path("cartpole.xml.mako"), 20 | *args, **kwargs 21 | ) 22 | self.max_cart_pos = 3 23 | self.max_reward_cart_pos = 3 24 | self.cart = find_body(self.world, "cart") 25 | self.pole = find_body(self.world, "pole") 26 | Serializable.__init__(self, *args, **kwargs) 27 | 28 | @overrides 29 | def reset(self): 30 | self._set_state(self.initial_state) 31 | self._invalidate_state_caches() 32 | bounds = np.array([ 33 | [-1, -2, np.pi-1, -3], 34 | [1, 2, np.pi+1, 3], 35 | ]) 36 | low, high = bounds 37 | xpos, xvel, apos, avel = np.random.uniform(low, high) 38 | self.cart.position = (xpos, self.cart.position[1]) 39 | self.cart.linearVelocity = (xvel, self.cart.linearVelocity[1]) 40 | self.pole.angle = apos 41 | self.pole.angularVelocity = avel 42 | return self.get_current_obs() 43 | 44 | @overrides 45 | def compute_reward(self, action): 46 | yield 47 | if self.is_current_done(): 48 | yield -100 49 | else: 50 | if abs(self.cart.position[0]) > self.max_reward_cart_pos: 51 | yield -1 52 | else: 53 | yield np.cos(self.pole.angle) 54 | 55 | @overrides 56 | def is_current_done(self): 57 | return abs(self.cart.position[0]) > self.max_cart_pos 58 | 59 | @overrides 60 | def action_from_keys(self, keys): 61 | if keys[pygame.K_LEFT]: 62 | return np.asarray([-10]) 63 | elif keys[pygame.K_RIGHT]: 64 | return np.asarray([+10]) 65 | else: 66 | return np.asarray([0]) 67 | 68 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.envs.box2d.parser import find_body 3 | 4 | from rllab.core.serializable import Serializable 5 | from rllab.envs.box2d.box2d_env import Box2DEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | # http://mlg.eng.cam.ac.uk/pilco/ 11 | class DoublePendulumEnv(Box2DEnv, Serializable): 12 | 13 | @autoargs.inherit(Box2DEnv.__init__) 14 | def __init__(self, *args, **kwargs): 15 | # make sure mdp-level step is 100ms long 16 | kwargs["frame_skip"] = kwargs.get("frame_skip", 2) 17 | if kwargs.get("template_args", {}).get("noise", False): 18 | self.link_len = (np.random.rand()-0.5) + 1 19 | else: 20 | self.link_len = 1 21 | kwargs["template_args"] = kwargs.get("template_args", {}) 22 | kwargs["template_args"]["link_len"] = self.link_len 23 | super(DoublePendulumEnv, self).__init__( 24 | self.model_path("double_pendulum.xml.mako"), 25 | *args, **kwargs 26 | ) 27 | self.link1 = find_body(self.world, "link1") 28 | self.link2 = find_body(self.world, "link2") 29 | Serializable.__init__(self, *args, **kwargs) 30 | 31 | @overrides 32 | def reset(self): 33 | self._set_state(self.initial_state) 34 | self._invalidate_state_caches() 35 | stds = np.array([0.1, 0.1, 0.01, 0.01]) 36 | pos1, pos2, v1, v2 = np.random.randn(*stds.shape) * stds 37 | self.link1.angle = pos1 38 | self.link2.angle = pos2 39 | self.link1.angularVelocity = v1 40 | self.link2.angularVelocity = v2 41 | return self.get_current_obs() 42 | 43 | def get_tip_pos(self): 44 | cur_center_pos = self.link2.position 45 | cur_angle = self.link2.angle 46 | cur_pos = ( 47 | cur_center_pos[0] - self.link_len*np.sin(cur_angle), 48 | cur_center_pos[1] - self.link_len*np.cos(cur_angle) 49 | ) 50 | return cur_pos 51 | 52 | 
@overrides 53 | def compute_reward(self, action): 54 | yield 55 | tgt_pos = np.asarray([0, self.link_len * 2]) 56 | cur_pos = self.get_tip_pos() 57 | dist = np.linalg.norm(cur_pos - tgt_pos) 58 | yield -dist 59 | 60 | def is_current_done(self): 61 | return False 62 | 63 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/car_parking.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/car_parking.xml.rb: -------------------------------------------------------------------------------- 1 | car_height = 1.0 2 | car_width = 0.6 3 | car_mass = 1 4 | car_density = car_mass / car_height / car_width 5 | 6 | wheel_height = 0.3 7 | wheel_width = 0.1 8 | wheel_mass = 0.1 9 | wheel_density = wheel_mass / wheel_height / wheel_width 10 | wheel_max_deg = 30 11 | 12 | phantom_group = -1 13 | common = { group: phantom_group } 14 | 15 | box2d { 16 | world(timestep: 0.05, gravity: [0, 0]) { 17 | body(name: :goal, type: :static, position: [0, 0]) { 18 | fixture(common.merge(shape: :circle, radius: 1)) 19 | } 20 | 21 | car_pos = [3, 4] 22 | body(name: :car, type: :dynamic, position: car_pos) { 23 | rect( 24 | box: [car_width / 2, car_height / 2], 25 | density: car_density, 26 | group: phantom_group, 27 | ) 28 | } 29 | [:left_front_wheel, :right_front_wheel, :left_rear_wheel, :right_rear_wheel].each do |wheel| 30 | x_pos = car_width / 2 31 | x_pos *= wheel =~ /left/ ? -1 : 1 32 | y_pos = wheel =~ /front/ ? 0.2 : -0.3 33 | body(name: wheel, type: :dynamic, position: [car_pos[0] + x_pos, car_pos[1] + y_pos]) { 34 | rect( 35 | box: [wheel_width / 2, wheel_height / 2], 36 | density: wheel_density, 37 | group: phantom_group, 38 | ) 39 | } 40 | # limit = wheel =~ /front/ ? 
[-wheel_max_deg, wheel_max_deg] : [0, 0] 41 | limit = [0, 0] 42 | joint( 43 | type: :revolute, 44 | name: "#{wheel}_joint", 45 | bodyA: :car, 46 | bodyB: wheel, 47 | localAnchorA: [x_pos, y_pos], 48 | localAnchorB: [0, 0], 49 | limit: limit, 50 | ) 51 | end 52 | control( 53 | type: :force, 54 | bodies: [:left_front_wheel, :right_front_wheel], 55 | anchor: [0, 0], 56 | direction: [0, 1], 57 | ctrllimit: [-10.N, 10.N], 58 | ) 59 | state body: :car, type: :xvel 60 | state body: :car, type: :yvel 61 | state body: :car, type: :dist, to: :goal 62 | state body: :car, type: :angle, to: :goal, transform: :cos 63 | state body: :car, type: :angle, to: :goal, transform: :sin 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/cartpole.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | from rllab.misc.mako_utils import compute_rect_vertices 3 | cart_width = 4.0 / (12 ** 0.5) 4 | cart_height = 3.0 / (12 ** 0.5) 5 | 6 | pole_width = 0.1 7 | pole_height = 1.0 8 | noise = opts.get("noise", False) 9 | if noise: 10 | import numpy as np 11 | pole_height += (np.random.rand()-0.5) * pole_height * 1 12 | 13 | cart_friction = 0.0005 14 | pole_friction = 0.000002 15 | %> 16 | 17 | 18 | 19 | 20 | 26 | 27 | 28 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/double_pendulum.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | from rllab.misc.mako_utils import compute_rect_vertices 3 | link_len = opts['link_len'] 4 | link_width = 0.1 5 | %> 6 | 7 | 8 | 9 | 10 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/models/mountain_car.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | noise = opts.get("noise", False) 3 | track_width = 4 4 | if noise: 5 | import numpy as np 6 | track_width += np.random.uniform(-1, 1) 7 | %> 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/mountain_car_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | from rllab.envs.box2d.parser import find_body 4 | 5 | from rllab.core.serializable import Serializable 6 | from rllab.envs.box2d.box2d_env import Box2DEnv 7 | from rllab.misc import autoargs 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | class MountainCarEnv(Box2DEnv, Serializable): 12 | 13 | @autoargs.inherit(Box2DEnv.__init__) 14 | @autoargs.arg("height_bonus_coeff", type=float, 15 | help="Height bonus added to each step's reward") 16 | @autoargs.arg("goal_cart_pos", type=float, 17 | help="Goal horizontal position") 18 | def __init__(self, 19 | height_bonus=1., 20 | goal_cart_pos=0.6, 21 | *args, **kwargs): 22 | super(MountainCarEnv, self).__init__( 23 | self.model_path("mountain_car.xml.mako"), 24 | *args, **kwargs 25 | ) 26 | self.max_cart_pos = 2 27 | self.goal_cart_pos = goal_cart_pos 28 | self.height_bonus = height_bonus 29 | self.cart = find_body(self.world, "cart") 30 | Serializable.quick_init(self, locals()) 31 | 32 | @overrides 33 | def 
compute_reward(self, action): 34 | yield 35 | yield (-1 + self.height_bonus * self.cart.position[1]) 36 | 37 | @overrides 38 | def is_current_done(self): 39 | return self.cart.position[0] >= self.goal_cart_pos \ 40 | or abs(self.cart.position[0]) >= self.max_cart_pos 41 | 42 | @overrides 43 | def reset(self): 44 | self._set_state(self.initial_state) 45 | self._invalidate_state_caches() 46 | bounds = np.array([ 47 | [-1], 48 | [1], 49 | ]) 50 | low, high = bounds 51 | xvel = np.random.uniform(low, high) 52 | self.cart.linearVelocity = (float(xvel), self.cart.linearVelocity[1]) 53 | return self.get_current_obs() 54 | 55 | @overrides 56 | def action_from_keys(self, keys): 57 | if keys[pygame.K_LEFT]: 58 | return np.asarray([-1]) 59 | elif keys[pygame.K_RIGHT]: 60 | return np.asarray([+1]) 61 | else: 62 | return np.asarray([0]) 63 | 64 | -------------------------------------------------------------------------------- /rllab/rllab/envs/box2d/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .xml_box2d import world_from_xml, find_body, find_joint 2 | -------------------------------------------------------------------------------- /rllab/rllab/envs/env_spec.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.base import Space 3 | 4 | 5 | class EnvSpec(Serializable): 6 | 7 | def __init__( 8 | self, 9 | observation_space, 10 | action_space): 11 | """ 12 | :type observation_space: Space 13 | :type action_space: Space 14 | """ 15 | Serializable.quick_init(self, locals()) 16 | self._observation_space = observation_space 17 | self._action_space = action_space 18 | 19 | @property 20 | def observation_space(self): 21 | return self._observation_space 22 | 23 | @property 24 | def action_space(self): 25 | return self._action_space 26 | -------------------------------------------------------------------------------- /rllab/rllab/envs/identification_env.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.envs.proxy_env import ProxyEnv 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class IdentificationEnv(ProxyEnv, Serializable): 7 | 8 | def __init__(self, mdp_cls, mdp_args): 9 | Serializable.quick_init(self, locals()) 10 | self.mdp_cls = mdp_cls 11 | self.mdp_args = dict(mdp_args) 12 | self.mdp_args["template_args"] = dict(noise=True) 13 | mdp = self.gen_mdp() 14 | super(IdentificationEnv, self).__init__(mdp) 15 | 16 | def gen_mdp(self): 17 | return self.mdp_cls(**self.mdp_args) 18 | 19 | @overrides 20 | def reset(self): 21 | if getattr(self, "_mdp", None): 22 | if hasattr(self._wrapped_env, "release"): 23 | self._wrapped_env.release() 24 | self._wrapped_env = self.gen_mdp() 25 | return super(IdentificationEnv, self).reset() 26 | 27 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/gather/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/ant_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | 4 | 5 | class AntGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = AntEnv 8 | ORI_IND = 6 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/point_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/gather/swimmer_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/half_cheetah_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import logger 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | def smooth_abs(x, param): 11 | return np.sqrt(np.square(x) + np.square(param)) - param 12 | 13 | 14 | class HalfCheetahEnv(MujocoEnv, Serializable): 15 | 16 | FILE = 'half_cheetah.xml' 17 | 18 | def __init__(self, *args, **kwargs): 19 | super(HalfCheetahEnv, self).__init__(*args, **kwargs) 20 | Serializable.__init__(self, *args, **kwargs) 21 | 22 | def get_current_obs(self): 23 | return np.concatenate([ 24 | self.model.data.qpos.flatten()[1:], 25 | self.model.data.qvel.flat, 26 | self.get_body_com("torso").flat, 27 | ]) 28 | 29 | def get_body_xmat(self, body_name): 30 | idx = self.model.body_names.index(body_name) 31 | return self.model.data.xmat[idx].reshape((3, 3)) 32 | 33 | def get_body_com(self, body_name): 34 | idx = self.model.body_names.index(body_name) 35 | return self.model.data.com_subtree[idx] 36 | 37 | def step(self, action): 38 | self.forward_dynamics(action) 39 | next_obs = self.get_current_obs() 40 | action = np.clip(action, *self.action_bounds) 41 | ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(action)) 42 | run_cost = -1 * self.get_body_comvel("torso")[0] 43 | cost = ctrl_cost + run_cost 44 | reward = -cost 45 | done = False 46 | return Step(next_obs, reward, done) 47 | 48 | @overrides 49 | def log_diagnostics(self, paths): 50 | progs = [ 51 | path["observations"][-1][-3] - path["observations"][0][-3] 52 | for path in paths 53 | ] 54 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 55 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 56 | logger.record_tabular('MinForwardProgress', np.min(progs)) 57 | logger.record_tabular('StdForwardProgress', np.std(progs)) 58 | 
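A minimal, self-contained sketch (not part of the repository) of the reward computed in HalfCheetahEnv.step above: a quadratic control cost plus a forward-progress term taken from the torso centre-of-mass velocity. The function name and arguments below are illustrative only.

import numpy as np

def half_cheetah_reward(action, torso_forward_velocity, action_low, action_high, ctrl_cost_coeff=1e-1):
    # actions are clipped to their bounds before being costed, as in step()
    action = np.clip(action, action_low, action_high)
    ctrl_cost = ctrl_cost_coeff * 0.5 * np.sum(np.square(action))
    run_cost = -1.0 * torso_forward_velocity  # negative cost rewards forward motion
    return -(ctrl_cost + run_cost)

# With zero control effort the reward reduces to the forward velocity, e.g.
# half_cheetah_reward(np.zeros(6), 1.5, -1.0, 1.0) == 1.5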
-------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/hill/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/ant_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.ant_env import AntEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class AntHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = AntEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([0.0, 0.0]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/half_cheetah_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.half_cheetah_env import HalfCheetahEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class HalfCheetahHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = HalfCheetahEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/hopper_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.hopper_env import HopperEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class HopperHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = HopperEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-1.0, -1.0]), np.array([-0.5, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/swimmer3d_hill_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.swimmer3d_env import Swimmer3DEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class Swimmer3DHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = Swimmer3DEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-3.0, -1.5]), np.array([0.0, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/hill/walker2d_hill_env.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.envs.mujoco.hill.hill_env import HillEnv 4 | from rllab.envs.mujoco.walker2d_env import Walker2DEnv 5 | from rllab.misc.overrides import overrides 6 | import rllab.envs.mujoco.hill.terrain as terrain 7 | from rllab.spaces import Box 8 | 9 | class Walker2DHillEnv(HillEnv): 10 | 11 | MODEL_CLASS = Walker2DEnv 12 | 13 | @overrides 14 | def _mod_hfield(self, hfield): 15 | # clear a flat patch for the robot to start off from 16 | return terrain.clear_patch(hfield, Box(np.array([-2.0, -2.0]), np.array([-0.5, -0.5]))) -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/humanoid_env.py: -------------------------------------------------------------------------------- 1 | from .simple_humanoid_env import SimpleHumanoidEnv 2 | 3 | 4 | # Taken from Wojciech's code 5 | class HumanoidEnv(SimpleHumanoidEnv): 6 | 7 | FILE = 'humanoid.xml' 8 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/inverted_double_pendulum_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | 9 | 10 | class InvertedDoublePendulumEnv(MujocoEnv, Serializable): 11 | FILE = 'inverted_double_pendulum.xml.mako' 12 | 13 | @autoargs.arg("random_start", type=bool, 14 | help="Randomized starting position by adjusting the angles" 15 | "When this is false, the double pendulum started out" 16 | "in balanced position") 17 | def __init__( 18 | self, 19 | *args, **kwargs): 20 | self.random_start = kwargs.get("random_start", True) 21 | super(InvertedDoublePendulumEnv, self).__init__(*args, **kwargs) 22 | Serializable.quick_init(self, locals()) 23 | 24 | @overrides 25 | def get_current_obs(self): 26 | return np.concatenate([ 27 | self.model.data.qpos[:1], # cart x pos 28 | np.sin(self.model.data.qpos[1:]), # link angles 29 | np.cos(self.model.data.qpos[1:]), 30 | np.clip(self.model.data.qvel, -10, 10), 31 | np.clip(self.model.data.qfrc_constraint, -10, 10) 32 | ]).reshape(-1) 33 | 34 | @overrides 35 | def step(self, action): 36 | self.forward_dynamics(action) 37 | next_obs = self.get_current_obs() 38 | x, _, y = self.model.data.site_xpos[0] 39 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 40 | v1, v2 = self.model.data.qvel[1:3] 41 | vel_penalty = 1e-3 * v1 ** 2 + 5e-3 * v2 ** 2 42 | alive_bonus = 10 43 | r = float(alive_bonus - dist_penalty - vel_penalty) 44 | done = y <= 1 45 | return Step(next_obs, r, done) 46 | 47 | @overrides 48 | def reset_mujoco(self, init_state=None): 49 | assert init_state is None 50 | qpos = np.copy(self.init_qpos) 51 | if self.random_start: 52 | qpos[1] = (np.random.rand() - 0.5) * 40 / 180. 
* np.pi 53 | self.model.data.qpos = qpos 54 | self.model.data.qvel = self.init_qvel 55 | self.model.data.qacc = self.init_qacc 56 | self.model.data.ctrl = self.init_ctrl 57 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/envs/mujoco/maze/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/ant_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | 4 | 5 | class AntMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = AntEnv 8 | ORI_IND = 6 9 | 10 | MAZE_HEIGHT = 2 11 | MAZE_SIZE_SCALING = 3.0 12 | 13 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/point_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 2 11 | MAZE_SIZE_SCALING = 3.0 12 | 13 | MANUAL_COLLISION = True 14 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/maze/swimmer_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 0.5 11 | MAZE_SIZE_SCALING = 4 12 | MAZE_MAKE_CONTACTS = True 13 | 14 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/point_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Step 2 | from .mujoco_env import MujocoEnv 3 | from rllab.core.serializable import Serializable 4 | from rllab.misc.overrides import overrides 5 | import numpy as np 6 | import math 7 | from rllab.mujoco_py import glfw 8 | 9 | 10 | class PointEnv(MujocoEnv, Serializable): 11 | 12 | """ 13 | Use Left, Right, Up, Down, A (steer left), D (steer right) 14 | """ 15 | 16 | FILE = 'point.xml' 17 | 18 | def __init__(self, *args, **kwargs): 19 | super(PointEnv, self).__init__(*args, **kwargs) 20 | Serializable.quick_init(self, locals()) 21 | 22 | def step(self, action): 23 | qpos = np.copy(self.model.data.qpos) 24 | qpos[2, 0] += action[1] 25 | ori = qpos[2, 0] 26 | # compute increment in each direction 27 | dx = math.cos(ori) * action[0] 28 | dy = math.sin(ori) * action[0] 29 | # ensure that the robot is within reasonable range 30 | qpos[0, 0] = np.clip(qpos[0, 0] + dx, -7, 7) 31 | qpos[1, 0] = np.clip(qpos[1, 0] + dy, -7, 7) 32 | self.model.data.qpos = qpos 33 | self.model.forward() 34 | next_obs = self.get_current_obs() 35 | return Step(next_obs, 0, False) 36 | 37 | def get_xy(self): 38 | qpos = self.model.data.qpos 39 | return qpos[0, 0], qpos[1, 0] 40 | 41 | def set_xy(self, xy): 42 | qpos = np.copy(self.model.data.qpos) 43 | qpos[0, 0] = xy[0] 44 | qpos[1, 0] = xy[1] 45 | self.model.data.qpos = qpos 46 | 
self.model.forward() 47 | 48 | @overrides 49 | def action_from_key(self, key): 50 | lb, ub = self.action_bounds 51 | if key == glfw.KEY_LEFT: 52 | return np.array([0, ub[0]*0.3]) 53 | elif key == glfw.KEY_RIGHT: 54 | return np.array([0, lb[0]*0.3]) 55 | elif key == glfw.KEY_UP: 56 | return np.array([ub[1], 0]) 57 | elif key == glfw.KEY_DOWN: 58 | return np.array([lb[1], 0]) 59 | else: 60 | return np.array([0, 0]) 61 | 62 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/swimmer3d_env.py: -------------------------------------------------------------------------------- 1 | from .swimmer_env import SwimmerEnv 2 | 3 | class Swimmer3DEnv(SwimmerEnv): 4 | FILE = 'swimmer3d.xml' -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/swimmer_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.base import Step 2 | from rllab.misc.overrides import overrides 3 | from .mujoco_env import MujocoEnv 4 | import numpy as np 5 | from rllab.core.serializable import Serializable 6 | from rllab.misc import logger 7 | from rllab.misc import autoargs 8 | 9 | 10 | class SwimmerEnv(MujocoEnv, Serializable): 11 | 12 | FILE = 'swimmer.xml' 13 | ORI_IND = 2 14 | 15 | @autoargs.arg('ctrl_cost_coeff', type=float, 16 | help='cost coefficient for controls') 17 | def __init__( 18 | self, 19 | ctrl_cost_coeff=1e-2, 20 | *args, **kwargs): 21 | self.ctrl_cost_coeff = ctrl_cost_coeff 22 | super(SwimmerEnv, self).__init__(*args, **kwargs) 23 | Serializable.quick_init(self, locals()) 24 | 25 | def get_current_obs(self): 26 | return np.concatenate([ 27 | self.model.data.qpos.flat, 28 | self.model.data.qvel.flat, 29 | self.get_body_com("torso").flat, 30 | ]).reshape(-1) 31 | 32 | def get_ori(self): 33 | return self.model.data.qpos[self.__class__.ORI_IND] 34 | 35 | def step(self, action): 36 | self.forward_dynamics(action) 37 | next_obs = self.get_current_obs() 38 | lb, ub = self.action_bounds 39 | scaling = (ub - lb) * 0.5 40 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * np.sum( 41 | np.square(action / scaling)) 42 | forward_reward = self.get_body_comvel("torso")[0] 43 | reward = forward_reward - ctrl_cost 44 | done = False 45 | return Step(next_obs, reward, done) 46 | 47 | @overrides 48 | def log_diagnostics(self, paths): 49 | if len(paths) > 0: 50 | progs = [ 51 | path["observations"][-1][-3] - path["observations"][0][-3] 52 | for path in paths 53 | ] 54 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 55 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 56 | logger.record_tabular('MinForwardProgress', np.min(progs)) 57 | logger.record_tabular('StdForwardProgress', np.std(progs)) 58 | else: 59 | logger.record_tabular('AverageForwardProgress', np.nan) 60 | logger.record_tabular('MaxForwardProgress', np.nan) 61 | logger.record_tabular('MinForwardProgress', np.nan) 62 | logger.record_tabular('StdForwardProgress', np.nan) 63 | -------------------------------------------------------------------------------- /rllab/rllab/envs/mujoco/walker2d_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.mujoco.mujoco_env import MujocoEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc import logger 8 | from rllab.misc.overrides import overrides 9 | 10 | 11 | def smooth_abs(x, 
param): 12 | return np.sqrt(np.square(x) + np.square(param)) - param 13 | 14 | 15 | class Walker2DEnv(MujocoEnv, Serializable): 16 | 17 | FILE = 'walker2d.xml' 18 | 19 | @autoargs.arg('ctrl_cost_coeff', type=float, 20 | help='cost coefficient for controls') 21 | def __init__( 22 | self, 23 | ctrl_cost_coeff=1e-2, 24 | *args, **kwargs): 25 | self.ctrl_cost_coeff = ctrl_cost_coeff 26 | super(Walker2DEnv, self).__init__(*args, **kwargs) 27 | Serializable.quick_init(self, locals()) 28 | 29 | def get_current_obs(self): 30 | return np.concatenate([ 31 | self.model.data.qpos.flat, 32 | self.model.data.qvel.flat, 33 | self.get_body_com("torso").flat, 34 | ]) 35 | 36 | def step(self, action): 37 | self.forward_dynamics(action) 38 | next_obs = self.get_current_obs() 39 | action = np.clip(action, *self.action_bounds) 40 | lb, ub = self.action_bounds 41 | scaling = (ub - lb) * 0.5 42 | ctrl_cost = 0.5 * self.ctrl_cost_coeff * \ 43 | np.sum(np.square(action / scaling)) 44 | forward_reward = self.get_body_comvel("torso")[0] 45 | reward = forward_reward - ctrl_cost 46 | qpos = self.model.data.qpos 47 | done = not (qpos[0] > 0.8 and qpos[0] < 2.0 48 | and qpos[2] > -1.0 and qpos[2] < 1.0) 49 | return Step(next_obs, reward, done) 50 | 51 | @overrides 52 | def log_diagnostics(self, paths): 53 | progs = [ 54 | path["observations"][-1][-3] - path["observations"][0][-3] 55 | for path in paths 56 | ] 57 | logger.record_tabular('AverageForwardProgress', np.mean(progs)) 58 | logger.record_tabular('MaxForwardProgress', np.max(progs)) 59 | logger.record_tabular('MinForwardProgress', np.min(progs)) 60 | logger.record_tabular('StdForwardProgress', np.std(progs)) 61 | 62 | -------------------------------------------------------------------------------- /rllab/rllab/envs/proxy_env.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from .base import Env 3 | 4 | 5 | class ProxyEnv(Env, Serializable): 6 | def __init__(self, wrapped_env): 7 | Serializable.quick_init(self, locals()) 8 | self._wrapped_env = wrapped_env 9 | 10 | @property 11 | def wrapped_env(self): 12 | return self._wrapped_env 13 | 14 | def reset(self, *args, **kwargs): 15 | return self._wrapped_env.reset(*args, **kwargs) 16 | 17 | @property 18 | def action_space(self): 19 | return self._wrapped_env.action_space 20 | 21 | @property 22 | def observation_space(self): 23 | return self._wrapped_env.observation_space 24 | 25 | def step(self, action): 26 | return self._wrapped_env.step(action) 27 | 28 | def render(self, *args, **kwargs): 29 | return self._wrapped_env.render(*args, **kwargs) 30 | 31 | def log_diagnostics(self, paths, *args, **kwargs): 32 | self._wrapped_env.log_diagnostics(paths, *args, **kwargs) 33 | 34 | @property 35 | def horizon(self): 36 | return self._wrapped_env.horizon 37 | 38 | def terminate(self): 39 | self._wrapped_env.terminate() 40 | 41 | def get_param_values(self): 42 | return self._wrapped_env.get_param_values() 43 | 44 | def set_param_values(self, params): 45 | self._wrapped_env.set_param_values(params) 46 | -------------------------------------------------------------------------------- /rllab/rllab/envs/sliding_mem_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.proxy_env import ProxyEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import 
overrides 8 | from rllab.spaces import Box 9 | 10 | 11 | class SlidingMemEnv(ProxyEnv, Serializable): 12 | 13 | def __init__( 14 | self, 15 | env, 16 | n_steps=4, 17 | axis=0, 18 | ): 19 | super().__init__(env) 20 | Serializable.quick_init(self, locals()) 21 | self.n_steps = n_steps 22 | self.axis = axis 23 | self.buffer = None 24 | 25 | def reset_buffer(self, new_): 26 | assert self.axis == 0 27 | self.buffer = np.zeros(self.observation_space.shape, dtype=np.float32) 28 | self.buffer[0:] = new_ 29 | 30 | def add_to_buffer(self, new_): 31 | assert self.axis == 0 32 | self.buffer[1:] = self.buffer[:-1] 33 | self.buffer[:1] = new_ 34 | 35 | @property 36 | def observation_space(self): 37 | origin = self._wrapped_env.observation_space 38 | return Box( 39 | *[ 40 | np.repeat(b, self.n_steps, axis=self.axis) 41 | for b in origin.bounds 42 | ] 43 | ) 44 | 45 | @overrides 46 | def reset(self): 47 | obs = self._wrapped_env.reset() 48 | self.reset_buffer(obs) 49 | return self.buffer 50 | 51 | @overrides 52 | def step(self, action): 53 | next_obs, reward, done, info = self._wrapped_env.step(action) 54 | self.add_to_buffer(next_obs) 55 | return Step(self.buffer, reward, done, **info) 56 | 57 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/exploration_strategies/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/base.py: -------------------------------------------------------------------------------- 1 | class ExplorationStrategy(object): 2 | def get_action(self, t, observation, policy, **kwargs): 3 | raise NotImplementedError 4 | 5 | def reset(self): 6 | pass 7 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/gaussian_strategy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.box import Box 3 | from rllab.exploration_strategies.base import ExplorationStrategy 4 | import numpy as np 5 | 6 | 7 | class GaussianStrategy(ExplorationStrategy, Serializable): 8 | """ 9 | This strategy adds Gaussian noise to the action taken by the deterministic policy. 
10 | """ 11 | 12 | def __init__(self, env_spec, max_sigma=1.0, min_sigma=0.1, decay_period=1000000): 13 | assert isinstance(env_spec.action_space, Box) 14 | assert len(env_spec.action_space.shape) == 1 15 | Serializable.quick_init(self, locals()) 16 | self._max_sigma = max_sigma 17 | self._min_sigma = min_sigma 18 | self._decay_period = decay_period 19 | self._action_space = env_spec.action_space 20 | 21 | def get_action(self, t, observation, policy, **kwargs): 22 | action, agent_info = policy.get_action(observation) 23 | sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(1.0, t * 1.0 / self._decay_period) 24 | return np.clip(action + np.random.normal(size=len(action)) * sigma, self._action_space.low, 25 | self._action_space.high) 26 | -------------------------------------------------------------------------------- /rllab/rllab/exploration_strategies/ou_strategy.py: -------------------------------------------------------------------------------- 1 | from rllab.misc.overrides import overrides 2 | from rllab.misc.ext import AttrDict 3 | from rllab.core.serializable import Serializable 4 | from rllab.spaces.box import Box 5 | from rllab.exploration_strategies.base import ExplorationStrategy 6 | import numpy as np 7 | import numpy.random as nr 8 | 9 | 10 | class OUStrategy(ExplorationStrategy, Serializable): 11 | """ 12 | This strategy implements the Ornstein-Uhlenbeck process, which adds 13 | time-correlated noise to the actions taken by the deterministic policy. 14 | The OU process satisfies the following stochastic differential equation: 15 | dxt = theta*(mu - xt)*dt + sigma*dWt 16 | where Wt denotes the Wiener process 17 | """ 18 | 19 | def __init__(self, env_spec, mu=0, theta=0.15, sigma=0.3, **kwargs): 20 | assert isinstance(env_spec.action_space, Box) 21 | assert len(env_spec.action_space.shape) == 1 22 | Serializable.quick_init(self, locals()) 23 | self.mu = mu 24 | self.theta = theta 25 | self.sigma = sigma 26 | self.action_space = env_spec.action_space 27 | self.state = np.ones(self.action_space.flat_dim) * self.mu 28 | self.reset() 29 | 30 | def __getstate__(self): 31 | d = Serializable.__getstate__(self) 32 | d["state"] = self.state 33 | return d 34 | 35 | def __setstate__(self, d): 36 | Serializable.__setstate__(self, d) 37 | self.state = d["state"] 38 | 39 | @overrides 40 | def reset(self): 41 | self.state = np.ones(self.action_space.flat_dim) * self.mu 42 | 43 | def evolve_state(self): 44 | x = self.state 45 | dx = self.theta * (self.mu - x) + self.sigma * nr.randn(len(x)) 46 | self.state = x + dx 47 | return self.state 48 | 49 | @overrides 50 | def get_action(self, t, observation, policy, **kwargs): 51 | action, _ = policy.get_action(observation) 52 | ou_state = self.evolve_state() 53 | return np.clip(action + ou_state, self.action_space.low, self.action_space.high) 54 | 55 | 56 | if __name__ == "__main__": 57 | ou = OUStrategy(env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1,))), mu=0, theta=0.15, sigma=0.3) 58 | states = [] 59 | for i in range(1000): 60 | states.append(ou.evolve_state()[0]) 61 | import matplotlib.pyplot as plt 62 | 63 | plt.plot(states) 64 | plt.show() 65 | -------------------------------------------------------------------------------- /rllab/rllab/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/__init__.py 
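A short, self-contained sketch (not part of the repository) of the Euler-Maruyama discretization behind OUStrategy.evolve_state above (exploration_strategies/ou_strategy.py), which integrates dx_t = theta*(mu - x_t)*dt + sigma*dW_t with an implicit step size of dt = 1; the explicit dt argument and the function name here are assumptions made only to make that choice visible.

import numpy as np

def ou_step(x, mu=0.0, theta=0.15, sigma=0.3, dt=1.0):
    # dx_t = theta * (mu - x_t) * dt + sigma * dW_t, with dW_t ~ N(0, dt)
    noise = np.random.randn(*np.shape(x))
    return x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * noise

# With dt = 1 this matches evolve_state(); the noise is time-correlated,
# which is why OUStrategy.reset() is called at the start of each episode.
x = np.zeros(3)
for _ in range(1000):
    x = ou_step(x)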
-------------------------------------------------------------------------------- /rllab/rllab/misc/mako_utils.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_rect_vertices(fromp, to, radius): 3 | x1, y1 = fromp 4 | x2, y2 = to 5 | if abs(y1 - y2) < 1e-6: 6 | dx = 0 7 | dy = radius 8 | else: 9 | dx = radius * 1.0 / (((x1 - x2) / (y1 - y2)) ** 2 + 1) ** 0.5 10 | # equivalently dx = radius * (y2-y1).to_f / ((x2-x1)**2 + (y2-y1)**2)**0.5 11 | dy = (radius**2 - dx**2) ** 0.5 12 | dy *= -1 if (x1 - x2) * (y1 - y2) > 0 else 1 13 | 14 | return ";".join([",".join(map(str, r)) for r in [ 15 | [x1 + dx, y1 + dy], 16 | [x2 + dx, y2 + dy], 17 | [x2 - dx, y2 - dy], 18 | [x1 - dx, y1 - dy], 19 | ]]) 20 | 21 | -------------------------------------------------------------------------------- /rllab/rllab/misc/meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/misc/meta.py -------------------------------------------------------------------------------- /rllab/rllab/misc/resolve.py: -------------------------------------------------------------------------------- 1 | from pydoc import locate 2 | import types 3 | from rllab.misc.ext import iscanr 4 | 5 | 6 | def classesinmodule(module): 7 | md = module.__dict__ 8 | return [ 9 | md[c] for c in md if ( 10 | isinstance(md[c], type) and md[c].__module__ == module.__name__ 11 | ) 12 | ] 13 | 14 | 15 | def locate_with_hint(class_path, prefix_hints=[]): 16 | module_or_class = locate(class_path) 17 | if module_or_class is None: 18 | # for hint in iscanr(lambda x, y: x + "." + y, prefix_hints): 19 | # module_or_class = locate(hint + "." + class_path) 20 | # if module_or_class: 21 | # break 22 | hint = ".".join(prefix_hints) 23 | module_or_class = locate(hint + "." 
+ class_path) 24 | return module_or_class 25 | 26 | 27 | def load_class(class_path, superclass=None, prefix_hints=[]): 28 | module_or_class = locate_with_hint(class_path, prefix_hints) 29 | if module_or_class is None: 30 | raise ValueError("Cannot find module or class under path %s" % class_path) 31 | if type(module_or_class) == types.ModuleType: 32 | if superclass: 33 | classes = [x for x in classesinmodule(module_or_class) if issubclass(x, superclass)] 34 | if len(classes) == 0: 35 | if superclass: 36 | raise ValueError('Could not find any subclasses of %s defined in module %s' % (str(superclass), class_path)) 37 | else: 38 | raise ValueError('Could not find any classes defined in module %s' % (class_path)) 39 | elif len(classes) > 1: 40 | if superclass: 41 | raise ValueError('Multiple subclasses of %s are defined in the module %s' % (str(superclass), class_path)) 42 | else: 43 | raise ValueError('Multiple classes are defined in the module %s' % (class_path)) 44 | else: 45 | return classes[0] 46 | elif isinstance(module_or_class, type): 47 | if superclass is None or issubclass(module_or_class, superclass): 48 | return module_or_class 49 | else: 50 | raise ValueError('The class %s is not a subclass of %s' % (str(module_or_class), str(superclass))) 51 | else: 52 | raise ValueError('Unsupported object: %s' % str(module_or_class)) 53 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/.rvmrc: -------------------------------------------------------------------------------- 1 | rvm use 2.1.0@mjpy --create 2 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'pry' 4 | gem 'activesupport' 5 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (4.1.8) 5 | i18n (~> 0.6, >= 0.6.9) 6 | json (~> 1.7, >= 1.7.7) 7 | minitest (~> 5.1) 8 | thread_safe (~> 0.1) 9 | tzinfo (~> 1.1) 10 | coderay (1.1.0) 11 | i18n (0.7.0) 12 | json (1.8.1) 13 | method_source (0.8.2) 14 | minitest (5.5.1) 15 | pry (0.10.1) 16 | coderay (~> 1.1.0) 17 | method_source (~> 0.8.1) 18 | slop (~> 3.4) 19 | slop (3.6.0) 20 | thread_safe (0.3.4) 21 | tzinfo (1.2.2) 22 | thread_safe (~> 0.1) 23 | 24 | PLATFORMS 25 | ruby 26 | 27 | DEPENDENCIES 28 | activesupport 29 | pry 30 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .mjviewer import MjViewer 2 | from .mjcore import MjModel 3 | from .mjcore import register_license 4 | import os 5 | from .mjconstants import * 6 | 7 | register_license(os.path.join(os.path.dirname(__file__), 8 | '../../vendor/mujoco/mjkey.txt')) 9 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/gen_binding.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | parent_path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P ) 3 | mujoco_path=$parent_path/../../vendor/mujoco 4 | rm /tmp/code_gen_mujoco.h 5 | cat $mujoco_path/mjdata.h >> /tmp/code_gen_mujoco.h && \ 6 | cat $mujoco_path/mjmodel.h >> 
/tmp/code_gen_mujoco.h && \ 7 | cat $mujoco_path/mjrender.h >> /tmp/code_gen_mujoco.h && \ 8 | cat $mujoco_path/mjvisualize.h >> /tmp/code_gen_mujoco.h && \ 9 | ruby $parent_path/codegen.rb /tmp/code_gen_mujoco.h $mujoco_path/mjxmacro.h > $parent_path/mjtypes.py 10 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/mjconstants.py: -------------------------------------------------------------------------------- 1 | MOUSE_ROTATE_V = 1 2 | MOUSE_ROTATE_H = 2 3 | MOUSE_MOVE_V = 3 4 | MOUSE_MOVE_H = 4 5 | MOUSE_ZOOM = 5 6 | 7 | mjOBJ_BODY = 1 8 | -------------------------------------------------------------------------------- /rllab/rllab/mujoco_py/mjextra.py: -------------------------------------------------------------------------------- 1 | def append_objects(cur, extra): 2 | for i in range(cur.ngeom, cur.ngeom + extra.ngeom): 3 | cur.geoms[i] = extra.geoms[i - cur.ngeom] 4 | cur.ngeom = cur.ngeom + extra.ngeom 5 | if cur.ngeom > cur.maxgeom: 6 | raise ValueError("buffer limit exceeded!") 7 | -------------------------------------------------------------------------------- /rllab/rllab/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/optimizers/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/optimizers/minibatch_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class BatchDataset(object): 5 | 6 | def __init__(self, inputs, batch_size, extra_inputs=None): 7 | self._inputs = [ 8 | i for i in inputs 9 | ] 10 | if extra_inputs is None: 11 | extra_inputs = [] 12 | self._extra_inputs = extra_inputs 13 | self._batch_size = batch_size 14 | if batch_size is not None: 15 | self._ids = np.arange(self._inputs[0].shape[0]) 16 | self.update() 17 | 18 | @property 19 | def number_batches(self): 20 | if self._batch_size is None: 21 | return 1 22 | return int(np.ceil(self._inputs[0].shape[0] * 1.0 / self._batch_size)) 23 | 24 | def iterate(self, update=True): 25 | if self._batch_size is None: 26 | yield list(self._inputs) + list(self._extra_inputs) 27 | else: 28 | for itr in range(self.number_batches): 29 | batch_start = itr * self._batch_size 30 | batch_end = (itr + 1) * self._batch_size 31 | batch_ids = self._ids[batch_start:batch_end] 32 | batch = [d[batch_ids] for d in self._inputs] 33 | yield list(batch) + list(self._extra_inputs) 34 | if update: 35 | self.update() 36 | 37 | def update(self): 38 | np.random.shuffle(self._ids) 39 | -------------------------------------------------------------------------------- /rllab/rllab/plotter/__init__.py: -------------------------------------------------------------------------------- 1 | from .plotter import * 2 | -------------------------------------------------------------------------------- /rllab/rllab/plotter/plotter.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | from queue import Empty 3 | from multiprocessing import Process, Queue 4 | from rllab.sampler.utils import rollout 5 | import numpy as np 6 | 7 | __all__ = [ 8 | 'init_worker', 9 | 'init_plot', 10 | 'update_plot' 11 | ] 12 | 13 | process = None 14 | queue = None 15 | 16 | 17 | def _worker_start(): 18 | env = None 19 | policy = None 20 | max_length = None 21 | try: 22 | while True: 23 | msgs = {} 
24 | # Only fetch the last message of each type 25 | while True: 26 | try: 27 | msg = queue.get_nowait() 28 | msgs[msg[0]] = msg[1:] 29 | except Empty: 30 | break 31 | if 'stop' in msgs: 32 | break 33 | elif 'update' in msgs: 34 | env, policy = msgs['update'] 35 | # env.start_viewer() 36 | elif 'demo' in msgs: 37 | param_values, max_length = msgs['demo'] 38 | policy.set_param_values(param_values) 39 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5) 40 | else: 41 | if max_length: 42 | rollout(env, policy, max_path_length=max_length, animated=True, speedup=5) 43 | except KeyboardInterrupt: 44 | pass 45 | 46 | 47 | def _shutdown_worker(): 48 | if process: 49 | queue.put(['stop']) 50 | queue.close() 51 | process.join() 52 | 53 | 54 | def init_worker(): 55 | global process, queue 56 | queue = Queue() 57 | process = Process(target=_worker_start) 58 | process.start() 59 | atexit.register(_shutdown_worker) 60 | 61 | 62 | def init_plot(env, policy): 63 | queue.put(['update', env, policy]) 64 | 65 | 66 | def update_plot(policy, max_length=np.inf): 67 | queue.put(['demo', policy.get_param_values(), max_length]) 68 | -------------------------------------------------------------------------------- /rllab/rllab/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/policies/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/policies/base.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | 3 | 4 | class Policy(Parameterized): 5 | def __init__(self, env_spec): 6 | Parameterized.__init__(self) 7 | self._env_spec = env_spec 8 | 9 | # Should be implemented by all policies 10 | 11 | def get_action(self, observation): 12 | raise NotImplementedError 13 | 14 | def reset(self): 15 | pass 16 | 17 | @property 18 | def observation_space(self): 19 | return self._env_spec.observation_space 20 | 21 | @property 22 | def action_space(self): 23 | return self._env_spec.action_space 24 | 25 | @property 26 | def recurrent(self): 27 | """ 28 | Indicates whether the policy is recurrent. 29 | :return: 30 | """ 31 | return False 32 | 33 | def log_diagnostics(self, paths): 34 | """ 35 | Log extra information per iteration based on the collected paths 36 | """ 37 | pass 38 | 39 | @property 40 | def state_info_keys(self): 41 | """ 42 | Return keys for the information related to the policy's state when taking an action. 43 | :return: 44 | """ 45 | return list() 46 | 47 | def terminate(self): 48 | """ 49 | Clean up operation 50 | """ 51 | pass 52 | 53 | 54 | class StochasticPolicy(Policy): 55 | 56 | @property 57 | def distribution(self): 58 | """ 59 | :rtype Distribution 60 | """ 61 | raise NotImplementedError 62 | 63 | def dist_info_sym(self, obs_var, state_info_vars): 64 | """ 65 | Return the symbolic distribution information about the actions. 66 | :param obs_var: symbolic variable for observations 67 | :param state_info_vars: a dictionary whose values should contain information about the state of the policy at 68 | the time it received the observation 69 | :return: 70 | """ 71 | raise NotImplementedError 72 | 73 | def dist_info(self, obs, state_infos): 74 | """ 75 | Return the distribution information about the actions. 
76 | :param obs_var: observation values 77 | :param state_info_vars: a dictionary whose values should contain information about the state of the policy at 78 | the time it received the observation 79 | :return: 80 | """ 81 | raise NotImplementedError 82 | -------------------------------------------------------------------------------- /rllab/rllab/policies/uniform_control_policy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.core.serializable import Serializable 3 | from rllab.distributions.delta import Delta 4 | from rllab.policies.base import Policy 5 | from rllab.misc.overrides import overrides 6 | 7 | 8 | class UniformControlPolicy(Policy): 9 | def __init__( 10 | self, 11 | env_spec, 12 | ): 13 | Serializable.quick_init(self, locals()) 14 | super(UniformControlPolicy, self).__init__(env_spec=env_spec) 15 | 16 | @overrides 17 | def get_action(self, observation): 18 | return self.action_space.sample(), dict() 19 | 20 | def get_params_internal(self, **tags): 21 | return [] 22 | 23 | def get_actions(self, observations): 24 | return self.action_space.sample_n(len(observations)), dict() 25 | 26 | @property 27 | def vectorized(self): 28 | return True 29 | 30 | def reset(self, dones=None): 31 | pass 32 | 33 | @property 34 | def distribution(self): 35 | # Just a placeholder 36 | return Delta() 37 | -------------------------------------------------------------------------------- /rllab/rllab/q_functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/q_functions/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/q_functions/base.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | 3 | 4 | class QFunction(Parameterized): 5 | pass 6 | -------------------------------------------------------------------------------- /rllab/rllab/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /rllab/rllab/regressors/product_regressor.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import numpy as np 5 | from rllab.core.serializable import Serializable 6 | 7 | 8 | class ProductRegressor(Serializable): 9 | """ 10 | A class for performing MLE regression by fitting a product distribution to the outputs. A separate regressor will 11 | be trained for each individual input distribution. 
12 | """ 13 | 14 | def __init__(self, regressors): 15 | """ 16 | :param regressors: List of individual regressors 17 | """ 18 | Serializable.quick_init(self, locals()) 19 | self.regressors = regressors 20 | self.output_dims = [x.output_dim for x in regressors] 21 | 22 | def _split_ys(self, ys): 23 | ys = np.asarray(ys) 24 | split_ids = np.cumsum(self.output_dims)[:-1] 25 | return np.split(ys, split_ids, axis=1) 26 | 27 | def fit(self, xs, ys): 28 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys)): 29 | regressor.fit(xs, split_ys) 30 | 31 | def predict(self, xs): 32 | return np.concatenate([ 33 | regressor.predict(xs) for regressor in self.regressors 34 | ], axis=1) 35 | 36 | def sample_predict(self, xs): 37 | return np.concatenate([ 38 | regressor.sample_predict(xs) for regressor in self.regressors 39 | ], axis=1) 40 | 41 | def predict_log_likelihood(self, xs, ys): 42 | return np.sum([ 43 | regressor.predict_log_likelihood(xs, split_ys) 44 | for regressor, split_ys in zip(self.regressors, self._split_ys(ys)) 45 | ], axis=0) 46 | 47 | def get_param_values(self, **tags): 48 | return np.concatenate( 49 | [regressor.get_param_values(**tags) for regressor in self.regressors] 50 | ) 51 | 52 | def set_param_values(self, flattened_params, **tags): 53 | param_dims = [ 54 | np.prod(regressor.get_param_shapes(**tags)) 55 | for regressor in self.regressors 56 | ] 57 | split_ids = np.cumsum(param_dims)[:-1] 58 | for regressor, split_param_values in zip(self.regressors, np.split(flattened_params, split_ids)): 59 | regressor.set_param_values(split_param_values) 60 | -------------------------------------------------------------------------------- /rllab/rllab/sampler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/rllab/sampler/__init__.py -------------------------------------------------------------------------------- /rllab/rllab/sampler/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.misc import tensor_utils 3 | import time 4 | 5 | 6 | def rollout(env, agent, max_path_length=np.inf, animated=False, speedup=1, save_video=True, video_filename='sim_out.mp4', reset_args=None, policy_contexts=None): 7 | observations = [] 8 | actions = [] 9 | rewards = [] 10 | agent_infos = [] 11 | env_infos = [] 12 | images = [] 13 | o = env.reset(reset_args=reset_args, policy_contexts=policy_contexts) 14 | agent.reset() 15 | path_length = 0 16 | if animated: 17 | env.render() 18 | while path_length < max_path_length: 19 | a, agent_info = agent.get_action(o) 20 | next_o, r, d, env_info = env.step(a) 21 | observations.append(env.observation_space.flatten(o)) 22 | rewards.append(r) 23 | actions.append(env.action_space.flatten(a)) 24 | agent_infos.append(agent_info) 25 | env_infos.append(env_info) 26 | path_length += 1 27 | if d: # and not animated: # TODO testing 28 | break 29 | o = next_o 30 | if animated: 31 | env.render() 32 | timestep = 0.05 33 | time.sleep(timestep / speedup) 34 | if save_video: 35 | from PIL import Image 36 | image = env.wrapped_env.wrapped_env.get_viewer().get_image() 37 | pil_image = Image.frombytes('RGB', (image[1], image[2]), image[0]) 38 | images.append(np.flipud(np.array(pil_image))) 39 | 40 | if animated: 41 | if save_video and len(images) >= max_path_length: 42 | import moviepy.editor as mpy 43 | clip = mpy.ImageSequenceClip(images, fps=20*speedup) 44 | 
if video_filename[-3:] == 'gif': 45 | clip.write_gif(video_filename, fps=20*speedup) 46 | else: 47 | clip.write_videofile(video_filename, fps=20*speedup) 48 | #return 49 | 50 | return dict( 51 | observations=tensor_utils.stack_tensor_list(observations), 52 | actions=tensor_utils.stack_tensor_list(actions), 53 | rewards=tensor_utils.stack_tensor_list(rewards), 54 | agent_infos=tensor_utils.stack_tensor_dict_list(agent_infos), 55 | env_infos=tensor_utils.stack_tensor_dict_list(env_infos), 56 | ) -------------------------------------------------------------------------------- /rllab/rllab/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .product import Product 2 | from .discrete import Discrete 3 | from .box import Box 4 | 5 | __all__ = ["Product", "Discrete", "Box"] -------------------------------------------------------------------------------- /rllab/rllab/spaces/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Space(object): 5 | """ 6 | Provides a classification of state spaces and action spaces, 7 | so you can write generic code that applies to any Environment. 8 | E.g. to choose a random action. 9 | """ 10 | 11 | def sample(self, seed=0): 12 | """ 13 | Uniformly sample a random element of this space 14 | """ 15 | raise NotImplementedError 16 | 17 | def contains(self, x): 18 | """ 19 | Return boolean specifying if x is a valid 20 | member of this space 21 | """ 22 | raise NotImplementedError 23 | 24 | def flatten(self, x): 25 | raise NotImplementedError 26 | 27 | def unflatten(self, x): 28 | raise NotImplementedError 29 | 30 | def flatten_n(self, xs): 31 | raise NotImplementedError 32 | 33 | def unflatten_n(self, xs): 34 | raise NotImplementedError 35 | 36 | @property 37 | def flat_dim(self): 38 | """ 39 | The dimension of the flattened vector of the tensor representation 40 | """ 41 | raise NotImplementedError 42 | 43 | def new_tensor_variable(self, name, extra_dims): 44 | """ 45 | Create a Theano tensor variable given the name and extra dimensions prepended 46 | :param name: name of the variable 47 | :param extra_dims: extra dimensions in the front 48 | :return: the created tensor variable 49 | """ 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /rllab/rllab/spaces/box.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from .base import Space 3 | import numpy as np 4 | from rllab.misc import ext 5 | import theano 6 | 7 | 8 | class Box(Space): 9 | """ 10 | A box in R^n. 11 | I.e., each coordinate is bounded.
12 | """ 13 | 14 | def __init__(self, low, high, shape=None): 15 | """ 16 | Two kinds of valid input: 17 | Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided 18 | Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape 19 | """ 20 | if shape is None: 21 | assert low.shape == high.shape 22 | self.low = low 23 | self.high = high 24 | else: 25 | assert np.isscalar(low) and np.isscalar(high) 26 | self.low = low + np.zeros(shape) 27 | self.high = high + np.zeros(shape) 28 | 29 | def sample(self): 30 | return np.random.uniform(low=self.low, high=self.high, size=self.low.shape) 31 | 32 | def contains(self, x): 33 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all() 34 | 35 | @property 36 | def shape(self): 37 | return self.low.shape 38 | 39 | @property 40 | def flat_dim(self): 41 | return np.prod(self.low.shape) 42 | 43 | @property 44 | def bounds(self): 45 | return self.low, self.high 46 | 47 | def flatten(self, x): 48 | return np.asarray(x).flatten() 49 | 50 | def unflatten(self, x): 51 | return np.asarray(x).reshape(self.shape) 52 | 53 | def flatten_n(self, xs): 54 | xs = np.asarray(xs) 55 | return xs.reshape((xs.shape[0], -1)) 56 | 57 | def unflatten_n(self, xs): 58 | xs = np.asarray(xs) 59 | return xs.reshape((xs.shape[0],) + self.shape) 60 | 61 | def __repr__(self): 62 | return "Box" + str(self.shape) 63 | 64 | def __eq__(self, other): 65 | return isinstance(other, Box) and np.allclose(self.low, other.low) and \ 66 | np.allclose(self.high, other.high) 67 | 68 | def __hash__(self): 69 | return hash((self.low, self.high)) 70 | 71 | def new_tensor_variable(self, name, extra_dims): 72 | return ext.new_tensor( 73 | name=name, 74 | ndim=extra_dims+1, 75 | dtype=theano.config.floatX 76 | ) 77 | 78 | -------------------------------------------------------------------------------- /rllab/rllab/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | from .base import Space 2 | import numpy as np 3 | from rllab.misc import special 4 | from rllab.misc import ext 5 | 6 | 7 | class Discrete(Space): 8 | """ 9 | {0,1,...,n-1} 10 | """ 11 | 12 | def __init__(self, n): 13 | self._n = n 14 | 15 | @property 16 | def n(self): 17 | return self._n 18 | 19 | def sample(self): 20 | return np.random.randint(self.n) 21 | 22 | def contains(self, x): 23 | x = np.asarray(x) 24 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n 25 | 26 | def __repr__(self): 27 | return "Discrete(%d)" % self.n 28 | 29 | def __eq__(self, other): 30 | return self.n == other.n 31 | 32 | def flatten(self, x): 33 | return special.to_onehot(x, self.n) 34 | 35 | def unflatten(self, x): 36 | return special.from_onehot(x) 37 | 38 | def flatten_n(self, x): 39 | return special.to_onehot_n(x, self.n) 40 | 41 | def unflatten_n(self, x): 42 | return special.from_onehot_n(x) 43 | 44 | @property 45 | def flat_dim(self): 46 | return self.n 47 | 48 | def weighted_sample(self, weights): 49 | return special.weighted_sample(weights, range(self.n)) 50 | 51 | @property 52 | def default_value(self): 53 | return 0 54 | 55 | def new_tensor_variable(self, name, extra_dims): 56 | if self.n <= 2 ** 8: 57 | return ext.new_tensor( 58 | name=name, 59 | ndim=extra_dims+1, 60 | dtype='uint8' 61 | ) 62 | elif self.n <= 2 ** 16: 63 | return ext.new_tensor( 64 | name=name, 65 | ndim=extra_dims+1, 66 | dtype='uint16' 67 | ) 68 | else: 69 | return ext.new_tensor( 70 | name=name, 71 | ndim=extra_dims+1, 72 | 
dtype='uint32' 73 | ) 74 | 75 | def __eq__(self, other): 76 | if not isinstance(other, Discrete): 77 | return False 78 | return self.n == other.n 79 | 80 | def __hash__(self): 81 | return hash(self.n) -------------------------------------------------------------------------------- /rllab/rllab/viskit/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /rllab/sandbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/npg.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/algos/trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from sandbox.rocky.tf.algos.npo import NPO 4 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 5 | 6 | 7 | class TRPO(NPO): 8 | """ 9 | Trust Region Policy Optimization 10 | """ 11 | 12 | def __init__( 13 | self, 14 | optimizer=None, 15 | optimizer_args=None, 16 | **kwargs): 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 21 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/core/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/core/layers_powered.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.core.parameterized import Parameterized 2 | import sandbox.rocky.tf.core.layers as L 3 | import itertools 4 | 5 | 6 | class LayersPowered(Parameterized): 7 | 8 | def __init__(self, output_layers, input_layers=None): 9 | self._output_layers = output_layers 10 | self._input_layers = input_layers 11 | Parameterized.__init__(self) 12 | 13 | def get_params_internal(self, **tags): 14 | layers = L.get_all_layers(self._output_layers, treat_as_input=self._input_layers) 15 | params = itertools.chain.from_iterable(l.get_params(**tags) for l in 
layers) 16 | return L.unique(params) 17 | 18 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | class Distribution(object): 6 | @property 7 | def dim(self): 8 | raise NotImplementedError 9 | 10 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 11 | """ 12 | Compute the symbolic KL divergence of two distributions 13 | """ 14 | raise NotImplementedError 15 | 16 | def kl(self, old_dist_info, new_dist_info): 17 | """ 18 | Compute the KL divergence of two distributions 19 | """ 20 | raise NotImplementedError 21 | 22 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 23 | raise NotImplementedError 24 | 25 | def entropy(self, dist_info): 26 | raise NotImplementedError 27 | 28 | def log_likelihood_sym(self, x_var, dist_info_vars): 29 | raise NotImplementedError 30 | 31 | def log_likelihood(self, xs, dist_info): 32 | raise NotImplementedError 33 | 34 | @property 35 | def dist_info_specs(self): 36 | raise NotImplementedError 37 | 38 | @property 39 | def dist_info_keys(self): 40 | return [k for k, _ in self.dist_info_specs] 41 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from .base import Distribution 4 | import tensorflow as tf 5 | import numpy as np 6 | 7 | TINY = 1e-8 8 | 9 | 10 | class Bernoulli(Distribution): 11 | def __init__(self, dim): 12 | self._dim = dim 13 | 14 | @property 15 | def dim(self): 16 | return self._dim 17 | 18 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 19 | old_p = old_dist_info_vars["p"] 20 | new_p = new_dist_info_vars["p"] 21 | kl = old_p * (tf.log(old_p + TINY) - tf.log(new_p + TINY)) + \ 22 | (1 - old_p) * (tf.log(1 - old_p + TINY) - tf.log(1 - new_p + TINY)) 23 | ndims = kl.get_shape().ndims 24 | return tf.reduce_sum(kl, axis=ndims - 1) 25 | 26 | def kl(self, old_dist_info, new_dist_info): 27 | old_p = old_dist_info["p"] 28 | new_p = new_dist_info["p"] 29 | kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) + \ 30 | (1 - old_p) * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY)) 31 | return np.sum(kl, axis=-1) 32 | 33 | def sample(self, dist_info): 34 | p = np.asarray(dist_info["p"]) 35 | return np.cast['int'](np.random.uniform(low=0., high=1., size=p.shape) < p) 36 | 37 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 38 | old_p = old_dist_info_vars["p"] 39 | new_p = new_dist_info_vars["p"] 40 | ndims = old_p.get_shape().ndims 41 | return tf.reduce_prod(x_var * new_p / (old_p + TINY) + (1 - x_var) * (1 - new_p) / (1 - old_p + TINY), 42 | axis=ndims - 1) 43 | 44 | def log_likelihood_sym(self, x_var, dist_info_vars): 45 | p = dist_info_vars["p"] 46 | ndims = p.get_shape().ndims 47 | return tf.reduce_sum(x_var * tf.log(p + TINY) + (1 - x_var) * tf.log(1 - p + TINY), axis=ndims - 1) 48 | 49 | def log_likelihood(self, xs, dist_info): 50 | p = dist_info["p"] 51 | return np.sum(xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1) 52 | 53 | def entropy(self, dist_info): 54 | p = dist_info["p"] 
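        # Elementwise Bernoulli entropy, -p*log(p) - (1-p)*log(1-p), summed over the
        # event dimension in the return below; TINY keeps the logs finite when p hits 0 or 1.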
55 | return np.sum(- p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1) 56 | 57 | @property 58 | def dist_info_keys(self): 59 | return ["p"] 60 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/distributions/recurrent_diagonal_gaussian.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from sandbox.rocky.tf.distributions.diagonal_gaussian import DiagonalGaussian 5 | 6 | RecurrentDiagonalGaussian = DiagonalGaussian 7 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/envs/vec_env_executor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pickle 3 | from sandbox.rocky.tf.misc import tensor_utils 4 | 5 | 6 | class VecEnvExecutor(object): 7 | def __init__(self, envs, max_path_length): 8 | self.envs = envs 9 | self._action_space = envs[0].action_space 10 | self._observation_space = envs[0].observation_space 11 | self.ts = np.zeros(len(self.envs), dtype='int') 12 | self.max_path_length = max_path_length 13 | 14 | def step(self, action_n, reset_args=None, policy_contexts=None): 15 | if reset_args is None: 16 | policy_contexts = [None]*len(self.envs) 17 | reset_args = [None]*len(self.envs) 18 | all_results = [env.step(a) for (a, env) in zip(action_n, self.envs)] 19 | obs, rewards, dones, env_infos = list(map(list, list(zip(*all_results)))) 20 | dones = np.asarray(dones) 21 | rewards = np.asarray(rewards) 22 | self.ts += 1 23 | if self.max_path_length is not None: 24 | dones[self.ts >= self.max_path_length] = True 25 | for (i, done) in enumerate(dones): 26 | if done: 27 | obs[i] = self.envs[i].reset(reset_args=reset_args[i], policy_contexts=policy_contexts[i]) 28 | self.ts[i] = 0 29 | return obs, rewards, dones, tensor_utils.stack_tensor_dict_list(env_infos) 30 | 31 | def reset(self, reset_args=None, policy_contexts=None): 32 | if reset_args is not None: 33 | assert policy_contexts is not None 34 | results = [env.reset(reset_args=arg, policy_contexts=policy_context) for env, arg, policy_context in zip(self.envs, reset_args, policy_contexts)] 35 | else: 36 | results = [env.reset() for env in self.envs] 37 | self.ts[:] = 0 38 | return results 39 | 40 | @property 41 | def num_envs(self): 42 | return len(self.envs) 43 | 44 | @property 45 | def action_space(self): 46 | return self._action_space 47 | 48 | @property 49 | def observation_space(self): 50 | return self._observation_space 51 | 52 | def terminate(self): 53 | pass -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/trpo_cartpole.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer 
import ConjugateGradientOptimizer 6 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import FiniteDifferenceHvp 7 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 8 | from sandbox.rocky.tf.envs.base import TfEnv 9 | from rllab.misc.instrument import stub, run_experiment_lite 10 | 11 | env = TfEnv(normalize(CartpoleEnv())) 12 | 13 | policy = GaussianMLPPolicy( 14 | name="policy", 15 | env_spec=env.spec, 16 | # The neural network policy should have two hidden layers, each with 32 hidden units. 17 | hidden_sizes=(32, 32) 18 | ) 19 | 20 | baseline = LinearFeatureBaseline(env_spec=env.spec) 21 | 22 | algo = TRPO( 23 | env=env, 24 | policy=policy, 25 | baseline=baseline, 26 | batch_size=4000, 27 | max_path_length=100, 28 | n_itr=40, 29 | discount=0.99, 30 | step_size=0.01, 31 | # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 32 | 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/trpo_cartpole_recurrent.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.policies.gaussian_gru_policy import GaussianGRUPolicy 6 | from sandbox.rocky.tf.policies.gaussian_lstm_policy import GaussianLSTMPolicy 7 | from sandbox.rocky.tf.envs.base import TfEnv 8 | import sandbox.rocky.tf.core.layers as L 9 | from sandbox.rocky.tf.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer, FiniteDifferenceHvp 10 | from rllab.misc.instrument import stub, run_experiment_lite 11 | 12 | env = TfEnv(normalize(CartpoleEnv())) 13 | 14 | policy = GaussianLSTMPolicy( 15 | name="policy", 16 | env_spec=env.spec, 17 | lstm_layer_cls=L.TfBasicLSTMLayer, 18 | # gru_layer_cls=L.GRULayer, 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = TRPO( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=100, 29 | n_itr=10, 30 | discount=0.99, 31 | step_size=0.01, 32 | optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5)) 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/launchers/vpg_cartpole.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.algos.vpg import VPG 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from rllab.misc.instrument import stub, run_experiment_lite 8 | 9 | env = TfEnv(normalize(CartpoleEnv())) 10 | 11 | policy = GaussianMLPPolicy( 12 | name="policy", 13 | env_spec=env.spec, 14 | # The neural network policy should have two hidden layers, each with 32 hidden units. 
15 | hidden_sizes=(32, 32) 16 | ) 17 | 18 | baseline = LinearFeatureBaseline(env_spec=env.spec) 19 | 20 | algo = VPG( 21 | env=env, 22 | policy=policy, 23 | baseline=baseline, 24 | batch_size=10000, 25 | max_path_length=100, 26 | n_itr=40, 27 | discount=0.99, 28 | optimizer_args=dict( 29 | tf_optimizer_args=dict( 30 | learning_rate=0.01, 31 | ) 32 | ) 33 | ) 34 | algo.train() 35 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/misc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/policies/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/policies/uniform_control_policy.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.policies.base import Policy 2 | from rllab.core.serializable import Serializable 3 | 4 | 5 | class UniformControlPolicy(Policy, Serializable): 6 | def __init__( 7 | self, 8 | env_spec, 9 | ): 10 | Serializable.quick_init(self, locals()) 11 | super(UniformControlPolicy, self).__init__(env_spec=env_spec) 12 | 13 | @property 14 | def vectorized(self): 15 | return True 16 | 17 | def get_action(self, observation): 18 | return self.action_space.sample(), dict() 19 | 20 | def get_actions(self, observations): 21 | return self.action_space.sample_n(len(observations)), dict() 22 | 23 | def get_params_internal(self, **tags): 24 | return [] 25 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/q_functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/sandbox/rocky/tf/q_functions/__init__.py -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/q_functions/base.py: -------------------------------------------------------------------------------- 1 | from sandbox.rocky.tf.core.parameterized import Parameterized 2 | 3 | class QFunction(Parameterized): 4 | pass 5 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .product import Product 2 | from .discrete import Discrete 3 | from .box import Box 4 | 5 | __all__ = ["Product", "Discrete", "Box"] 6 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/box.py: 
-------------------------------------------------------------------------------- 1 | from rllab.spaces.box import Box as TheanoBox 2 | import tensorflow as tf 3 | 4 | 5 | class Box(TheanoBox): 6 | def new_tensor_variable(self, name, extra_dims, flatten=True): 7 | if flatten: 8 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + [self.flat_dim], name=name) 9 | return tf.placeholder(tf.float32, shape=[None] * extra_dims + list(self.shape), name=name) 10 | 11 | @property 12 | def dtype(self): 13 | return tf.float32 14 | -------------------------------------------------------------------------------- /rllab/sandbox/rocky/tf/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | from rllab.spaces.base import Space 2 | import numpy as np 3 | from rllab.misc import special 4 | from rllab.misc import ext 5 | import tensorflow as tf 6 | 7 | 8 | class Discrete(Space): 9 | """ 10 | {0,1,...,n-1} 11 | """ 12 | 13 | def __init__(self, n): 14 | self._n = n 15 | 16 | @property 17 | def n(self): 18 | return self._n 19 | 20 | def sample(self): 21 | return np.random.randint(self.n) 22 | 23 | def sample_n(self, n): 24 | return np.random.randint(low=0, high=self.n, size=n) 25 | 26 | def contains(self, x): 27 | x = np.asarray(x) 28 | return x.shape == () and x.dtype.kind == 'i' and x >= 0 and x < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return self.n == other.n 35 | 36 | def flatten(self, x): 37 | return special.to_onehot(x, self.n) 38 | 39 | def unflatten(self, x): 40 | return special.from_onehot(x) 41 | 42 | def flatten_n(self, x): 43 | return special.to_onehot_n(x, self.n) 44 | 45 | def unflatten_n(self, x): 46 | return special.from_onehot_n(x) 47 | 48 | @property 49 | def default_value(self): 50 | return 0 51 | 52 | @property 53 | def flat_dim(self): 54 | return self.n 55 | 56 | def weighted_sample(self, weights): 57 | return special.weighted_sample(weights, range(self.n)) 58 | 59 | def new_tensor_variable(self, name, extra_dims): 60 | # needed for safe conversion to float32 61 | return tf.placeholder(dtype=tf.uint8, shape=[None] * extra_dims + [self.flat_dim], name=name) 62 | 63 | @property 64 | def dtype(self): 65 | return tf.uint8 66 | 67 | def __eq__(self, other): 68 | if not isinstance(other, Discrete): 69 | return False 70 | return self.n == other.n 71 | 72 | def __hash__(self): 73 | return hash(self.n) 74 | 75 | -------------------------------------------------------------------------------- /rllab/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/scripts/__init__.py -------------------------------------------------------------------------------- /rllab/scripts/resume_training.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from rllab.sampler.utils import rollout 5 | from rllab.algos.batch_polopt import BatchPolopt 6 | import argparse 7 | import joblib 8 | import uuid 9 | import os 10 | import random 11 | import numpy as np 12 | import json 13 | import subprocess 14 | from rllab.misc import logger 15 | from rllab.misc.instrument import to_local_command 16 | 17 | filename = str(uuid.uuid4()) 18 | 19 | if __name__ == "__main__": 20 | 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('file', type=str, 23 | help='path to the snapshot file') 24 | 
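    # Optional: redirect the resumed run's logs to a fresh directory; if omitted,
    # whatever log settings were recorded in params.json are reused.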
parser.add_argument('--log_dir', type=str, default=None, 25 | help='path to the new log directory') 26 | # Look for params.json file 27 | args = parser.parse_args() 28 | parent_dir = os.path.dirname(os.path.realpath(args.file)) 29 | json_file_path = os.path.join(parent_dir, "params.json") 30 | logger.log("Looking for params.json at %s..." % json_file_path) 31 | try: 32 | with open(json_file_path, "r") as f: 33 | params = json.load(f) 34 | # exclude certain parameters 35 | excluded = ['json_args'] 36 | for k in excluded: 37 | if k in params: 38 | del params[k] 39 | for k, v in list(params.items()): 40 | if v is None: 41 | del params[k] 42 | if args.log_dir is not None: 43 | params['log_dir'] = args.log_dir 44 | params['resume_from'] = args.file 45 | command = to_local_command(params, script='scripts/run_experiment_lite.py') 46 | print(command) 47 | try: 48 | subprocess.call(command, shell=True, env=os.environ) 49 | except Exception as e: 50 | print(e) 51 | if isinstance(e, KeyboardInterrupt): 52 | raise 53 | except IOError as e: 54 | logger.log("Failed to find json file. Continuing in non-stub mode...") 55 | data = joblib.load(args.file) 56 | assert 'algo' in data 57 | algo = data['algo'] 58 | assert isinstance(algo, BatchPolopt) 59 | algo.train() 60 | -------------------------------------------------------------------------------- /rllab/scripts/setup_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that conda is available 3 | 4 | hash conda 2>/dev/null || { 5 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 6 | exit 0 7 | } 8 | 9 | echo "Installing system dependencies" 10 | echo "You will probably be asked for your sudo password." 11 | sudo apt-get update 12 | sudo apt-get install -y python-pip python-dev swig cmake build-essential zlib1g-dev 13 | sudo apt-get build-dep -y python-pygame 14 | sudo apt-get build-dep -y python-scipy 15 | 16 | # Make sure that we're under the directory of the project 17 | cd "$(dirname "$0")/.." 18 | 19 | echo "Creating conda environment..." 20 | conda env create -f environment.yml 21 | conda env update 22 | 23 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 24 | -------------------------------------------------------------------------------- /rllab/scripts/setup_mujoco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$(uname)" == "Darwin" ]; then 4 | mujoco_file="libmujoco131.dylib" 5 | glfw_file="libglfw.3.dylib" 6 | zip_file="mjpro131_osx.zip" 7 | mktemp_cmd="mktemp -d /tmp/mujoco" 8 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 9 | mujoco_file="libmujoco131.so" 10 | glfw_file="libglfw.so.3" 11 | zip_file="mjpro131_linux.zip" 12 | mktemp_cmd="mktemp -d" 13 | fi 14 | 15 | if [ ! -f vendor/mujoco/$mujoco_file ]; then 16 | read -e -p "Please enter the path to the mujoco zip file [$zip_file]:" path 17 | path=${path:-$zip_file} 18 | eval path=\"$path\" 19 | if [ ! -f $path ]; then 20 | echo "No file found at $path" 21 | exit 0 22 | fi 23 | rm -r /tmp/mujoco 24 | dir=`$mktemp_cmd` 25 | unzip $path -d $dir 26 | if [ ! -f $dir/mjpro131/bin/$mujoco_file ]; then 27 | echo "mjpro/$mujoco_file not found. Make sure you have the correct file (most likely named $zip_file)" 28 | exit 0 29 | fi 30 | if [ ! 
-f $dir/mjpro131/bin/$glfw_file ]; then 31 | echo "mjpro/$glfw_file not found. Make sure you have the correct file (most likely named $zip_file)" 32 | exit 0 33 | fi 34 | 35 | mkdir -p vendor/mujoco 36 | cp $dir/mjpro131/bin/$mujoco_file vendor/mujoco/ 37 | cp $dir/mjpro131/bin/$glfw_file vendor/mujoco/ 38 | fi 39 | 40 | if [ ! -f vendor/mujoco/mjkey.txt ]; then 41 | read -e -p "Please enter the path to the mujoco license file [mjkey.txt]:" path 42 | path=${path:-mjkey.txt} 43 | eval path=$path 44 | if [ ! -f $path ]; then 45 | echo "No file found at $path" 46 | exit 0 47 | fi 48 | cp $path vendor/mujoco/mjkey.txt 49 | fi 50 | 51 | echo "Mujoco has been set up!" 52 | -------------------------------------------------------------------------------- /rllab/scripts/setup_osx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that pip is available 3 | hash brew 2>/dev/null || { 4 | echo "Please install homebrew before continuing. You can use the following command to install:" 5 | echo "/usr/bin/ruby -e \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)\"" 6 | exit 0 7 | } 8 | 9 | hash conda 2>/dev/null || { 10 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 11 | exit 0 12 | } 13 | 14 | 15 | echo "Installing system dependencies" 16 | echo "You will probably be asked for your sudo password." 17 | 18 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi 19 | 20 | # Make sure that we're under the directory of the project 21 | cd "$(dirname "$0")/.." 22 | echo "Creating conda environment..." 23 | conda env create -f environment.yml 24 | conda env update 25 | 26 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 
27 | -------------------------------------------------------------------------------- /rllab/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import joblib 4 | import tensorflow as tf 5 | 6 | from rllab.misc.console import query_yes_no 7 | from rllab.sampler.utils import rollout 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('file', type=str, 13 | help='path to the snapshot file') 14 | parser.add_argument('--max_path_length', type=int, default=1000, 15 | help='Max length of rollout') 16 | parser.add_argument('--speedup', type=float, default=1, 17 | help='Speedup') 18 | args = parser.parse_args() 19 | 20 | # If the snapshot file use tensorflow, do: 21 | # import tensorflow as tf 22 | # with tf.Session(): 23 | # [rest of the code] 24 | with tf.Session() as sess: 25 | data = joblib.load(args.file) 26 | policy = data['policy'] 27 | env = data['env'] 28 | while True: 29 | path = rollout(env, policy, max_path_length=args.max_path_length, 30 | animated=True, speedup=args.speedup) 31 | if not query_yes_no('Continue simulation?'): 32 | break 33 | -------------------------------------------------------------------------------- /rllab/scripts/submit_gym.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import gym 7 | from rllab.viskit.core import load_params 8 | 9 | if __name__ == "__main__": 10 | # rl_gym.api_key = 'g8JOpnNVmcjMShBiFtyji2VWX3P2uCzc' 11 | if 'OPENAI_GYM_API_KEY' not in os.environ: 12 | raise ValueError("OpenAi Gym API key not configured. Please register an account on https://gym.openai.com and" 13 | " set the OPENAI_GYM_API_KEY environment variable, and try the script again.") 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('log_dir', type=str, 17 | help='path to the logging directory') 18 | parser.add_argument('--algorithm_id', type=str, default=None, help='Algorithm ID') 19 | args = parser.parse_args() 20 | snapshot_dir = osp.abspath(osp.join(args.log_dir, "..")) 21 | params_file_path = osp.join(snapshot_dir, "params.json") 22 | gym.upload(args.log_dir, algorithm_id=args.algorithm_id) 23 | -------------------------------------------------------------------------------- /rllab/scripts/sync_s3.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | from rllab import config 4 | import os 5 | import argparse 6 | import ast 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('folder', type=str, default=None, nargs='?') 11 | parser.add_argument('--dry', action='store_true', default=False) 12 | parser.add_argument('--bare', action='store_true', default=False) 13 | args = parser.parse_args() 14 | remote_dir = config.AWS_S3_PATH 15 | local_dir = os.path.join(config.LOG_DIR, "s3") 16 | if args.folder: 17 | remote_dir = os.path.join(remote_dir, args.folder) 18 | local_dir = os.path.join(local_dir, args.folder) 19 | if args.bare: 20 | command = (""" 21 | aws s3 sync {remote_dir} {local_dir} --exclude '*' --include '*.csv' --include '*.json' --content-type "UTF-8" 22 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 23 | else: 24 | command = (""" 25 | aws s3 sync {remote_dir} {local_dir} --exclude '*stdout.log' --exclude '*stdouterr.log' --content-type "UTF-8" 26 | """.format(local_dir=local_dir, 
remote_dir=remote_dir)) 27 | if args.dry: 28 | print(command) 29 | else: 30 | os.system(command) -------------------------------------------------------------------------------- /rllab/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup,find_packages 3 | 4 | setup( 5 | name='rllab', 6 | packages=[package for package in find_packages() 7 | if package.startswith('rllab')], 8 | version='0.1.0', 9 | ) 10 | -------------------------------------------------------------------------------- /rllab/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/__init__.py -------------------------------------------------------------------------------- /rllab/tests/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/tests/algos/test_trpo.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rllab.envs.base import Env, Step 4 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 5 | from rllab.baselines.zero_baseline import ZeroBaseline 6 | from rllab.algos.trpo import TRPO 7 | from rllab.spaces.box import Box 8 | import lasagne.nonlinearities 9 | import numpy as np 10 | import theano.tensor as TT 11 | 12 | 13 | class DummyEnv(Env): 14 | @property 15 | def observation_space(self): 16 | return Box(low=-np.inf, high=np.inf, shape=(1,)) 17 | 18 | @property 19 | def action_space(self): 20 | return Box(low=-5.0, high=5.0, shape=(1,)) 21 | 22 | def reset(self): 23 | return np.zeros(1) 24 | 25 | def step(self, action): 26 | return Step(observation=np.zeros(1), reward=np.random.normal(), done=True) 27 | 28 | 29 | def naive_relu(x): 30 | return TT.max(x, 0) 31 | 32 | 33 | def test_trpo_relu_nan(): 34 | env = DummyEnv() 35 | policy = GaussianMLPPolicy( 36 | env_spec=env.spec, 37 | hidden_nonlinearity=naive_relu, 38 | hidden_sizes=(1,)) 39 | baseline = ZeroBaseline(env_spec=env.spec) 40 | algo = TRPO( 41 | env=env, policy=policy, baseline=baseline, n_itr=1, batch_size=1000, max_path_length=100, 42 | step_size=0.001 43 | ) 44 | algo.train() 45 | assert not np.isnan(np.sum(policy.get_param_values())) 46 | 47 | 48 | def test_trpo_deterministic_nan(): 49 | env = DummyEnv() 50 | policy = GaussianMLPPolicy( 51 | env_spec=env.spec, 52 | hidden_sizes=(1,)) 53 | policy._l_log_std.param.set_value([np.float32(np.log(1e-8))]) 54 | baseline = ZeroBaseline(env_spec=env.spec) 55 | algo = TRPO( 56 | env=env, policy=policy, baseline=baseline, n_itr=10, batch_size=1000, max_path_length=100, 57 | step_size=0.01 58 | ) 59 | algo.train() 60 | assert not np.isnan(np.sum(policy.get_param_values())) 61 | -------------------------------------------------------------------------------- /rllab/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ermongroup/MetaIRL/455782cbb79e1b635ca678e534000d150bbb98cb/rllab/tests/envs/__init__.py -------------------------------------------------------------------------------- /rllab/tests/envs/test_maze_env.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from rllab.envs.mujoco.maze.maze_env_utils import line_intersect, 
ray_segment_intersect 4 | 5 | 6 | def test_line_intersect(): 7 | assert line_intersect((0, 0), (0, 1), (0, 0), (1, 0))[:2] == (0, 0) 8 | assert line_intersect((0, 0), (0, 1), (0, 0), (0, 1))[2] == 0 9 | assert ray_segment_intersect(ray=((0, 0), 0), segment=((1, -1), (1, 1))) == (1, 0) 10 | assert ray_segment_intersect(ray=((0, 0), math.pi), segment=((1, -1), (1, 1))) is None 11 | -------------------------------------------------------------------------------- /rllab/tests/regression_tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /rllab/tests/regression_tests/test_issue_3.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from nose2.tools import such 5 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | from rllab.algos.trpo import TRPO 8 | from rllab.baselines.zero_baseline import ZeroBaseline 9 | 10 | with such.A("Issue #3") as it: 11 | @it.should("be fixed") 12 | def test_issue_3(): 13 | """ 14 | As reported in https://github.com/rllab/rllab/issues/3, the adaptive_std parameter was not functioning properly 15 | """ 16 | env = CartpoleEnv() 17 | policy = GaussianMLPPolicy( 18 | env_spec=env, 19 | adaptive_std=True 20 | ) 21 | baseline = ZeroBaseline(env_spec=env.spec) 22 | algo = TRPO( 23 | env=env, 24 | policy=policy, 25 | baseline=baseline, 26 | batch_size=100, 27 | n_itr=1 28 | ) 29 | algo.train() 30 | 31 | it.createTests(globals()) 32 | -------------------------------------------------------------------------------- /rllab/tests/test_baselines.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ['THEANO_FLAGS'] = 'mode=FAST_COMPILE,optimizer=None' 4 | 5 | from rllab.algos.vpg import VPG 6 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 7 | from rllab.baselines.zero_baseline import ZeroBaseline 8 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 9 | from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline 10 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 11 | from nose2 import tools 12 | 13 | 14 | baselines = [ZeroBaseline, LinearFeatureBaseline, GaussianMLPBaseline] 15 | 16 | 17 | @tools.params(*baselines) 18 | def test_baseline(baseline_cls): 19 | env = CartpoleEnv() 20 | policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6,)) 21 | baseline = baseline_cls(env_spec=env.spec) 22 | algo = VPG( 23 | env=env, policy=policy, baseline=baseline, 24 | n_itr=1, batch_size=1000, max_path_length=100 25 | ) 26 | algo.train() 27 | -------------------------------------------------------------------------------- /rllab/tests/test_instrument.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from rllab.misc import instrument 5 | from nose2.tools import such 6 | 7 | 8 | class TestClass(object): 9 | @property 10 | def arr(self): 11 | return [1, 2, 3] 12 | 13 | @property 14 | def compound_arr(self): 15 | return [dict(a=1)] 16 | 17 | 18 | with such.A("instrument") as it: 19 | @it.should 20 | def test_concretize(): 21 | it.assertEqual(instrument.concretize([5]), [5]) 22 | it.assertEqual(instrument.concretize((5,)), (5,)) 23 | fake_globals = dict(TestClass=TestClass) 24 | instrument.stub(fake_globals) 25 | modified = fake_globals["TestClass"] 26 | 
it.assertIsInstance(modified, instrument.StubClass) 27 | it.assertIsInstance(modified(), instrument.StubObject) 28 | it.assertEqual(instrument.concretize((5,)), (5,)) 29 | it.assertIsInstance(instrument.concretize(modified()), TestClass) 30 | 31 | 32 | @it.should 33 | def test_chained_call(): 34 | fake_globals = dict(TestClass=TestClass) 35 | instrument.stub(fake_globals) 36 | modified = fake_globals["TestClass"] 37 | it.assertIsInstance(modified().arr[0], instrument.StubMethodCall) 38 | it.assertIsInstance(modified().compound_arr[0]["a"], instrument.StubMethodCall) 39 | it.assertEqual(instrument.concretize(modified().arr[0]), 1) 40 | 41 | 42 | @it.should 43 | def test_variant_generator(): 44 | 45 | vg = instrument.VariantGenerator() 46 | vg.add("key1", [1, 2, 3]) 47 | vg.add("key2", [True, False]) 48 | vg.add("key3", lambda key2: [1] if key2 else [1, 2]) 49 | it.assertEqual(len(vg.variants()), 9) 50 | 51 | class VG(instrument.VariantGenerator): 52 | 53 | @instrument.variant 54 | def key1(self): 55 | return [1, 2, 3] 56 | 57 | @instrument.variant 58 | def key2(self): 59 | yield True 60 | yield False 61 | 62 | @instrument.variant 63 | def key3(self, key2): 64 | if key2: 65 | yield 1 66 | else: 67 | yield 1 68 | yield 2 69 | 70 | it.assertEqual(len(VG().variants()), 9) 71 | 72 | it.createTests(globals()) 73 | -------------------------------------------------------------------------------- /rllab/tests/test_networks.py: -------------------------------------------------------------------------------- 1 | def test_gru_network(): 2 | from rllab.core.network import GRUNetwork 3 | import lasagne.layers as L 4 | from rllab.misc import ext 5 | import numpy as np 6 | network = GRUNetwork( 7 | input_shape=(2, 3), 8 | output_dim=5, 9 | hidden_dim=4, 10 | ) 11 | f_output = ext.compile_function( 12 | inputs=[network.input_layer.input_var], 13 | outputs=L.get_output(network.output_layer) 14 | ) 15 | assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5) 16 | -------------------------------------------------------------------------------- /rllab/tests/test_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | 6 | def test_truncate_paths(): 7 | from rllab.sampler.parallel_sampler import truncate_paths 8 | 9 | paths = [ 10 | dict( 11 | observations=np.zeros((100, 1)), 12 | actions=np.zeros((100, 1)), 13 | rewards=np.zeros(100), 14 | env_infos=dict(), 15 | agent_infos=dict(lala=np.zeros(100)), 16 | ), 17 | dict( 18 | observations=np.zeros((50, 1)), 19 | actions=np.zeros((50, 1)), 20 | rewards=np.zeros(50), 21 | env_infos=dict(), 22 | agent_infos=dict(lala=np.zeros(50)), 23 | ), 24 | ] 25 | 26 | truncated = truncate_paths(paths, 130) 27 | assert len(truncated) == 2 28 | assert len(truncated[-1]["observations"]) == 30 29 | assert len(truncated[0]["observations"]) == 100 30 | # make sure not to change the original one 31 | assert len(paths) == 2 32 | assert len(paths[-1]["observations"]) == 50 33 | -------------------------------------------------------------------------------- /rllab/tests/test_serializable.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from rllab.core.serializable import Serializable 4 | from sandbox.rocky.tf.core.parameterized import Parameterized, suppress_params_loading 5 | 6 | 7 | class Simple(Parameterized, Serializable): 8 | def __init__(self, name): 9 | Serializable.quick_init(self, locals()) 10 | with tf.variable_scope(name): 11 | 
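            # Variables created under this scope get the "<name>/" prefix, which
            # test_serializable() below checks via w.name.startswith('obj/') / ('obj1/').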
self.w = tf.get_variable("w", [10, 10]) 12 | 13 | def get_params_internal(self, **tags): 14 | return [self.w] 15 | 16 | 17 | class AllArgs(Serializable): 18 | def __init__(self, vararg, *args, **kwargs): 19 | Serializable.quick_init(self, locals()) 20 | self.vararg = vararg 21 | self.args = args 22 | self.kwargs = kwargs 23 | 24 | 25 | def test_serializable(): 26 | with suppress_params_loading(): 27 | obj = Simple(name="obj") 28 | obj1 = Serializable.clone(obj, name="obj1") 29 | assert obj.w.name.startswith('obj/') 30 | assert obj1.w.name.startswith('obj1/') 31 | 32 | obj2 = AllArgs(0, *(1,), **{'kwarg': 2}) 33 | obj3 = Serializable.clone(obj2) 34 | assert obj3.vararg == 0 35 | assert len(obj3.args) == 1 and obj3.args[0] == 1 36 | assert len(obj3.kwargs) == 1 and obj3.kwargs['kwarg'] == 2 37 | 38 | 39 | if __name__ == "__main__": 40 | test_serializable() 41 | -------------------------------------------------------------------------------- /rllab/tests/test_spaces.py: -------------------------------------------------------------------------------- 1 | 2 | from rllab.spaces import Product, Discrete, Box 3 | import numpy as np 4 | 5 | 6 | def test_product_space(): 7 | _ = Product([Discrete(3), Discrete(2)]) 8 | product_space = Product(Discrete(3), Discrete(2)) 9 | sample = product_space.sample() 10 | assert product_space.contains(sample) 11 | 12 | 13 | def test_product_space_unflatten_n(): 14 | space = Product([Discrete(3), Discrete(3)]) 15 | np.testing.assert_array_equal(space.flatten((2, 2)), space.flatten_n([(2, 2)])[0]) 16 | np.testing.assert_array_equal( 17 | space.unflatten(space.flatten((2, 2))), 18 | space.unflatten_n(space.flatten_n([(2, 2)]))[0] 19 | ) 20 | 21 | 22 | def test_box(): 23 | space = Box(low=-1, high=1, shape=(2, 2)) 24 | np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]), [1, 2, 3, 4]) 25 | np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]), [[1, 2, 3, 4]]) 26 | np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]), [[1, 2], [3, 4]]) 27 | np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]), [[[1, 2], [3, 4]]]) 28 | -------------------------------------------------------------------------------- /rllab/tests/test_stateful_pool.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | def _worker_collect_once(_): 6 | return 'a', 1 7 | 8 | 9 | def test_stateful_pool(): 10 | from rllab.sampler import stateful_pool 11 | stateful_pool.singleton_pool.initialize(n_parallel=3) 12 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False) 13 | assert tuple(results) == ('a', 'a', 'a') 14 | 15 | 16 | def test_stateful_pool_over_capacity(): 17 | from rllab.sampler import stateful_pool 18 | stateful_pool.singleton_pool.initialize(n_parallel=4) 19 | results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False) 20 | assert len(results) >= 3 21 | -------------------------------------------------------------------------------- /rllab/vendor/mujoco_models/green_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /rllab/vendor/mujoco_models/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- 
/rllab/vendor/mujoco_models/red_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/maze_data_collect.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from inverse_rl.algos.trpo import TRPO 4 | from inverse_rl.models.tf_util import get_session_config 5 | from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from sandbox.rocky.tf.envs.base import TfEnv 7 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 8 | 9 | from inverse_rl.envs.env_utils import CustomGymEnv 10 | from inverse_rl.utils.log_utils import rllab_logdir 11 | from inverse_rl.utils.hyper_sweep import run_sweep_parallel, run_sweep_serial 12 | 13 | 14 | def main(exp_name, ent_wt=1.0, discrete=True): 15 | tf.reset_default_graph() 16 | if discrete: 17 | env = TfEnv(CustomGymEnv('PointMazeLeft-v0', record_video=False, record_log=False)) 18 | else: 19 | env = TfEnv(CustomGymEnv('PointMazeLeftCont-v0', record_video=False, record_log=False)) 20 | 21 | policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32)) 22 | with tf.Session(config=get_session_config()) as sess: 23 | algo = TRPO( 24 | env=env, 25 | sess=sess, 26 | policy=policy, 27 | n_itr=2000, 28 | batch_size=20000, 29 | max_path_length=500, 30 | discount=0.99, 31 | store_paths=True, 32 | entropy_weight=ent_wt, 33 | baseline=LinearFeatureBaseline(env_spec=env.spec), 34 | exp_name=exp_name, 35 | ) 36 | if discrete: 37 | output = 'data/maze_left_data_collect_discrete-15/%s' % exp_name 38 | else: 39 | output = 'data/maze_left_data_collect/%s' % exp_name 40 | with rllab_logdir(algo=algo, dirname=output): 41 | algo.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | params_dict = { 46 | 'ent_wt': [0.1], 47 | 'discrete': True # Setting discrete to 'True' to get training data, 'False' to get test data (test unseen positions) 48 | } 49 | run_sweep_parallel(main, params_dict, repeat=4) 50 | --------------------------------------------------------------------------------
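For a quick single-configuration run of the collection script above, the sweep at the bottom of scripts/maze_data_collect.py can be swapped for a direct call to main(). The sketch below assumes it replaces that `if __name__ == "__main__":` block inside the same file, so the existing imports apply; the experiment name maze_left_single is illustrative rather than one used in the repository.

if __name__ == "__main__":
    # One configuration, one seed: a single cell of the params_dict sweep above.
    # 'maze_left_single' is a hypothetical experiment name chosen for this sketch.
    main("maze_left_single", ent_wt=0.1, discrete=True)
    # With discrete=True, logs (and, given store_paths=True, presumably the sampled
    # rollouts stored in the per-iteration snapshots) land under
    # data/maze_left_data_collect_discrete-15/maze_left_single

For the data actually consumed downstream, the parallel sweep with repeat=4 remains the intended entry point; run_sweep_serial, which the script also imports, presumably runs the same configurations one at a time in a single process, which can be easier to step through when debugging.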