├── .gitignore ├── README.md ├── notes └── paper.md ├── output ├── src ├── files.txt ├── getid_linux ├── gym-adv │ ├── .dockerignore │ ├── .gitignore │ ├── .travis.yml │ ├── CODE_OF_CONDUCT.rst │ ├── Dockerfile │ ├── LICENSE1 │ ├── Makefile │ ├── Manifest.in │ ├── README.md │ ├── bin │ │ └── docker_entrypoint │ ├── docs │ │ ├── agents.md │ │ ├── environments.md │ │ ├── misc.md │ │ └── readme.md │ ├── examples │ │ ├── agents │ │ │ ├── _policies.py │ │ │ ├── cem.py │ │ │ ├── keyboard_agent.py │ │ │ ├── random_agent.py │ │ │ └── tabular_q_agent.py │ │ ├── scripts │ │ │ ├── benchmark_runner │ │ │ ├── list_envs │ │ │ ├── play_go │ │ │ ├── sim_env │ │ │ └── upload │ │ └── utilities │ │ │ └── live_plot.py │ ├── gym │ │ ├── __init__.py │ │ ├── benchmarks │ │ │ ├── __init__.py │ │ │ ├── registration.py │ │ │ ├── scoring.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_benchmark.py │ │ ├── configuration.py │ │ ├── core.py │ │ ├── envs │ │ │ ├── .algorithmic │ │ │ │ ├── __init__.py │ │ │ │ ├── algorithmic_env.py │ │ │ │ ├── copy_.py │ │ │ │ ├── duplicated_input.py │ │ │ │ ├── repeat_copy.py │ │ │ │ ├── reverse.py │ │ │ │ ├── reversed_addition.py │ │ │ │ └── tests │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── test_algorithmic.py │ │ │ ├── .atari │ │ │ │ ├── __init__.py │ │ │ │ └── atari_env.py │ │ │ ├── .board_game │ │ │ │ ├── __init__.py │ │ │ │ ├── go.py │ │ │ │ └── hex.py │ │ │ ├── .box2d │ │ │ │ ├── __init__.py │ │ │ │ ├── bipedal_walker.py │ │ │ │ ├── car_dynamics.py │ │ │ │ ├── car_racing.py │ │ │ │ └── lunar_lander.py │ │ │ ├── .classical_control │ │ │ │ ├── __init__.py │ │ │ │ ├── acrobot.py │ │ │ │ ├── assets │ │ │ │ │ └── clockwise.png │ │ │ │ ├── cartpole.py │ │ │ │ ├── continuous_mountain_car.py │ │ │ │ ├── mountain_car.py │ │ │ │ ├── pendulum.py │ │ │ │ └── rendering.py │ │ │ ├── .dafety │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── offswitch_cartpole.py │ │ │ │ ├── offswitch_cartpole_prob.py │ │ │ │ ├── predict_actions_cartpole.py │ │ │ │ ├── predict_obs_cartpole.py │ │ │ │ └── semisuper.py │ │ │ ├── .mujoco │ │ │ │ ├── __init__.py │ │ │ │ ├── ant.py │ │ │ │ ├── assets │ │ │ │ │ ├── ant.xml │ │ │ │ │ ├── half_cheetah.xml │ │ │ │ │ ├── hopper.xml │ │ │ │ │ ├── humanoid.xml │ │ │ │ │ ├── humanoidstandup.xml │ │ │ │ │ ├── inverted_double_pendulum.xml │ │ │ │ │ ├── inverted_pendulum.xml │ │ │ │ │ ├── point.xml │ │ │ │ │ ├── reacher.xml │ │ │ │ │ ├── swimmer.xml │ │ │ │ │ └── walker2d.xml │ │ │ │ ├── half_cheetah.py │ │ │ │ ├── hopper.py │ │ │ │ ├── humanoid.py │ │ │ │ ├── humanoidstandup.py │ │ │ │ ├── inverted_double_pendulum.py │ │ │ │ ├── inverted_pendulum.py │ │ │ │ ├── mujoco_env.py │ │ │ │ ├── reacher.py │ │ │ │ ├── swimmer.py │ │ │ │ └── walker2d.py │ │ │ ├── .parameter_tuning │ │ │ │ ├── __init__.py │ │ │ │ ├── convergence.py │ │ │ │ └── train_deep_cnn.py │ │ │ ├── .toy_text │ │ │ │ ├── __init__.py │ │ │ │ ├── blackjack.py │ │ │ │ ├── discrete.py │ │ │ │ ├── frozen_lake.py │ │ │ │ ├── guessing_game.py │ │ │ │ ├── hotter_colder.py │ │ │ │ ├── nchain.py │ │ │ │ ├── roulette.py │ │ │ │ └── taxi.py │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── adversarial │ │ │ │ ├── .classic_control │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── acrobot.py │ │ │ │ │ ├── assets │ │ │ │ │ │ └── clockwise.png │ │ │ │ │ ├── cartpole.py │ │ │ │ │ ├── continuous_mountain_car.py │ │ │ │ │ ├── dist │ │ │ │ │ │ └── gym-0.5.6-py2.7.egg │ │ │ │ │ ├── gym.egg-info │ │ │ │ │ │ ├── PKG-INFO │ │ │ │ │ │ ├── SOURCES.txt │ │ │ │ │ │ ├── dependency_links.txt │ │ │ │ │ │ ├── not-zip-safe │ │ │ │ │ │ ├── 
requires.txt │ │ │ │ │ │ └── top_level.txt │ │ │ │ │ ├── mountain_car.py │ │ │ │ │ ├── pendulum.py │ │ │ │ │ └── rendering.py │ │ │ │ ├── __init__.py │ │ │ │ └── mujoco │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── ant.py │ │ │ │ │ ├── ant_heel.py │ │ │ │ │ ├── assets │ │ │ │ │ ├── ant.xml │ │ │ │ │ ├── half_cheetah.xml │ │ │ │ │ ├── hopper.xml │ │ │ │ │ ├── hopper_fric.xml │ │ │ │ │ ├── humanoid.xml │ │ │ │ │ ├── humanoidstandup.xml │ │ │ │ │ ├── inverted_double_pendulum.xml │ │ │ │ │ ├── inverted_pendulum.xml │ │ │ │ │ ├── point.xml │ │ │ │ │ ├── reacher.xml │ │ │ │ │ ├── swimmer.xml │ │ │ │ │ └── walker2d.xml │ │ │ │ │ ├── half_cheetah.py │ │ │ │ │ ├── half_cheetah_heel.py │ │ │ │ │ ├── half_cheetah_torso.py │ │ │ │ │ ├── hopper.py │ │ │ │ │ ├── hopper_6.py │ │ │ │ │ ├── hopper_fric.py │ │ │ │ │ ├── hopper_heel.py │ │ │ │ │ ├── hopper_heel_6.py │ │ │ │ │ ├── hopper_torso_6.py │ │ │ │ │ ├── humanoid.py │ │ │ │ │ ├── humanoid_heel.py │ │ │ │ │ ├── humanoidstandup.py │ │ │ │ │ ├── inverted_double_pendulum.py │ │ │ │ │ ├── inverted_pendulum.py │ │ │ │ │ ├── mujoco_env.py │ │ │ │ │ ├── reacher.py │ │ │ │ │ ├── swimmer.py │ │ │ │ │ ├── walker2d.py │ │ │ │ │ ├── walker2d_heel.py │ │ │ │ │ └── walker2d_torso.py │ │ │ ├── debugging │ │ │ │ ├── __init__.py │ │ │ │ ├── one_round_deterministic_reward.py │ │ │ │ ├── one_round_nondeterministic_reward.py │ │ │ │ ├── two_round_deterministic_reward.py │ │ │ │ └── two_round_nondeterministic_reward.py │ │ │ ├── registration.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── rollout.json │ │ │ │ ├── test_determinism.py │ │ │ │ ├── test_envs.py │ │ │ │ ├── test_envs_semantics.py │ │ │ │ └── test_registration.py │ │ ├── error.py │ │ ├── monitoring │ │ │ ├── __init__.py │ │ │ ├── monitor.py │ │ │ ├── stats_recorder.py │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── helpers.py │ │ │ │ ├── test_monitor.py │ │ │ │ ├── test_monitor_envs.py │ │ │ │ └── test_video_recorder.py │ │ │ └── video_recorder.py │ │ ├── scoreboard │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── client │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── api_requestor.py │ │ │ │ ├── http_client.py │ │ │ │ ├── resource.py │ │ │ │ ├── tests │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── helper.py │ │ │ │ │ ├── test_evaluation.py │ │ │ │ │ └── test_file_upload.py │ │ │ │ └── util.py │ │ │ ├── registration.py │ │ │ ├── scoring.py │ │ │ └── tests │ │ │ │ ├── __init__.py │ │ │ │ ├── test_registration.py │ │ │ │ └── test_scoring.py │ │ ├── spaces │ │ │ ├── __init__.py │ │ │ ├── box.py │ │ │ ├── discrete.py │ │ │ ├── multi_discrete.py │ │ │ ├── prng.py │ │ │ ├── tests │ │ │ │ ├── __init__.py │ │ │ │ └── test_spaces.py │ │ │ └── tuple_space.py │ │ ├── tests │ │ │ └── test_core.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── atomic_write.py │ │ │ ├── closer.py │ │ │ ├── colorize.py │ │ │ ├── ezpickle.py │ │ │ ├── reraise.py │ │ │ ├── reraise_impl_py2.py │ │ │ ├── reraise_impl_py3.py │ │ │ ├── seeding.py │ │ │ └── tests │ │ │ │ ├── test_atexit.py │ │ │ │ └── test_seeding.py │ │ ├── version.py │ │ └── wrappers │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── frame_skipping.py │ │ │ └── tests │ │ │ └── test_wrappers.py │ ├── misc │ │ ├── check_envs_for_change.py │ │ ├── compare_rollout_data.py │ │ └── write_rollout_data.py │ ├── requirements.txt │ ├── requirements_dev.txt │ ├── scripts │ │ └── generate_json.py │ ├── setup.py │ ├── test.dockerfile │ ├── tox.ini │ ├── unittest.cfg │ └── vendor │ │ └── Xdummy ├── mjkey.txt ├── mjpro131 │ ├── bin │ │ ├── compile │ │ ├── libglfw.so.3 │ │ ├── 
libmujoco131.so │ │ ├── mjkey.txt │ │ ├── simulate │ │ └── test │ ├── doc │ │ ├── README.txt │ │ └── REFERENCE.txt │ ├── include │ │ ├── glfw3.h │ │ ├── mjdata.h │ │ ├── mjmodel.h │ │ ├── mjrender.h │ │ ├── mjvisualize.h │ │ ├── mjxmacro.h │ │ └── mujoco.h │ ├── model │ │ └── humanoid.xml │ └── sample │ │ ├── compile.cpp │ │ ├── makefile │ │ ├── simulate.cpp │ │ └── test.cpp ├── mjpro131_linux.zip ├── mjpro150 │ ├── bin │ │ ├── basic │ │ ├── compile │ │ ├── derivative │ │ ├── libglew.so │ │ ├── libglewegl.so │ │ ├── libglewosmesa.so │ │ ├── libglfw.so.3 │ │ ├── libmujoco150.so │ │ ├── libmujoco150nogl.so │ │ ├── mjkey.txt │ │ ├── record │ │ ├── simulate │ │ └── test │ ├── doc │ │ ├── README.txt │ │ └── REFERENCE.txt │ ├── include │ │ ├── glfw3.h │ │ ├── mjdata.h │ │ ├── mjmodel.h │ │ ├── mjrender.h │ │ ├── mjvisualize.h │ │ ├── mjxmacro.h │ │ └── mujoco.h │ ├── model │ │ ├── humanoid.xml │ │ └── humanoid100.xml │ └── sample │ │ ├── basic.cpp │ │ ├── compile.cpp │ │ ├── derivative.cpp │ │ ├── makefile │ │ ├── record.cpp │ │ ├── simulate.cpp │ │ └── test.cpp ├── mjpro150_linux.zip └── rllab-adv │ ├── .gitignore │ ├── CHANGELOG.md │ ├── LICENSE1 │ ├── MANIFEST.in │ ├── README.md │ ├── adversarial │ ├── experiments │ │ ├── yoda_rl_adv_driver.sh │ │ ├── yoda_rl_adv_starter.sh │ │ ├── yoda_rl_adversary_driver.sh │ │ ├── yoda_rl_adversary_single_driver.sh │ │ ├── yoda_rl_adversary_single_starter.sh │ │ ├── yoda_rl_adversary_starter.sh │ │ ├── yoda_rl_baseline_driver.sh │ │ ├── yoda_rl_baseline_starter.sh │ │ ├── yoda_rl_baseline_stater.sh │ │ ├── yoda_rl_driver.sh │ │ ├── yoda_rl_multiple_adversary_driver.sh │ │ ├── yoda_rl_multiple_adversary_starter.sh │ │ ├── yoda_rl_no_adv_driver.sh │ │ ├── yoda_rl_no_adv_starter.sh │ │ ├── yoda_rl_no_adversary_driver.sh │ │ ├── yoda_rl_no_adversary_starter.sh │ │ ├── yoda_rl_only_adversary_single_driver.sh │ │ ├── yoda_rl_only_adversary_single_starter.sh │ │ ├── yoda_rl_starter.sh │ │ ├── yoda_rl_step_adversary_driver.sh │ │ ├── yoda_rl_step_adversary_single_driver.sh │ │ ├── yoda_rl_step_adversary_single_starter.sh │ │ ├── yoda_rl_step_adversary_starter.sh │ │ ├── yoda_rl_tandem_adversary_single_driver.sh │ │ └── yoda_rl_tandem_adversary_single_starter.sh │ └── scripts │ │ ├── REINFORCE_baseline.py │ │ ├── __init__.py │ │ ├── binu │ │ └── trpo.py │ │ ├── results │ │ ├── BASELINE-env-HalfCheetahAdv-v1_no_adv_Exp3_Itr600_BS4000_Adv1.0_stp0.01_lam0.97.p.png │ │ ├── BASELINE-env-HopperAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png │ │ ├── BASELINE-env-InvertedPendulumAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png │ │ ├── BASELINE-env-SwimmerAdv-v1_no_adv_Exp3_Itr100_BS4000_Adv1.0_stp0.01_lam0.97.p.png │ │ ├── BASELINE-env-Walker2dAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png │ │ ├── Waler Robust.png │ │ ├── Walker2d - Robust.png │ │ ├── env-HopperAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_364289.p.png │ │ ├── env-SwimmerAdv-v1_Exp3_Itr50_BS4000_Adv0.25_stp0.01_lam0.97_240911.p.png │ │ ├── env-Walker2dAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_163843.p.png │ │ └── hopper_robustness.png │ │ ├── test.py │ │ ├── test_friction_robustness.py │ │ ├── test_robustness_friction.py │ │ ├── test_robustness_mass.py │ │ ├── test_robustness_performance.py │ │ ├── train_adversary.py │ │ ├── train_trpo_adversary.py │ │ ├── train_trpo_baseline.py │ │ ├── train_trpo_mult_var_adversary.py │ │ ├── train_trpo_only_adversary.py │ │ ├── train_trpo_step_adversary.py │ │ ├── train_trpo_var_adversary.py │ │ ├── 
train_trpo_var_mult_adversary.py │ │ ├── utils_noise.py │ │ ├── viz_results.py │ │ ├── viz_results_const.py │ │ ├── viz_results_const_folder.py │ │ ├── viz_results_robustness.py │ │ └── viz_results_step.py │ ├── bin │ ├── activate │ ├── conda │ └── deactivate │ ├── circle.yml │ ├── contrib │ ├── __init__.py │ └── alexbeloi │ │ ├── __init__.py │ │ ├── examples │ │ ├── __init__.py │ │ ├── trpois_cartpole.py │ │ └── vpgis_cartpole.py │ │ └── is_sampler.py │ ├── docker │ ├── Dockerfile │ ├── gpu_Dockerfile │ ├── gpu_tf_Dockerfile │ └── tester_Dockerfile │ ├── docs │ ├── Makefile │ ├── conf.py │ ├── index.rst │ └── user │ │ ├── cluster.rst │ │ ├── cluster_1.png │ │ ├── cluster_2.png │ │ ├── cluster_3.png │ │ ├── experiments.rst │ │ ├── gym_integration.rst │ │ ├── implement_algo_advanced.rst │ │ ├── implement_algo_basic.rst │ │ ├── implement_env.rst │ │ └── installation.rst │ ├── environment.yml │ ├── requirements.txt │ ├── results │ └── xyz.py │ ├── rllab.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt │ ├── rllab │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ ├── base.py │ │ ├── batch_polopt.py │ │ ├── cem.py │ │ ├── cma_es.py │ │ ├── cma_es_lib.py │ │ ├── ddpg.py │ │ ├── erwr.py │ │ ├── nop.py │ │ ├── npo.py │ │ ├── ppo.py │ │ ├── reps.py │ │ ├── tnpg.py │ │ ├── trpo.py │ │ ├── util.py │ │ └── vpg.py │ ├── baselines │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_conv_baseline.py │ │ ├── gaussian_mlp_baseline.py │ │ ├── linear_feature_baseline.py │ │ └── zero_baseline.py │ ├── config.py │ ├── config_personal_template.py │ ├── core │ │ ├── __init__.py │ │ ├── lasagne_helpers.py │ │ ├── lasagne_layers.py │ │ ├── lasagne_powered.py │ │ ├── network.py │ │ ├── parameterized.py │ │ └── serializable.py │ ├── distributions │ │ ├── __init__.py │ │ ├── base.py │ │ ├── bernoulli.py │ │ ├── categorical.py │ │ ├── delta.py │ │ ├── diagonal_gaussian.py │ │ ├── recurrent_categorical.py │ │ └── recurrent_diagonal_gaussian.py │ ├── envs │ │ ├── __init__.py │ │ ├── adversarial │ │ │ ├── .classic_control │ │ │ │ ├── __init__.py │ │ │ │ ├── acrobot.py │ │ │ │ ├── assets │ │ │ │ │ └── clockwise.png │ │ │ │ ├── cartpole.py │ │ │ │ ├── continuous_mountain_car.py │ │ │ │ ├── dist │ │ │ │ │ └── gym-0.5.6-py2.7.egg │ │ │ │ ├── gym.egg-info │ │ │ │ │ ├── PKG-INFO │ │ │ │ │ ├── SOURCES.txt │ │ │ │ │ ├── dependency_links.txt │ │ │ │ │ ├── not-zip-safe │ │ │ │ │ ├── requires.txt │ │ │ │ │ └── top_level.txt │ │ │ │ ├── mountain_car.py │ │ │ │ ├── pendulum.py │ │ │ │ └── rendering.py │ │ │ ├── __init__.py │ │ │ └── mujoco │ │ │ │ ├── __init__.py │ │ │ │ ├── ant.py │ │ │ │ ├── ant_heel.py │ │ │ │ ├── assets │ │ │ │ ├── ant.xml │ │ │ │ ├── half_cheetah.xml │ │ │ │ ├── hopper.xml │ │ │ │ ├── hopper_fric.xml │ │ │ │ ├── humanoid.xml │ │ │ │ ├── humanoidstandup.xml │ │ │ │ ├── inverted_double_pendulum.xml │ │ │ │ ├── inverted_pendulum.xml │ │ │ │ ├── point.xml │ │ │ │ ├── reacher.xml │ │ │ │ ├── swimmer.xml │ │ │ │ └── walker2d.xml │ │ │ │ ├── half_cheetah.py │ │ │ │ ├── half_cheetah_heel.py │ │ │ │ ├── half_cheetah_torso.py │ │ │ │ ├── hopper.py │ │ │ │ ├── hopper_6.py │ │ │ │ ├── hopper_fric.py │ │ │ │ ├── hopper_heel.py │ │ │ │ ├── hopper_heel_6.py │ │ │ │ ├── hopper_torso_6.py │ │ │ │ ├── humanoid.py │ │ │ │ ├── humanoid_heel.py │ │ │ │ ├── humanoidstandup.py │ │ │ │ ├── inverted_double_pendulum.py │ │ │ │ ├── inverted_pendulum.py │ │ │ │ ├── mujoco_env.py │ │ │ │ ├── reacher.py │ │ │ │ ├── swimmer.py │ │ │ │ ├── walker2d.py │ │ │ │ ├── walker2d_heel.py │ │ │ │ └── 
walker2d_torso.py │ │ ├── base.py │ │ ├── box2d │ │ │ ├── __init__.py │ │ │ ├── box2d_env.py │ │ │ ├── box2d_viewer.py │ │ │ ├── car_parking_env.py │ │ │ ├── cartpole_env.py │ │ │ ├── cartpole_swingup_env.py │ │ │ ├── double_pendulum_env.py │ │ │ ├── models │ │ │ │ ├── car_parking.xml │ │ │ │ ├── car_parking.xml.rb │ │ │ │ ├── cartpole.xml.mako │ │ │ │ ├── double_pendulum.xml.mako │ │ │ │ └── mountain_car.xml.mako │ │ │ ├── mountain_car_env.py │ │ │ └── parser │ │ │ │ ├── __init__.py │ │ │ │ ├── xml_attr_types.py │ │ │ │ ├── xml_box2d.py │ │ │ │ └── xml_types.py │ │ ├── env_spec.py │ │ ├── grid_world_env.py │ │ ├── gym_env.py │ │ ├── identification_env.py │ │ ├── mujoco │ │ │ ├── __init__.py │ │ │ ├── ant_env.py │ │ │ ├── gather │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_gather_env.py │ │ │ │ ├── embedded_viewer.py │ │ │ │ ├── gather_env.py │ │ │ │ ├── point_gather_env.py │ │ │ │ └── swimmer_gather_env.py │ │ │ ├── half_cheetah_env.py │ │ │ ├── hopper_env.py │ │ │ ├── humanoid_env.py │ │ │ ├── inverted_double_pendulum_env.py │ │ │ ├── maze │ │ │ │ ├── __init__.py │ │ │ │ ├── ant_maze_env.py │ │ │ │ ├── maze_env.py │ │ │ │ ├── maze_env_utils.py │ │ │ │ ├── point_maze_env.py │ │ │ │ └── swimmer_maze_env.py │ │ │ ├── mujoco_env.py │ │ │ ├── point_env.py │ │ │ ├── simple_humanoid_env.py │ │ │ ├── swimmer_env.py │ │ │ └── walker2d_env.py │ │ ├── noisy_env.py │ │ ├── normalized_env.py │ │ ├── proxy_env.py │ │ └── sliding_mem_env.py │ ├── exploration_strategies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gaussian_strategy.py │ │ └── ou_strategy.py │ ├── misc │ │ ├── __init__.py │ │ ├── autoargs.py │ │ ├── console.py │ │ ├── ext.py │ │ ├── instrument.py │ │ ├── krylov.py │ │ ├── logger.py │ │ ├── mako_utils.py │ │ ├── meta.py │ │ ├── nb_utils.py │ │ ├── overrides.py │ │ ├── resolve.py │ │ ├── special.py │ │ ├── tabulate.py │ │ ├── tensor_utils.py │ │ └── viewer2d.py │ ├── mujoco_py │ │ ├── .rvmrc │ │ ├── Gemfile │ │ ├── Gemfile.lock │ │ ├── __init__.py │ │ ├── codegen.rb │ │ ├── gen_binding.sh │ │ ├── glfw.py │ │ ├── mjconstants.py │ │ ├── mjcore.py │ │ ├── mjextra.py │ │ ├── mjlib.py │ │ ├── mjtypes.py │ │ ├── mjviewer.py │ │ └── util.py │ ├── optimizers │ │ ├── __init__.py │ │ ├── conjugate_gradient_optimizer.py │ │ ├── first_order_optimizer.py │ │ ├── hessian_free_optimizer.py │ │ ├── hf.py │ │ ├── lbfgs_optimizer.py │ │ ├── minibatch_dataset.py │ │ └── penalty_lbfgs_optimizer.py │ ├── plotter │ │ ├── __init__.py │ │ └── plotter.py │ ├── policies │ │ ├── __init__.py │ │ ├── base.py │ │ ├── categorical_conv_policy.py │ │ ├── categorical_gru_policy.py │ │ ├── categorical_mlp_policy.py │ │ ├── constant_control_policy.py │ │ ├── deterministic_mlp_policy.py │ │ ├── gaussian_gru_policy.py │ │ ├── gaussian_mlp_policy.py │ │ ├── random_uniform_control_policy.py │ │ ├── step_control_policy.py │ │ └── uniform_control_policy.py │ ├── q_functions │ │ ├── __init__.py │ │ ├── base.py │ │ └── continuous_mlp_q_function.py │ ├── regressors │ │ ├── __init__.py │ │ ├── categorical_mlp_regressor.py │ │ ├── gaussian_conv_regressor.py │ │ ├── gaussian_mlp_regressor.py │ │ └── product_regressor.py │ ├── sampler │ │ ├── __init__.py │ │ ├── base.py │ │ ├── parallel_sampler.py │ │ ├── stateful_pool.py │ │ ├── utils.py │ │ └── utils.py.new │ ├── spaces │ │ ├── __init__.py │ │ ├── base.py │ │ ├── box.py │ │ ├── discrete.py │ │ └── product.py │ └── viskit │ │ ├── __init__.py │ │ ├── core.py │ │ ├── frontend.py │ │ ├── static │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ └── dropdowns-enhancement.css │ │ └── js │ │ │ ├── 
bootstrap.min.js │ │ │ ├── dropdowns-enhancement.js │ │ │ ├── jquery-1.10.2.min.js │ │ │ ├── jquery.loadTemplate-1.5.6.js │ │ │ └── plotly-latest.min.js │ │ └── templates │ │ └── main.html │ ├── scripts │ ├── __init__.py │ ├── resume_training.py │ ├── run_experiment_lite.py │ ├── setup_ec2_for_rllab.py │ ├── setup_linux.sh │ ├── setup_mujoco.sh │ ├── setup_osx.sh │ ├── sim_env.py │ ├── sim_policy.py │ ├── submit_gym.py │ └── sync_s3.py │ ├── setup.py │ ├── tests │ ├── __init__.py │ ├── algos │ │ ├── __init__.py │ │ └── test_trpo.py │ ├── envs │ │ ├── __init__.py │ │ ├── test_envs.py │ │ └── test_maze_env.py │ ├── regression_tests │ │ ├── __init__.py │ │ └── test_issue_3.py │ ├── test_algos.py │ ├── test_baselines.py │ ├── test_instrument.py │ ├── test_networks.py │ ├── test_sampler.py │ ├── test_serializable.py │ ├── test_spaces.py │ └── test_stateful_pool.py │ └── vendor │ └── mujoco_models │ ├── ant.xml │ ├── green_ball.xml │ ├── half_cheetah.xml │ ├── hopper.xml │ ├── humanoid.xml │ ├── inverted_double_pendulum.xml │ ├── inverted_double_pendulum.xml.mako │ ├── point.xml │ ├── red_ball.xml │ ├── simple_humanoid.xml │ ├── swimmer.xml │ ├── utils.mako │ └── walker2d.xml ├── temp.py ├── temp2.py └── temp_results └── env-InvertedPendulumAdv-v1_Exp2_Itr5_BS4000_Adv0.25_stp0.01_lam0.97_816990.p.temp /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Topics in ML Project 2 | === 3 | 4 | # Paper 5 | 6 | * [Robust Adversarial Reinforcement Learning](https://arxiv.org/abs/1703.02702) 7 | * *Lerrel Pinto, James Davidson, Rahul Sukthankar and Abhinav Gupta* 8 | * ICML 2017 9 | 10 | # Software 11 | 12 | * [rllab](https://github.com/rll/rllab) 13 | * [rllab-adv](https://github.com/lerrel/rllab-adv) 14 | 15 | ## Install Instructions 16 | 17 | [expand] 18 | 19 | -------------------------------------------------------------------------------- /notes/paper.md: -------------------------------------------------------------------------------- 1 | 2 | # Introduction 3 | 4 | * Real world learning 5 | * Scarcity of data and expensive testing => policies fail to generalize. 6 | * Learning in simulations 7 | * Non-robustness to modelling errors => unsuccessful transfer of 8 | the learnt policy. 9 | 10 | In past work, high friction was used to model adversarial forces at 11 | contact points, which helped learning. Can we take this further? 12 | 13 | ## Proposition 14 | 15 | * RARL: Robust Adversarial Reinforcement Learning 16 | * Jointly train a protagonist and an adversary. 17 | 18 | ### Evaluations 19 | 20 | * OpenAI Gym environments. 21 | * InvertedPendulum 22 | * HalfCheetah 23 | * Swimmer 24 | * Hopper 25 | * Walker2D 26 | 27 | #### Objective 28 | 29 | The proposed approach is: 30 | 31 | * Robust to model initializations. 32 | * Robust to modelling errors and uncertainties. 33 | 34 | ## Premises 35 | 36 | ### Overview 37 | 38 | * Train on carpet, test on ice: the policy *must generalize*. 39 | * Parameters: mass, friction. 40 | * Methods: 41 | * Learn an ensemble of policies over the variations. 42 | * Problems: 43 | * Is it feasible to sample trajectories under all possible disturbances? 44 | * In unconstrained scenarios, disturbances >> actions => sampling is very sparse. 45 | 46 | * Proposed solution (formalized in the sketch below): 47 | * Adversarial agents model disturbances. 48 | * Adversaries incorporate domain knowledge.
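
The setup above can be written as a two-player zero-sum discounted game (a rough sketch in my own notation, to be checked against the paper): the protagonist's policy $\mu$ maximizes the very return that the adversary's policy $\nu$ tries to minimize,

$$
\max_{\mu}\; \min_{\nu}\; \mathbb{E}\left[ \sum_{t=0}^{T-1} \gamma^{t}\, r\big(s_t,\, a_t^{\mathrm{pro}},\, a_t^{\mathrm{adv}}\big) \right],
\qquad a_t^{\mathrm{pro}} \sim \mu(\cdot \mid s_t),\;\; a_t^{\mathrm{adv}} \sim \nu(\cdot \mid s_t).
$$

With the adversary removed this collapses to the standard objective $\max_{\mu} \mathbb{E}\big[\sum_t \gamma^{t} r_t\big]$ discussed under "Past work" below.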
49 | 50 | ## Past work 51 | ### Standard RL on MDPs 52 | 53 | * Batch policy algorithms 54 | * REINFORCE, NPG, TRPO 55 | * Learn a stochastic policy that maximizes the cumulative discounted reward. 56 | 57 | ### Two-player zero-sum discounted games 58 | * [expand] 59 | 60 | ## RARL 61 | 62 | 63 | ### Results 64 | 65 | -------------------------------------------------------------------------------- /src/files.txt: -------------------------------------------------------------------------------- 1 | mv env src/ 2 | mv files.txt src/ 3 | mv getid_linux src/ 4 | mv gym-adv src/ 5 | mv mjkey.txt src/ 6 | mv mjpro131 src/ 7 | mv mjpro131_linux.zip src/ 8 | mv mjpro150 src/ 9 | mv mjpro150_linux.zip src/ 10 | mv mujoco-py src/ 11 | mv rllab-adv src/ 12 | -------------------------------------------------------------------------------- /src/getid_linux: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/getid_linux -------------------------------------------------------------------------------- /src/gym-adv/.dockerignore: -------------------------------------------------------------------------------- 1 | .tox 2 | -------------------------------------------------------------------------------- /src/gym-adv/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.py~ 4 | .DS_Store 5 | 6 | # Setuptools distribution and build folders. 7 | /dist/ 8 | /build 9 | 10 | # Virtualenv 11 | /env 12 | 13 | # Python egg metadata, regenerated from source files by setuptools. 14 | /*.egg-info 15 | 16 | *.sublime-project 17 | *.sublime-workspace 18 | 19 | logs/ 20 | 21 | .ipynb_checkpoints 22 | ghostdriver.log 23 | 24 | junk 25 | MUJOCO_LOG.txt 26 | 27 | rllab_mujoco 28 | 29 | tutorial/*.html 30 | 31 | # IDE files 32 | .eggs 33 | .tox 34 | 35 | # PyCharm project files 36 | .idea 37 | vizdoom.ini 38 | -------------------------------------------------------------------------------- /src/gym-adv/.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | services: 4 | - docker 5 | before_install: 6 | # Prime the cache. We currently manually keep this synced. 7 | - docker pull quay.io/openai/gym:test 8 | - docker build -f test.dockerfile -t quay.io/openai/gym:test . 9 | script: 10 | # In a pull request, there are no secrets, and hence no MuJoCo: 11 | # https://docs.travis-ci.com/user/pull-requests#Security-Restrictions-when-testing-Pull-Requests.
12 | - docker run -e MUJOCO_KEY_BUNDLE="${MUJOCO_KEY_BUNDLE:-}" quay.io/openai/gym:test tox 13 | 14 | notifications: 15 | slack: 16 | secure: h/Mxm8K+avH/2W0818zCHmLloRPMFN4NJL01+VShvAkH80/acfjeq/+mMdWXXPL/oOB6kSHDk+GDhwR6+s03ZcPMn5INTFvFYqUc6UWmT+NXtOPxGTN0xda6MdYUkWQUKaMyjFrweZQOMOASFBIzPOq4XeVbM5aB8s4EJhnfAcYZhp/idwKbToVihN4KZgxlvZIFc8iEp1o9uSl5qrsaeYYYXRkb6mauacAwOo4/Chu+cOnoLUOnvhBFE3rV3doDNrbnoalO8XiExtgx5CIAYWrlMni7r2Q+LlzgwdyTH19ZtybPxJTZIIWSBQ2UtcoYdIEDcc36GcUwz1VUGg32mLJJnY2xw80CWR4ixFPpLwwP5Y99WTn8v094B4nmFTWOwNWXp3EkqtTN9XcJoRBqXB5ArucIPqrx57dOCljSKx22gL6WaF2p3stSAxIGFektGyGnisaELrFZG1C63aHoUPicj3gUlijmAoUmYaDRf6P1wnpXqBpKDAWWhAMSatvx1ekmEJgR7OQklQnnfjx9kENDUygNUWS4IQwN2qYieuzHFL3of7/30mTM43+Vt/vWN8GI7j01BXu6FNGGloHxjH1pt3bLP/+uj5BJsT2HWF+Z8XR4VE6cyVuKsQAFgCXwOkoDHALbcwsspONDIt/9ixkesgh1oFt4CzU3UuU5wYs= 17 | on_success: change 18 | -------------------------------------------------------------------------------- /src/gym-adv/CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | OpenAI Gym is dedicated to providing a harassment-free experience for 2 | everyone, regardless of gender, gender identity and expression, sexual 3 | orientation, disability, physical appearance, body size, age, race, or 4 | religion. We do not tolerate harassment of participants in any form. 5 | 6 | This code of conduct applies to all OpenAI Gym spaces (including Gist 7 | comments) both online and off. Anyone who violates this code of 8 | conduct may be sanctioned or expelled from these spaces at the 9 | discretion of the OpenAI team. 10 | 11 | We may add additional rules over time, which will be made clearly 12 | available to participants. Participants are responsible for knowing 13 | and abiding by these rules. 14 | -------------------------------------------------------------------------------- /src/gym-adv/Dockerfile: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install 2 | FROM ubuntu:14.04 3 | 4 | RUN apt-get update \ 5 | && apt-get install -y libav-tools \ 6 | python-numpy \ 7 | python-scipy \ 8 | python-pyglet \ 9 | python-setuptools \ 10 | libpq-dev \ 11 | libjpeg-dev \ 12 | curl \ 13 | cmake \ 14 | swig \ 15 | python-opengl \ 16 | libboost-all-dev \ 17 | libsdl2-dev \ 18 | wget \ 19 | unzip \ 20 | git \ 21 | xpra \ 22 | && apt-get clean \ 23 | && rm -rf /var/lib/apt/lists/* \ 24 | && easy_install pip 25 | 26 | WORKDIR /usr/local/gym 27 | RUN mkdir -p gym && touch gym/__init__.py 28 | COPY ./gym/version.py ./gym 29 | COPY ./requirements.txt . 30 | COPY ./setup.py . 31 | RUN pip install -e .[all] 32 | 33 | # Finally, upload our actual code! 34 | COPY . 
/usr/local/gym 35 | 36 | WORKDIR /root 37 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 38 | -------------------------------------------------------------------------------- /src/gym-adv/LICENSE1: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2016 OpenAI (http://openai.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/gym-adv/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install test 2 | 3 | install: 4 | pip install -r requirements.txt 5 | 6 | base: 7 | docker pull ubuntu:14.04 8 | docker tag ubuntu:14.04 quay.io/openai/gym:base 9 | docker push quay.io/openai/gym:base 10 | 11 | test: 12 | docker build -f test.dockerfile -t quay.io/openai/gym:test . 13 | docker push quay.io/openai/gym:test 14 | 15 | upload: 16 | rm -rf dist 17 | python setup.py sdist 18 | twine upload dist/* 19 | 20 | docker-build: 21 | docker build -t quay.io/openai/gym . 22 | 23 | docker-run: 24 | docker run -ti quay.io/openai/gym bash 25 | -------------------------------------------------------------------------------- /src/gym-adv/Manifest.in: -------------------------------------------------------------------------------- 1 | recursive-include gym 2 | -------------------------------------------------------------------------------- /src/gym-adv/README.md: -------------------------------------------------------------------------------- 1 | > Under Development 2 | # Gym environments with adversarial disturbance agents 3 | 4 | This contains the adversarial environments used in our work on Robust Adversarial Reinforcement Learning ([RARL](https://arxiv.org/abs/1703.02702)). We heavily build on OpenAI Gym. 5 | 6 | ## Getting Started 7 | 8 | The environments are based on the MuJoCo environments wrapped by OpenAI Gym's environments ([info](https://gym.openai.com/envs#mujoco)). For more information on OpenAI Gym environments refer to the [Gym webpage](https://gym.openai.com/). 9 | 10 | Since these environments use the OpenAI pyhton bindings for the MuJoCo environments, you'll need to install `mujoco-py` following [this](https://github.com/openai/mujoco-py). 
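
Before the minimal example below, here is a rough end-to-end sketch of a single rollout. It is not taken from this repository's code; it assumes the `InvertedPendulumAdv-v1` registration and the `update_adversary` / `sample_action` / `step` behaviour documented in that example.

```python
# Hedged sketch: one random-rollout episode in an adversarial environment.
# Assumes `sample_action()` returns an object with .pro (protagonist) and
# .adv (adversary) fields and that `step` accepts it, as in the example below.
import gym

env = gym.make('InvertedPendulumAdv-v1')
env.update_adversary(6)            # cap on the adversary's force magnitude

obs = env.reset()
done, total_reward = False, 0.0
while not done:
    u = env.sample_action()        # random protagonist and adversary actions
    obs, reward, done, info = env.step(u)
    total_reward += reward         # reward is from the protagonist's perspective
print('episode return:', total_reward)
```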
11 | 12 | ## Example 13 | 14 | ```python 15 | import gym 16 | E = gym.make('InvertedPendulumAdv-v1') 17 | current_observation = E.reset() 18 | 19 | # Set maximum adversary force 20 | E.update_adversary(6) 21 | 22 | # Get a sample action 23 | u = E.sample_action() 24 | # u.pro corresponds to the protagonist's action, while u.adv corresponds to the adversary's action 25 | 26 | # Perform action 27 | new_observation, reward, done, _ = E.step(u) 28 | ``` 29 | 30 | ## Contact 31 | Lerrel Pinto -- lerrelpATcsDOTcmuDOTedu. 32 | -------------------------------------------------------------------------------- /src/gym-adv/bin/docker_entrypoint: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script is the entrypoint for our Docker image. 4 | 5 | set -e 6 | 7 | path=$(cd $(dirname "$0") && pwd) 8 | 9 | [ -z "${MUJOCO_KEY_BUNDLE}" ] || ( mkdir -p ~/.mujoco && curl https://openai-public.s3-us-west-2.amazonaws.com/mujoco/$MUJOCO_KEY_BUNDLE.tar.gz | tar xz -C ~/.mujoco ) 10 | 11 | # Set up display; otherwise rendering will fail 12 | rm -f /tmp/.X12-lock 13 | "$path/../vendor/Xdummy" :12 & 14 | export DISPLAY=:12 15 | 16 | # Wait for the file to come up 17 | display=12 18 | file="/tmp/.X11-unix/X$display" 19 | for i in $(seq 1 10); do 20 | if [ -e "$file" ]; then 21 | break 22 | fi 23 | 24 | echo "Waiting for $file to be created (try $i/10)" 25 | sleep "$i" 26 | done 27 | if ! [ -e "$file" ]; then 28 | echo "Timing out: $file was not created" 29 | exit 1 30 | fi 31 | 32 | exec "$@" 33 | -------------------------------------------------------------------------------- /src/gym-adv/docs/environments.md: -------------------------------------------------------------------------------- 1 | # Environments 2 | 3 | The gym comes prepackaged with many, many environments. It's this common API around many environments that makes the gym so great. Here we will list additional environments that do not come prepackaged with the gym. Submit another to this list via a pull-request. 4 | 5 | _**NOTICE**: It's possible that in time OpenAI will develop a full-fledged repository of supplemental environments. Until then, this bit of markdown will suffice._ 6 | 7 | ## PGE: Parallel Game Engine 8 | 9 | PGE is a FOSS 3D engine for AI simulations, and can interoperate with the Gym. Contains environments with modern 3D graphics, and uses Bullet for physics. 10 | 11 | Learn more here: https://github.com/222464/PGE 12 | 13 | ## gym-inventory: Inventory Control Environments 14 | 15 | gym-inventory is a single agent domain featuring discrete state and action spaces that an AI agent might encounter in inventory control problems. 16 | 17 | Learn more here: https://github.com/paulhendricks/gym-inventory 18 | 19 | ## gym-gazebo: training Robots in Gazebo 20 | 21 | gym-gazebo presents an extension of the initial OpenAI gym for robotics using ROS and Gazebo, an advanced 3D modeling and 22 | rendering tool. 23 | 24 | Learn more here: https://github.com/erlerobot/gym-gazebo/ 25 | -------------------------------------------------------------------------------- /src/gym-adv/docs/misc.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | Here we have a bunch of tools, libs, apis, tutorials, resources, etc. provided by the community to add value to the gym ecosystem.
4 | 5 | ## OpenAIGym.jl 6 | 7 | Convenience wrapper of the OpenAI Gym for the Julia language [/tbreloff/OpenAIGym.jl](https://github.com/tbreloff/OpenAIGym.jl) -------------------------------------------------------------------------------- /src/gym-adv/docs/readme.md: -------------------------------------------------------------------------------- 1 | #Table of Contents 2 | 3 | - [Agents](agents.md) contains a listing of agents compatible with gym environments. Agents facilitate the running of an algorithm against an environment. 4 | 5 | - [Environments](environments.md) lists more environments to run your algorithms against. These do not come prepackaged with the gym. 6 | 7 | - [Miscellaneous](misc.md) is a collection of other value-add tools and utilities. These could be anything from a small convenience lib to a collection of video tutorials or a new language binding. -------------------------------------------------------------------------------- /src/gym-adv/examples/agents/_policies.py: -------------------------------------------------------------------------------- 1 | # Support code for cem.py 2 | 3 | class BinaryActionLinearPolicy(object): 4 | def __init__(self, theta): 5 | self.w = theta[:-1] 6 | self.b = theta[-1] 7 | def act(self, ob): 8 | y = ob.dot(self.w) + self.b 9 | a = int(y < 0) 10 | return a 11 | 12 | class ContinuousActionLinearPolicy(object): 13 | def __init__(self, theta, n_in, n_out): 14 | assert len(theta) == (n_in + 1) * n_out 15 | self.W = theta[0 : n_in * n_out].reshape(n_in, n_out) 16 | self.b = theta[n_in * n_out : None].reshape(1, n_out) 17 | def act(self, ob): 18 | a = ob.dot(self.W) + self.b 19 | return a 20 | -------------------------------------------------------------------------------- /src/gym-adv/examples/scripts/list_envs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from gym import envs 3 | envids = [spec.id for spec in envs.registry.all()] 4 | for envid in sorted(envids): 5 | print(envid) 6 | -------------------------------------------------------------------------------- /src/gym-adv/examples/scripts/play_go: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from six.moves import input as raw_input 3 | import argparse 4 | import pachi_py 5 | import gym 6 | from gym import spaces, envs 7 | from gym.envs.board_game import go 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--raw_actions', action='store_true') 12 | args = parser.parse_args() 13 | 14 | env = envs.make('Go9x9-v0') 15 | env.reset() 16 | while True: 17 | s = env._state 18 | env._render() 19 | 20 | colorstr = pachi_py.color_to_str(s.color) 21 | if args.raw_actions: 22 | a = int(raw_input('{} (raw)> '.format(colorstr))) 23 | else: 24 | coordstr = raw_input('{}> '.format(colorstr)) 25 | a = go.str_to_action(s.board, coordstr) 26 | 27 | _, r, done, _ = env.step(a) 28 | if done: 29 | break 30 | 31 | print 32 | print('You win!' 
if r > 0 else 'Opponent wins!') 33 | print('Final score:', env._state.board.official_score) 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /src/gym-adv/gym/benchmarks/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/benchmarks/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/configuration.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | import gym 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | root_logger = logging.getLogger() 9 | 10 | # Should be "gym", but we'll support people doing somewhat crazy 11 | # things. 12 | package_name = '.'.join(__name__.split('.')[:-1]) 13 | gym_logger = logging.getLogger(package_name) 14 | 15 | # Should be modified only by official Gym plugins. This is an 16 | # unsupported API and may be removed in future versions. 17 | _extra_loggers = [gym_logger] 18 | 19 | # Set up the default handler 20 | formatter = logging.Formatter('[%(asctime)s] %(message)s') 21 | handler = logging.StreamHandler(sys.stderr) 22 | handler.setFormatter(formatter) 23 | 24 | # We need to take in the gym logger explicitly since this is called 25 | # at initialization time. 26 | def logger_setup(_=None): 27 | # This used to take in an argument; we still take an (ignored) 28 | # argument for compatibility. 29 | root_logger.addHandler(handler) 30 | for logger in _extra_loggers: 31 | logger.setLevel(logging.INFO) 32 | 33 | def undo_logger_setup(): 34 | """Undoes the automatic logging setup done by OpenAI Gym. You should call 35 | this function if you want to manually configure logging 36 | yourself. Typical usage would involve putting something like the 37 | following at the top of your script: 38 | 39 | gym.undo_logger_setup() 40 | logger = logging.getLogger() 41 | logger.addHandler(logging.StreamHandler(sys.stderr)) 42 | """ 43 | root_logger.removeHandler(handler) 44 | for logger in _extra_loggers: 45 | logger.setLevel(logging.NOTSET) 46 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic.copy_ import CopyEnv 2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv 3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv 4 | from gym.envs.algorithmic.reverse import ReverseEnv 5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv 6 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/copy_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content from the input tape to 3 | the output tape. 
http://arxiv.org/abs/1511.07275 4 | """ 5 | import numpy as np 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, base=5, chars=True): 10 | super(CopyEnv, self).__init__(base=base, chars=chars) 11 | 12 | def target_from_input_data(self, input_data): 13 | return input_data 14 | 15 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/duplicated_input.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to return every nth character from the input tape. 3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from __future__ import division 6 | import numpy as np 7 | from gym.envs.algorithmic import algorithmic_env 8 | 9 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv): 10 | def __init__(self, duplication=2, base=5): 11 | self.duplication = duplication 12 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True) 13 | 14 | def generate_input_data(self, size): 15 | res = [] 16 | if size < self.duplication: 17 | size = self.duplication 18 | for i in range(size//self.duplication): 19 | char = self.np_random.randint(self.base) 20 | for _ in range(self.duplication): 21 | res.append(char) 22 | return res 23 | 24 | def target_from_input_data(self, input_data): 25 | return [input_data[i] for i in range(0, len(input_data), self.duplication)] 26 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/repeat_copy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content multiple times from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | import numpy as np 6 | from gym.envs.algorithmic import algorithmic_env 7 | 8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | def __init__(self, base=5): 11 | super(RepeatCopyEnv, self).__init__(base=base, chars=True) 12 | self.last = 50 13 | 14 | def target_from_input_data(self, input_data): 15 | return input_data + list(reversed(input_data)) + input_data 16 | 17 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/reverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to reverse content over the input tape. 
3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | 6 | import numpy as np 7 | from gym.envs.algorithmic import algorithmic_env 8 | 9 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv): 10 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 11 | def __init__(self, base=2): 12 | super(ReverseEnv, self).__init__(base=base, chars=True, starting_min_length=1) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_str): 16 | return list(reversed(input_str)) 17 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/reversed_addition.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from gym.envs.algorithmic import algorithmic_env 4 | 5 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv): 6 | def __init__(self, rows=2, base=3): 7 | super(ReversedAdditionEnv, self).__init__(rows=rows, base=base, chars=False) 8 | 9 | def target_from_input_data(self, input_strings): 10 | curry = 0 11 | target = [] 12 | for digits in input_strings: 13 | total = sum(digits) + curry 14 | target.append(total % self.base) 15 | curry = total // self.base 16 | 17 | if curry > 0: 18 | target.append(curry) 19 | return target 20 | 21 | @property 22 | def time_limit(self): 23 | # Quirk preserved for the sake of consistency: add the length of the input 24 | # rather than the length of the desired output (which may differ if there's 25 | # an extra carried digit). 26 | # TODO: It seems like this time limit is so strict as to make Addition3-v0 27 | # unsolvable, since agents aren't even given enough time steps to look at 28 | # all the digits. (The solutions on the scoreboard seem to only work by 29 | # save-scumming.) 
30 | return self.input_width*2 + 4 31 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.algorithmic/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/.algorithmic/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.atari/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.atari.atari_env import AtariEnv 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.board_game/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.board_game.go import GoEnv 2 | from gym.envs.board_game.hex import HexEnv 3 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.box2d/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.box2d.lunar_lander import LunarLander 2 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 3 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 4 | from gym.envs.box2d.car_racing import CarRacing 5 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.classical_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.classical_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/.classical_control/assets/clockwise.png -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.dafety/README.md: -------------------------------------------------------------------------------- 1 | # Safety series README 2 | 3 | This README is to document AI safety issues that have not yet been addressed by the environments in the safety series. 
4 | 5 | ## Possible envs 6 | - Wireheading / Delusion Box 7 | - IRL 8 | 9 | ## Impossible envs 10 | - Env modifying agents (breaks the cartesian barrier) 11 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.dafety/__init__.py: -------------------------------------------------------------------------------- 1 | # interpretability envs 2 | from gym.envs.safety.predict_actions_cartpole import PredictActionsCartpoleEnv 3 | from gym.envs.safety.predict_obs_cartpole import PredictObsCartpoleEnv 4 | 5 | # semi_supervised envs 6 | from gym.envs.safety.semisuper import \ 7 | SemisuperPendulumNoiseEnv, SemisuperPendulumRandomEnv, SemisuperPendulumDecayEnv 8 | 9 | # off_switch envs 10 | from gym.envs.safety.offswitch_cartpole import OffSwitchCartpoleEnv 11 | from gym.envs.safety.offswitch_cartpole_prob import OffSwitchCartpoleProbEnv 12 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.mujoco.ant import AntEnv 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | from gym.envs.mujoco.hopper import HopperEnv 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from gym.envs.mujoco.humanoid import HumanoidEnv 9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 11 | from gym.envs.mujoco.reacher import ReacherEnv 12 | from gym.envs.mujoco.swimmer import SwimmerEnv 13 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 14 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _step(self, action): 11 | xposbefore = self.model.data.qpos[0,0] 12 | self.do_simulation(action, self.frame_skip) 13 | xposafter = self.model.data.qpos[0,0] 14 | ob = self._get_obs() 15 | reward_ctrl = - 0.1 * np.square(action).sum() 16 | reward_run = (xposafter - xposbefore)/self.dt 17 | reward = reward_ctrl + reward_run 18 | done = False 19 | return ob, reward, done, dict(reward_run = reward_run, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | return np.concatenate([ 23 | self.model.data.qpos.flat[1:], 24 | self.model.data.qvel.flat, 25 | ]) 26 | 27 | def reset_model(self): 28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 30 | self.set_state(qpos, qvel) 31 | return self._get_obs() 32 | 33 | def viewer_setup(self): 34 | self.viewer.cam.distance = self.model.stat.extent * 0.5 35 | 
-------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _step(self, a): 11 | posbefore = self.model.data.qpos[0,0] 12 | self.do_simulation(a, self.frame_skip) 13 | posafter,height,ang = self.model.data.qpos[0:3,0] 14 | alive_bonus = 1.0 15 | reward = (posafter - posbefore) / self.dt 16 | reward += alive_bonus 17 | reward -= 1e-3 * np.square(a).sum() 18 | s = self.state_vector() 19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and 20 | (height > .7) and (abs(ang) < .2)) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | return np.concatenate([ 26 | self.model.data.qpos.flat[1:], 27 | np.clip(self.model.data.qvel.flat,-10,10) 28 | ]) 29 | 30 | def reset_model(self): 31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq) 32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | self.set_state(qpos, qvel) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.75 39 | self.viewer.cam.lookat[2] += .8 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def _step(self, action): 12 | self.do_simulation(action, self.frame_skip) 13 | ob = self._get_obs() 14 | x, _, y = self.model.data.site_xpos[0] 15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 16 | v1, v2 = self.model.data.qvel[1:3] 17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 18 | alive_bonus = 10 19 | r = (alive_bonus - dist_penalty - vel_penalty)[0] 20 | done = bool(y <= 1) 21 | return ob, r, done, {} 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.model.data.qpos[:1], # cart x pos 26 | np.sin(self.model.data.qpos[1:]), # link angles 27 | np.cos(self.model.data.qpos[1:]), 28 | np.clip(self.model.data.qvel, -10, 10), 29 | np.clip(self.model.data.qfrc_constraint, -10, 10) 30 | ]).ravel() 31 | 32 | def reset_model(self): 33 | self.set_state( 34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1 36 | ) 37 | return self._get_obs() 38 | 39 | def viewer_setup(self): 40 | v = self.viewer 41 | v.cam.trackbodyid=0 42 | v.cam.distance = v.model.stat.extent * 0.5 43 | v.cam.lookat[2] += 3#v.model.stat.center[2] 44 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym 
import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2) 9 | 10 | def _step(self, a): 11 | reward = 1.0 12 | self.do_simulation(a, self.frame_skip) 13 | ob = self._get_obs() 14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) 15 | done = not notdone 16 | return ob, reward, done, {} 17 | 18 | def reset_model(self): 19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01) 20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01) 21 | self.set_state(qpos, qvel) 22 | return self._get_obs() 23 | 24 | def _get_obs(self): 25 | return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel() 26 | 27 | def viewer_setup(self): 28 | v = self.viewer 29 | v.cam.trackbodyid=0 30 | v.cam.distance = v.model.stat.extent 31 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/reacher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2) 9 | 10 | def _step(self, a): 11 | vec = self.get_body_com("fingertip")-self.get_body_com("target") 12 | reward_dist = - np.linalg.norm(vec) 13 | reward_ctrl = - np.square(a).sum() 14 | reward = reward_dist + reward_ctrl 15 | self.do_simulation(a, self.frame_skip) 16 | ob = self._get_obs() 17 | done = False 18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 19 | 20 | def viewer_setup(self): 21 | self.viewer.cam.trackbodyid=0 22 | 23 | def reset_model(self): 24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos 25 | while True: 26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2) 27 | if np.linalg.norm(self.goal) < 2: break 28 | qpos[-2:] = self.goal 29 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 30 | qvel[-2:] = 0 31 | self.set_state(qpos, qvel) 32 | return self._get_obs() 33 | 34 | def _get_obs(self): 35 | theta = self.model.data.qpos.flat[:2] 36 | return np.concatenate([ 37 | np.cos(theta), 38 | np.sin(theta), 39 | self.model.data.qpos.flat[2:], 40 | self.model.data.qvel.flat[:2], 41 | self.get_body_com("fingertip") - self.get_body_com("target") 42 | ]) 43 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/swimmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _step(self, a): 11 | ctrl_cost_coeff = 0.0001 12 | xposbefore = self.model.data.qpos[0,0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.model.data.qpos[0,0] 15 | reward_fwd = (xposafter - xposbefore) / self.dt 16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum() 17 | reward = reward_fwd + reward_ctrl 18 | ob = self._get_obs() 19 | return ob, 
reward, False, dict(reward_fwd = reward_fwd, reward_ctrl=reward_ctrl) 20 | 21 | 22 | def _get_obs(self): 23 | qpos = self.model.data.qpos 24 | qvel = self.model.data.qvel 25 | return np.concatenate([qpos.flat[2:], qvel.flat]) 26 | 27 | def reset_model(self): 28 | self.set_state( 29 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 30 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv) 31 | ) 32 | return self._get_obs() 33 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def _step(self, a): 12 | posbefore = self.model.data.qpos[0,0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter,height,ang = self.model.data.qpos[0:3,0] 15 | alive_bonus = 1.0 16 | reward = ((posafter - posbefore) / self.dt ) 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | done = not (height > 0.8 and height < 2.0 20 | and ang > -1.0 and ang < 1.0) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | qpos = self.model.data.qpos 26 | qvel = self.model.data.qvel 27 | return np.concatenate([qpos[1:], np.clip(qvel,-10,10)]).ravel() 28 | 29 | def reset_model(self): 30 | self.set_state( 31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq), 32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | ) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.5 39 | self.viewer.cam.lookat[2] += .8 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.parameter_tuning/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.parameter_tuning.convergence import ConvergenceControl 2 | from gym.envs.parameter_tuning.train_deep_cnn import CNNClassifierTraining 3 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.roulette import RouletteEnv 3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv 4 | from gym.envs.toy_text.nchain import NChainEnv 5 | from gym.envs.toy_text.hotter_colder import HotterColder 6 | from gym.envs.toy_text.guessing_game import GuessingGame 7 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.toy_text/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import Env, spaces 4 | from gym.utils import seeding 5 | 6 | def categorical_sample(prob_n, np_random): 7 | """ 8 | Sample from categorical distribution 9 | Each row specifies class probabilities 10 | """ 11 | prob_n = np.asarray(prob_n) 12 | csprob_n = np.cumsum(prob_n) 13 | return (csprob_n > 
np_random.rand()).argmax() 14 | 15 | 16 | class DiscreteEnv(Env): 17 | 18 | """ 19 | Has the following members 20 | - nS: number of states 21 | - nA: number of actions 22 | - P: transitions (*) 23 | - isd: initial state distribution (**) 24 | 25 | (*) dictionary dict of dicts of lists, where 26 | P[s][a] == [(probability, nextstate, reward, done), ...] 27 | (**) list or array of length nS 28 | 29 | 30 | """ 31 | def __init__(self, nS, nA, P, isd): 32 | self.P = P 33 | self.isd = isd 34 | self.lastaction=None # for rendering 35 | self.nS = nS 36 | self.nA = nA 37 | 38 | self.action_space = spaces.Discrete(self.nA) 39 | self.observation_space = spaces.Discrete(self.nS) 40 | 41 | self._seed() 42 | self._reset() 43 | 44 | def _seed(self, seed=None): 45 | self.np_random, seed = seeding.np_random(seed) 46 | return [seed] 47 | 48 | def _reset(self): 49 | self.s = categorical_sample(self.isd, self.np_random) 50 | return self.s 51 | 52 | def _step(self, a): 53 | transitions = self.P[self.s][a] 54 | i = categorical_sample([t[0] for t in transitions], self.np_random) 55 | p, s, r, d= transitions[i] 56 | self.s = s 57 | self.lastaction=a 58 | return (s, r, d, {"prob" : p}) 59 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/.toy_text/roulette.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | from gym.utils import seeding 6 | 7 | 8 | class RouletteEnv(gym.Env): 9 | """Simple roulette environment 10 | 11 | The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up, 12 | you win a reward of 35. If the parity of your bet matches the parity 13 | of the spin, you win 1. Otherwise you receive a reward of -1. 14 | 15 | The long run reward for playing 0 should be -1/37 for any state 16 | 17 | The last action (38) stops the rollout for a return of 0 (walking away) 18 | """ 19 | def __init__(self, spots=37): 20 | self.n = spots + 1 21 | self.action_space = spaces.Discrete(self.n) 22 | self.observation_space = spaces.Discrete(1) 23 | self._seed() 24 | 25 | def _seed(self, seed=None): 26 | self.np_random, seed = seeding.np_random(seed) 27 | return [seed] 28 | 29 | def _step(self, action): 30 | assert self.action_space.contains(action) 31 | if action == self.n - 1: 32 | # observation, reward, done, info 33 | return 0, 0, True, {} 34 | 35 | # N.B. 
np.random.randint draws from [A, B) while random.randint draws from [A,B] 36 | val = self.np_random.randint(0, self.n - 1) 37 | if val == action == 0: 38 | reward = self.n - 2.0 39 | elif val != 0 and action != 0 and val % 2 == action % 2: 40 | reward = 1.0 41 | else: 42 | reward = -1.0 43 | return 0, reward, False, {} 44 | 45 | def _reset(self): 46 | return 0 47 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.adversarial.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.adversarial.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.adversarial.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.adversarial.classic_control.pendulum import PendulumEnv 5 | from gym.envs.adversarial.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/adversarial/.classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/dist/gym-0.5.6-py2.7.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/adversarial/.classic_control/dist/gym-0.5.6-py2.7.egg -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: gym 3 | Version: 0.5.6 4 | Summary: The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents. 
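The DiscreteEnv base class dumped above (src/gym-adv/gym/envs/.toy_text/discrete.py) expects its transition table P as a dict of dicts of lists, where P[s][a] enumerates (probability, next_state, reward, done) tuples, and isd as an initial-state distribution over the nS states. A minimal sketch of that layout follows; the two-state MDP is hypothetical and not part of the repository, and it assumes the gym-adv package above is installed and importable.

```python
# Hypothetical two-state MDP illustrating the P / isd layout expected by
# gym.envs.toy_text.discrete.DiscreteEnv (not part of this repository).
from gym.envs.toy_text.discrete import DiscreteEnv

# P[state][action] == [(probability, next_state, reward, done), ...]
P = {
    0: {0: [(1.0, 0, 0.0, False)],                        # stay in state 0, no reward
        1: [(0.8, 1, 1.0, True), (0.2, 0, 0.0, False)]},  # usually terminate with reward 1
    1: {0: [(1.0, 1, 0.0, True)],
        1: [(1.0, 1, 0.0, True)]},
}
isd = [1.0, 0.0]  # always start in state 0

env = DiscreteEnv(nS=2, nA=2, P=P, isd=isd)
obs = env.reset()                      # -> 0, sampled from isd
obs, reward, done, info = env.step(1)  # info["prob"] is the sampled transition probability
```

FrozenLakeEnv and TaxiEnv in this tree subclass DiscreteEnv by filling in exactly this P/isd structure.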
5 | Home-page: https://github.com/openai/gym 6 | Author: OpenAI 7 | Author-email: gym@openai.com 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | gym.egg-info/PKG-INFO 2 | gym.egg-info/SOURCES.txt 3 | gym.egg-info/dependency_links.txt 4 | gym.egg-info/not-zip-safe 5 | gym.egg-info/requires.txt 6 | gym.egg-info/top_level.txt -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/not-zip-safe: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.10.4 2 | requests>=2.0 3 | six 4 | pyglet>=1.2.0 5 | 6 | [all] 7 | PyOpenGL 8 | box2d-py 9 | keras 10 | theano 11 | atari_py>=0.0.17 12 | Pillow 13 | PyOpenGL 14 | pachi-py>=0.0.19 15 | mujoco_py>=0.4.3 16 | imageio 17 | 18 | [atari] 19 | atari_py>=0.0.17 20 | Pillow 21 | PyOpenGL 22 | 23 | [board_game] 24 | pachi-py>=0.0.19 25 | 26 | [box2d] 27 | box2d-py 28 | 29 | [classic_control] 30 | PyOpenGL 31 | 32 | [mujoco] 33 | mujoco_py>=0.4.3 34 | imageio 35 | 36 | [parameter_tuning] 37 | keras 38 | theano 39 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/.classic_control/gym.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/adversarial/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.adversarial.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.adversarial.mujoco.ant import AntEnv 5 | from gym.envs.adversarial.mujoco.ant_heel import AntHeelEnv 6 | from gym.envs.adversarial.mujoco.half_cheetah import HalfCheetahEnv 7 | from gym.envs.adversarial.mujoco.half_cheetah_heel import HalfCheetahHeelEnv 8 | from gym.envs.adversarial.mujoco.half_cheetah_torso import HalfCheetahTorsoEnv 9 | from gym.envs.adversarial.mujoco.hopper import HopperEnv 10 | from gym.envs.adversarial.mujoco.hopper_6 import Hopper6Env 11 | from gym.envs.adversarial.mujoco.hopper_heel import HopperHeelEnv 12 | from gym.envs.adversarial.mujoco.hopper_heel_6 import HopperHeel6Env 13 | from gym.envs.adversarial.mujoco.hopper_torso_6 import 
HopperTorso6Env 14 | from gym.envs.adversarial.mujoco.walker2d import Walker2dEnv 15 | from gym.envs.adversarial.mujoco.walker2d_heel import Walker2dHeelEnv 16 | from gym.envs.adversarial.mujoco.walker2d_torso import Walker2dTorsoEnv 17 | from gym.envs.adversarial.mujoco.humanoid import HumanoidEnv 18 | from gym.envs.adversarial.mujoco.humanoid_heel import HumanoidHeelEnv 19 | from gym.envs.adversarial.mujoco.inverted_pendulum import InvertedPendulumEnv 20 | from gym.envs.adversarial.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 21 | from gym.envs.adversarial.mujoco.reacher import ReacherEnv 22 | from gym.envs.adversarial.mujoco.swimmer import SwimmerEnv 23 | from gym.envs.adversarial.mujoco.humanoidstandup import HumanoidStandupEnv 24 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/adversarial/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/debugging/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.debugging.one_round_deterministic_reward import OneRoundDeterministicRewardEnv 2 | from gym.envs.debugging.two_round_deterministic_reward import TwoRoundDeterministicRewardEnv 3 | from gym.envs.debugging.one_round_nondeterministic_reward import OneRoundNondeterministicRewardEnv 4 | from gym.envs.debugging.two_round_nondeterministic_reward import TwoRoundNondeterministicRewardEnv 5 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/debugging/one_round_deterministic_reward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple environment with known optimal policy and value function. 3 | 4 | This environment has just two actions. 5 | Action 0 yields 0 reward and then terminates the session. 6 | Action 1 yields 1 reward and then terminates the session. 7 | 8 | Optimal policy: action 1. 9 | 10 | Optimal value function: v(0)=1 (there is only one state, state 0) 11 | """ 12 | 13 | import gym 14 | import random 15 | from gym import spaces 16 | 17 | class OneRoundDeterministicRewardEnv(gym.Env): 18 | def __init__(self): 19 | self.action_space = spaces.Discrete(2) 20 | self.observation_space = spaces.Discrete(1) 21 | self._reset() 22 | 23 | def _step(self, action): 24 | assert self.action_space.contains(action) 25 | if action: 26 | reward = 1 27 | else: 28 | reward = 0 29 | 30 | done = True 31 | return self._get_obs(), reward, done, {} 32 | 33 | def _get_obs(self): 34 | return 0 35 | 36 | def _reset(self): 37 | return self._get_obs() 38 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/debugging/one_round_nondeterministic_reward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple environment with known optimal policy and value function. 3 | 4 | This environment has just two actions. 5 | Action 0 yields randomly 0 or 5 reward and then terminates the session. 6 | Action 1 yields randomly 1 or 3 reward and then terminates the session. 7 | 8 | Optimal policy: action 0. 
9 | 10 | Optimal value function: v(0)=2.5 (there is only one state, state 0) 11 | """ 12 | 13 | import gym 14 | from gym import spaces 15 | from gym.utils import seeding 16 | 17 | class OneRoundNondeterministicRewardEnv(gym.Env): 18 | def __init__(self): 19 | self.action_space = spaces.Discrete(2) 20 | self.observation_space = spaces.Discrete(1) 21 | self._seed() 22 | self._reset() 23 | 24 | def _step(self, action): 25 | assert self.action_space.contains(action) 26 | if action: 27 | #your agent should figure out that this option has expected value 2.5 28 | reward = self.np_random.choice([0, 5]) 29 | else: 30 | #your agent should figure out that this option has expected value 2.0 31 | reward = self.np_random.choice([1, 3]) 32 | 33 | done = True 34 | return self._get_obs(), reward, done, {} 35 | 36 | def _get_obs(self): 37 | return 0 38 | 39 | def _reset(self): 40 | return self._get_obs() 41 | 42 | def _seed(self, seed=None): 43 | self.np_random, seed = seeding.np_random(seed) 44 | return [seed] 45 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/debugging/two_round_deterministic_reward.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple environment with known optimal policy and value function. 3 | 4 | Action 0 then 0 yields 0 reward and terminates the session. 5 | Action 0 then 1 yields 3 reward and terminates the session. 6 | Action 1 then 0 yields 1 reward and terminates the session. 7 | Action 1 then 1 yields 2 reward and terminates the session. 8 | 9 | Optimal policy: action 0 then 1. 10 | 11 | Optimal value function v(observation): (this is a fully observable MDP so observation==state) 12 | 13 | v(0)= 3 (you get observation 0 after taking action 0) 14 | v(1)= 2 (you get observation 1 after taking action 1) 15 | v(2)= 3 (you get observation 2 in the starting state) 16 | """ 17 | 18 | import gym 19 | import random 20 | from gym import spaces 21 | 22 | class TwoRoundDeterministicRewardEnv(gym.Env): 23 | def __init__(self): 24 | self.action_space = spaces.Discrete(2) 25 | self.observation_space = spaces.Discrete(3) 26 | self._reset() 27 | 28 | def _step(self, action): 29 | rewards = [[0, 3], [1, 2]] 30 | 31 | assert self.action_space.contains(action) 32 | 33 | if self.firstAction is None: 34 | self.firstAction = action 35 | reward = 0 36 | done = False 37 | else: 38 | reward = rewards[self.firstAction][action] 39 | done = True 40 | 41 | return self._get_obs(), reward, done, {} 42 | 43 | def _get_obs(self): 44 | if self.firstAction is None: 45 | return 2 46 | else: 47 | return self.firstAction 48 | 49 | def _reset(self): 50 | self.firstAction = None 51 | return self._get_obs() 52 | -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/envs/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/envs/tests/test_registration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gym import error, envs 3 | from gym.envs import registration 4 | from gym.envs.classic_control import cartpole 5 | 6 | def test_make(): 7 | env = envs.make('CartPole-v0') 8 | assert env.spec.id == 'CartPole-v0' 9 | 
assert isinstance(env, cartpole.CartPoleEnv) 10 | 11 | def test_make_deprecated(): 12 | try: 13 | envs.make('Humanoid-v0') 14 | except error.Error: 15 | pass 16 | else: 17 | assert False 18 | 19 | def test_spec(): 20 | spec = envs.spec('CartPole-v0') 21 | assert spec.id == 'CartPole-v0' 22 | 23 | def test_missing_lookup(): 24 | registry = registration.EnvRegistry() 25 | registry.register(id='Test-v0', entry_point=None) 26 | registry.register(id='Test-v15', entry_point=None) 27 | registry.register(id='Test-v9', entry_point=None) 28 | registry.register(id='Other-v100', entry_point=None) 29 | try: 30 | registry.spec('Test-v1') # must match an env name but not the version above 31 | except error.DeprecatedEnv: 32 | pass 33 | else: 34 | assert False 35 | 36 | try: 37 | registry.spec('Unknown-v1') 38 | except error.UnregisteredEnv: 39 | pass 40 | else: 41 | assert False 42 | 43 | def test_malformed_lookup(): 44 | registry = registration.EnvRegistry() 45 | try: 46 | registry.spec(u'“Breakout-v0”') 47 | except error.Error as e: 48 | assert 'malformed environment ID' in '{}'.format(e), 'Unexpected message: {}'.format(e) 49 | else: 50 | assert False 51 | -------------------------------------------------------------------------------- /src/gym-adv/gym/monitoring/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.monitoring.monitor import ( 2 | _open_monitors, 3 | detect_training_manifests, 4 | load_env_info_from_manifests, 5 | load_results, 6 | Monitor, 7 | ) 8 | from gym.monitoring.stats_recorder import StatsRecorder 9 | from gym.monitoring.video_recorder import VideoRecorder 10 | -------------------------------------------------------------------------------- /src/gym-adv/gym/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/monitoring/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | @contextlib.contextmanager 6 | def tempdir(): 7 | temp = tempfile.mkdtemp() 8 | yield temp 9 | shutil.rmtree(temp) 10 | -------------------------------------------------------------------------------- /src/gym-adv/gym/monitoring/tests/test_monitor_envs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from nose2 import tools 3 | import os 4 | 5 | import logging 6 | logger = logging.getLogger(__name__) 7 | 8 | from gym import envs 9 | from gym.monitoring.tests import helpers 10 | 11 | specs = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec._entry_point is not None] 12 | @tools.params(*specs) 13 | def test_renderable_after_monitor_close(spec): 14 | # TODO(gdb 2016-05-15): Re-enable these tests after fixing box2d-py 15 | if spec._entry_point.startswith('gym.envs.box2d:'): 16 | logger.warn("Skipping tests for box2d env {}".format(spec._entry_point)) 17 | return 18 | elif spec._entry_point.startswith('gym.envs.parameter_tuning:'): 19 | logger.warn("Skipping tests for parameter tuning".format(spec._entry_point)) 20 | return 21 | 22 | # Skip mujoco tests 23 | skip_mujoco = not (os.environ.get('MUJOCO_KEY_BUNDLE') or 
os.path.exists(os.path.expanduser('~/.mujoco'))) 24 | if skip_mujoco and spec._entry_point.startswith('gym.envs.mujoco:'): 25 | return 26 | 27 | with helpers.tempdir() as temp: 28 | env = spec.make() 29 | # Skip un-renderable envs 30 | if 'human' not in env.metadata.get('render.modes', []): 31 | return 32 | 33 | env.monitor.start(temp) 34 | env.reset() 35 | env.monitor.close() 36 | 37 | env.reset() 38 | env.render() 39 | env.render(close=True) 40 | 41 | env.close() 42 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/README.md: -------------------------------------------------------------------------------- 1 | # Client 2 | 3 | This client was forked from the (Stripe 4 | Python)[https://github.com/stripe/stripe-python] bindings. 5 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from gym import error 5 | 6 | logger = logging.getLogger(__name__) 7 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/scoreboard/client/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/tests/helper.py: -------------------------------------------------------------------------------- 1 | import mock 2 | import unittest 3 | import uuid 4 | 5 | def fake_id(prefix): 6 | entropy = ''.join([a for a in str(uuid.uuid4()) if a.isalnum()]) 7 | return '{}_{}'.format(prefix, entropy) 8 | 9 | class APITestCase(unittest.TestCase): 10 | def setUp(self): 11 | super(APITestCase, self).setUp() 12 | self.requestor_patcher = mock.patch('gym.scoreboard.client.api_requestor.APIRequestor') 13 | requestor_class_mock = self.requestor_patcher.start() 14 | self.requestor_mock = requestor_class_mock.return_value 15 | 16 | def mock_response(self, res): 17 | self.requestor_mock.request = mock.Mock(return_value=(res, 'reskey')) 18 | 19 | class TestData(object): 20 | @classmethod 21 | def file_upload_response(cls): 22 | return { 23 | 'id': fake_id('file'), 24 | 'object': 'file', 25 | } 26 | 27 | @classmethod 28 | def evaluation_response(cls): 29 | return { 30 | 'id': fake_id('file'), 31 | 'object': 'evaluation', 32 | } 33 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/tests/test_evaluation.py: -------------------------------------------------------------------------------- 1 | from gym.scoreboard.client.tests import helper 2 | from gym import scoreboard 3 | 4 | class EvaluationTest(helper.APITestCase): 5 | def test_create_evaluation(self): 6 | self.mock_response(helper.TestData.evaluation_response()) 7 | 8 | evaluation = scoreboard.Evaluation.create() 9 | assert isinstance(evaluation, scoreboard.Evaluation) 10 | 11 | self.requestor_mock.request.assert_called_with( 12 | 'post', 13 | '/v1/evaluations', 14 | {}, 15 | None 16 | ) 17 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/tests/test_file_upload.py: 
-------------------------------------------------------------------------------- 1 | from gym.scoreboard.client.tests import helper 2 | from gym import scoreboard 3 | 4 | class FileUploadTest(helper.APITestCase): 5 | def test_create_file_upload(self): 6 | self.mock_response(helper.TestData.file_upload_response()) 7 | 8 | file_upload = scoreboard.FileUpload.create() 9 | assert isinstance(file_upload, scoreboard.FileUpload), 'File upload is: {!r}'.format(file_upload) 10 | 11 | self.requestor_mock.request.assert_called_with( 12 | 'post', 13 | '/v1/files', 14 | params={}, 15 | ) 16 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/client/util.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | def utf8(value): 8 | if isinstance(value, unicode) and sys.version_info < (3, 0): 9 | return value.encode('utf-8') 10 | else: 11 | return value 12 | 13 | def file_size(f): 14 | return os.fstat(f.fileno()).st_size 15 | -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/scoreboard/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/scoreboard/tests/test_registration.py: -------------------------------------------------------------------------------- 1 | from gym.scoreboard import registration 2 | 3 | def test_correct_registration(): 4 | try: 5 | registration.registry.finalize(strict=True) 6 | except registration.RegistrationError as e: 7 | assert False, "Caught: {}".format(e) 8 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.spaces.box import Box 2 | from gym.spaces.discrete import Discrete 3 | from gym.spaces.multi_discrete import MultiDiscrete, DiscreteToMultiDiscrete, BoxToMultiDiscrete 4 | from gym.spaces.prng import seed 5 | from gym.spaces.tuple_space import Tuple 6 | 7 | __all__ = ["Box", "Discrete", "MultiDiscrete", "DiscreteToMultiDiscrete", "BoxToMultiDiscrete", "Tuple"] 8 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym.spaces import prng 5 | 6 | class Box(gym.Space): 7 | """ 8 | A box in R^n. 9 | I.e., each coordinate is bounded. 
10 | 11 | Example usage: 12 | self.action_space = spaces.Box(low=-10, high=10, shape=(1,)) 13 | """ 14 | def __init__(self, low, high, shape=None): 15 | """ 16 | Two kinds of valid input: 17 | Box(-1.0, 1.0, (3,4)) # low and high are scalars, and shape is provided 18 | Box(np.array([-1.0,-2.0]), np.array([2.0,4.0])) # low and high are arrays of the same shape 19 | """ 20 | if shape is None: 21 | assert low.shape == high.shape 22 | self.low = low 23 | self.high = high 24 | else: 25 | assert np.isscalar(low) and np.isscalar(high) 26 | self.low = low + np.zeros(shape) 27 | self.high = high + np.zeros(shape) 28 | def sample(self): 29 | return prng.np_random.uniform(low=self.low, high=self.high, size=self.low.shape) 30 | def contains(self, x): 31 | return x.shape == self.shape and (x >= self.low).all() and (x <= self.high).all() 32 | 33 | def to_jsonable(self, sample_n): 34 | return np.array(sample_n).tolist() 35 | def from_jsonable(self, sample_n): 36 | return [np.asarray(sample) for sample in sample_n] 37 | 38 | @property 39 | def shape(self): 40 | return self.low.shape 41 | def __repr__(self): 42 | return "Box" + str(self.shape) 43 | def __eq__(self, other): 44 | return np.allclose(self.low, other.low) and np.allclose(self.high, other.high) 45 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym, time 4 | from gym.spaces import prng 5 | 6 | class Discrete(gym.Space): 7 | """ 8 | {0,1,...,n-1} 9 | 10 | Example usage: 11 | self.observation_space = spaces.Discrete(2) 12 | """ 13 | def __init__(self, n): 14 | self.n = n 15 | def sample(self): 16 | return prng.np_random.randint(self.n) 17 | def contains(self, x): 18 | if isinstance(x, int): 19 | as_int = x 20 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.kind in np.typecodes['AllInteger'] and x.shape == ()): 21 | as_int = int(x) 22 | else: 23 | return False 24 | return as_int >= 0 and as_int < self.n 25 | def __repr__(self): 26 | return "Discrete(%d)" % self.n 27 | def __eq__(self, other): 28 | return self.n == other.n 29 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/prng.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | np_random = numpy.random.RandomState() 4 | 5 | def seed(seed=None): 6 | """Seed the common numpy.random.RandomState used in spaces 7 | 8 | CF 9 | https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277 10 | for some details about why we seed the spaces separately from the 11 | envs, but tl;dr is that it's pretty uncommon for them to be used 12 | within an actual algorithm, and the code becomes simpler to just 13 | use this common numpy.random.RandomState. 14 | """ 15 | np_random.seed(seed) 16 | 17 | # This numpy.random.RandomState gets used in all spaces for their 18 | # 'sample' method. It's not really expected that people will be using 19 | # these in their algorithms. 
20 | seed(0) 21 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/gym-adv/gym/spaces/tests/__init__.py -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/tests/test_spaces.py: -------------------------------------------------------------------------------- 1 | import json # note: ujson fails this test due to float equality 2 | 3 | import numpy as np 4 | from nose2 import tools 5 | 6 | from gym.spaces import Tuple, Box, Discrete, MultiDiscrete 7 | 8 | @tools.params(Discrete(3), 9 | Tuple([Discrete(5), Discrete(10)]), 10 | Tuple([Discrete(5), Box(np.array([0,0]),np.array([1,5]))]), 11 | Tuple((Discrete(5), Discrete(2), Discrete(2))), 12 | MultiDiscrete([ [0, 1], [0, 1], [0, 100] ]), 13 | ) 14 | def test_roundtripping(space): 15 | sample_1 = space.sample() 16 | sample_2 = space.sample() 17 | assert space.contains(sample_1) 18 | assert space.contains(sample_2) 19 | json_rep = space.to_jsonable([sample_1, sample_2]) 20 | 21 | json_roundtripped = json.loads(json.dumps(json_rep)) 22 | 23 | samples_after_roundtrip = space.from_jsonable(json_roundtripped) 24 | sample_1_prime, sample_2_prime = samples_after_roundtrip 25 | 26 | s1 = space.to_jsonable([sample_1]) 27 | s1p = space.to_jsonable([sample_1_prime]) 28 | s2 = space.to_jsonable([sample_2]) 29 | s2p = space.to_jsonable([sample_2_prime]) 30 | assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p) 31 | assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p) 32 | -------------------------------------------------------------------------------- /src/gym-adv/gym/spaces/tuple_space.py: -------------------------------------------------------------------------------- 1 | from gym import Space 2 | 3 | class Tuple(Space): 4 | """ 5 | A tuple (i.e., product) of simpler spaces 6 | 7 | Example usage: 8 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3))) 9 | """ 10 | def __init__(self, spaces): 11 | self.spaces = spaces 12 | 13 | def sample(self): 14 | return tuple([space.sample() for space in self.spaces]) 15 | 16 | def contains(self, x): 17 | if isinstance(x, list): 18 | x = tuple(x) # Promote list to tuple for contains check 19 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all( 20 | space.contains(part) for (space,part) in zip(self.spaces,x)) 21 | 22 | def __repr__(self): 23 | return "Tuple(" + ", ". join([str(s) for s in self.spaces]) + ")" 24 | 25 | def to_jsonable(self, sample_n): 26 | # serialize as list-repr of tuple of vectors 27 | return [space.to_jsonable([sample[i] for sample in sample_n]) \ 28 | for i, space in enumerate(self.spaces)] 29 | 30 | def from_jsonable(self, sample_n): 31 | return zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)]) 32 | -------------------------------------------------------------------------------- /src/gym-adv/gym/tests/test_core.py: -------------------------------------------------------------------------------- 1 | from gym import core 2 | 3 | class ArgumentEnv(core.Env): 4 | calls = 0 5 | 6 | def __init__(self, arg): 7 | self.calls += 1 8 | self.arg = arg 9 | 10 | def test_env_instantiation(): 11 | # This looks like a pretty trivial, but given our usage of 12 | # __new__, it's worth having. 
13 | env = ArgumentEnv('arg') 14 | assert env.arg == 'arg' 15 | assert env.calls == 1 16 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | # These submodules should not have any import-time dependencies. 6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | from .reraise import reraise 11 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/colorize.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | color2num = dict( 6 | gray=30, 7 | red=31, 8 | green=32, 9 | yellow=33, 10 | blue=34, 11 | magenta=35, 12 | cyan=36, 13 | white=37, 14 | crimson=38 15 | ) 16 | 17 | 18 | def colorize(string, color, bold=False, highlight = False): 19 | """Return string surrounded by appropriate terminal color codes to 20 | print colorized text. Valid colors: gray, red, green, yellow, 21 | blue, magenta, cyan, white, crimson 22 | """ 23 | 24 | # Import six here so that `utils` has no import-time dependencies. 25 | # We want this since we use `utils` during our import-time sanity checks 26 | # that verify that our dependencies (including six) are actually present. 27 | import six 28 | 29 | attr = [] 30 | num = color2num[color] 31 | if highlight: num += 10 32 | attr.append(six.u(str(num))) 33 | if bold: attr.append(six.u('1')) 34 | attrs = six.u(';').join(attr) 35 | return six.u('\x1b[%sm%s\x1b[0m') % (attrs, string) 36 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/ezpickle.py: -------------------------------------------------------------------------------- 1 | class EzPickle(object): 2 | """Objects that are pickled and unpickled via their constructor 3 | arguments. 4 | 5 | Example usage: 6 | 7 | class Dog(Animal, EzPickle): 8 | def __init__(self, furcolor, tailkind="bushy"): 9 | Animal.__init__() 10 | EzPickle.__init__(furcolor, tailkind) 11 | ... 12 | 13 | When this object is unpickled, a new Dog will be constructed by passing the provided 14 | furcolor and tailkind into the constructor. However, philosophers are still not sure 15 | whether it is still the same dog. 16 | 17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo 18 | and Atari. 
19 | """ 20 | def __init__(self, *args, **kwargs): 21 | self._ezpickle_args = args 22 | self._ezpickle_kwargs = kwargs 23 | def __getstate__(self): 24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs} 25 | def __setstate__(self, d): 26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"]) 27 | self.__dict__.update(out.__dict__) 28 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/reraise.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # We keep the actual reraising in different modules, since the 4 | # reraising code uses syntax mutually exclusive to Python 2/3. 5 | if sys.version_info[0] < 3: 6 | from .reraise_impl_py2 import reraise_impl 7 | else: 8 | from .reraise_impl_py3 import reraise_impl 9 | 10 | def reraise(prefix=None, suffix=None): 11 | old_exc_type, old_exc_value, traceback = sys.exc_info() 12 | if old_exc_value is None: 13 | old_exc_value = old_exc_type() 14 | 15 | e = ReraisedException(old_exc_value, prefix, suffix) 16 | 17 | reraise_impl(e, traceback) 18 | 19 | # http://stackoverflow.com/a/13653312 20 | def full_class_name(o): 21 | module = o.__class__.__module__ 22 | if module is None or module == str.__class__.__module__: 23 | return o.__class__.__name__ 24 | return module + '.' + o.__class__.__name__ 25 | 26 | class ReraisedException(Exception): 27 | def __init__(self, old_exc, prefix, suffix): 28 | self.old_exc = old_exc 29 | self.prefix = prefix 30 | self.suffix = suffix 31 | 32 | def __str__(self): 33 | klass = self.old_exc.__class__ 34 | 35 | orig = "%s: %s" % (full_class_name(self.old_exc), klass.__str__(self.old_exc)) 36 | prefixpart = suffixpart = '' 37 | if self.prefix is not None: 38 | prefixpart = self.prefix + "\n" 39 | if self.suffix is not None: 40 | suffixpart = "\n\n" + self.suffix 41 | return "%sThe original exception was:\n\n%s%s" % (prefixpart, orig, suffixpart) 42 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/reraise_impl_py2.py: -------------------------------------------------------------------------------- 1 | def reraise_impl(e, traceback): 2 | raise e.__class__, e, traceback 3 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/reraise_impl_py3.py: -------------------------------------------------------------------------------- 1 | # http://stackoverflow.com/a/33822606 -- `from None` disables Python 3' 2 | # semi-smart exception chaining, which we don't want in this case. 
3 | def reraise_impl(e, traceback): 4 | raise e.with_traceback(traceback) from None 5 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/tests/test_atexit.py: -------------------------------------------------------------------------------- 1 | from gym.utils.closer import Closer 2 | 3 | class Closeable(object): 4 | close_called = False 5 | def close(self): 6 | self.close_called = True 7 | 8 | def test_register_unregister(): 9 | registry = Closer(atexit_register=False) 10 | c1 = Closeable() 11 | c2 = Closeable() 12 | 13 | assert not c1.close_called 14 | assert not c2.close_called 15 | registry.register(c1) 16 | id2 = registry.register(c2) 17 | 18 | registry.unregister(id2) 19 | registry.close() 20 | assert c1.close_called 21 | assert not c2.close_called 22 | -------------------------------------------------------------------------------- /src/gym-adv/gym/utils/tests/test_seeding.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.utils import seeding 3 | 4 | def test_invalid_seeds(): 5 | for seed in [-1, 'test']: 6 | try: 7 | seeding.np_random(seed) 8 | except error.Error: 9 | pass 10 | else: 11 | assert False, 'Invalid seed {} passed validation'.format(seed) 12 | 13 | def test_valid_seeds(): 14 | for seed in [0, 1]: 15 | random, seed1 = seeding.np_random(seed) 16 | assert seed == seed1 17 | -------------------------------------------------------------------------------- /src/gym-adv/gym/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.5.6' 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/wrappers/README.md: -------------------------------------------------------------------------------- 1 | # Wrappers (experimental) 2 | 3 | This is a placeholder for now: we will likely soon start adding 4 | standardized wrappers for environments. (Only stable and 5 | general-purpose wrappers will be accepted into gym core.) 6 | 7 | Note that we may later restructure any of the files, but will keep the 8 | wrappers available at the wrappers' top-level folder. So for 9 | example, you should access `MyWrapper` as follows: 10 | 11 | ``` 12 | # Will be supported in future releases 13 | from gym.wrappers import MyWrapper 14 | ``` 15 | 16 | ## How to add new wrappers to Gym 17 | 18 | 1. Write your wrapper in the wrappers' top-level folder. 19 | 2. Import your wrapper into the `__init__.py` file. This file is located at `/gym/wrappers/__init__.py`. Add `from gym.wrappers.my_awesome_wrapper import MyWrapper` to this file. 20 | 3. 
Write a good description of the utility of your wrapper using python docstring format (""" """ under the class definition) 21 | 22 | 23 | ## Quick Tips 24 | 25 | - Don't forget to call super(class_name, self).__init__(env) if you override the wrapper's __init__ function 26 | - You can access the inner environment with `self.unwrapped` 27 | - You can access the previous layer using `self.env` 28 | - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer 29 | - Create a wrapped function for at least one of the following: `__init__(self, env)`, `_step`, `_reset`, `_render`, `_close`, `_configure`, or `_seed` 30 | - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) 31 | -------------------------------------------------------------------------------- /src/gym-adv/gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.wrappers.frame_skipping import SkipWrapper 2 | -------------------------------------------------------------------------------- /src/gym-adv/gym/wrappers/frame_skipping.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | __all__ = ['SkipWrapper'] 4 | 5 | def SkipWrapper(repeat_count): 6 | class SkipWrapper(gym.Wrapper): 7 | """ 8 | Generic common frame skipping wrapper 9 | Will perform action for `x` additional steps 10 | """ 11 | def __init__(self, env): 12 | super(SkipWrapper, self).__init__(env) 13 | self.repeat_count = repeat_count 14 | self.stepcount = 0 15 | 16 | def _step(self, action): 17 | done = False 18 | total_reward = 0 19 | current_step = 0 20 | while current_step < (self.repeat_count + 1) and not done: 21 | self.stepcount += 1 22 | obs, reward, done, info = self.env.step(action) 23 | total_reward += reward 24 | current_step += 1 25 | if 'skip.stepcount' in info: 26 | raise gym.error.Error('Key "skip.stepcount" already in info. 
Make sure you are not stacking ' \ 27 | 'the SkipWrapper wrappers.') 28 | info['skip.stepcount'] = self.stepcount 29 | return obs, total_reward, done, info 30 | 31 | def _reset(self): 32 | self.stepcount = 0 33 | return self.env.reset() 34 | 35 | return SkipWrapper 36 | -------------------------------------------------------------------------------- /src/gym-adv/gym/wrappers/tests/test_wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.wrappers import SkipWrapper 3 | 4 | def test_skip(): 5 | every_two_frame = SkipWrapper(2) 6 | env = gym.make("FrozenLake-v0") 7 | env = every_two_frame(env) 8 | obs = env.reset() 9 | env.render() 10 | -------------------------------------------------------------------------------- /src/gym-adv/misc/check_envs_for_change.py: -------------------------------------------------------------------------------- 1 | ENVS = ["Ant-v0", "HalfCheetah-v0", "Hopper-v0", "Humanoid-v0", "InvertedDoublePendulum-v0", "Reacher-v0", "Swimmer-v0", "Walker2d-v0"] 2 | OLD_COMMIT = "HEAD" 3 | 4 | # ================================================================ 5 | 6 | import subprocess, gym 7 | from gym import utils 8 | from os import path 9 | 10 | def cap(cmd): 11 | "Call and print command" 12 | print utils.colorize(cmd, "green") 13 | subprocess.check_call(cmd,shell=True) 14 | 15 | # ================================================================ 16 | 17 | gymroot = path.abspath(path.dirname(path.dirname(gym.__file__))) 18 | oldgymroot = "/tmp/old-gym" 19 | comparedir = "/tmp/gym-comparison" 20 | 21 | oldgymbase = path.basename(oldgymroot) 22 | 23 | print "gym root", gymroot 24 | thisdir = path.abspath(path.dirname(__file__)) 25 | print "this directory", thisdir 26 | cap("rm -rf %(oldgymroot)s %(comparedir)s && mkdir %(comparedir)s && cd /tmp && git clone %(gymroot)s %(oldgymbase)s"%locals()) 27 | for env in ENVS: 28 | print utils.colorize("*"*50 + "\nENV: %s" % env, "red") 29 | writescript = path.join(thisdir, "write_rollout_data.py") 30 | outfileA = path.join(comparedir, env) + "-A.npz" 31 | cap("python %(writescript)s %(env)s %(outfileA)s"%locals()) 32 | outfileB = path.join(comparedir, env) + "-B.npz" 33 | cap("python %(writescript)s %(env)s %(outfileB)s --gymdir=%(oldgymroot)s"%locals()) 34 | 35 | comparescript = path.join(thisdir, "compare_rollout_data.py") 36 | cap("python %(comparescript)s %(outfileA)s %(outfileB)s"%locals()) 37 | 38 | -------------------------------------------------------------------------------- /src/gym-adv/misc/compare_rollout_data.py: -------------------------------------------------------------------------------- 1 | import argparse, numpy as np 2 | 3 | def main(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument("file1") 6 | parser.add_argument("file2") 7 | args = parser.parse_args() 8 | file1 = np.load(args.file1) 9 | file2 = np.load(args.file2) 10 | 11 | for k in sorted(file1.keys()): 12 | arr1 = file1[k] 13 | arr2 = file2[k] 14 | if arr1.shape == arr2.shape: 15 | if np.allclose(file1[k], file2[k]): 16 | print "%s: matches!"%k 17 | continue 18 | else: 19 | print "%s: arrays are not equal. Difference = %g"%(k, np.abs(arr1 - arr2).max()) 20 | else: 21 | print "%s: arrays have different shape! %s vs %s"%(k, arr1.shape, arr2.shape) 22 | print "first 30 els:\n1. %s\n2. 
%s"%(arr1.flat[:30], arr2.flat[:30]) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() -------------------------------------------------------------------------------- /src/gym-adv/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.10.4 2 | requests>=2.0 3 | six 4 | pyglet>=1.2.0 5 | scipy==0.17.1 6 | -------------------------------------------------------------------------------- /src/gym-adv/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # Testing 2 | nose2 3 | mock 4 | 5 | -e .[all] 6 | -------------------------------------------------------------------------------- /src/gym-adv/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys, os.path 3 | 4 | # Don't import gym module here, since deps may not be installed 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gym')) 6 | from version import VERSION 7 | 8 | # Environment-specific dependencies. 9 | extras = { 10 | 'atari': ['atari_py>=0.0.17', 'Pillow', 'PyOpenGL'], 11 | 'board_game' : ['pachi-py>=0.0.19'], 12 | 'box2d': ['box2d-py'], 13 | 'classic_control': ['PyOpenGL'], 14 | 'mujoco': ['mujoco_py>=0.4.3', 'imageio'], 15 | 'parameter_tuning': ['keras', 'theano'], 16 | } 17 | 18 | # Meta dependency groups. 19 | all_deps = [] 20 | for group_name in extras: 21 | all_deps += extras[group_name] 22 | extras['all'] = all_deps 23 | 24 | setup(name='gym', 25 | version=VERSION, 26 | description='The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents.', 27 | url='https://github.com/openai/gym', 28 | author='OpenAI', 29 | author_email='gym@openai.com', 30 | license='', 31 | packages=[package for package in find_packages() 32 | if package.startswith('gym')], 33 | zip_safe=False, 34 | install_requires=[ 35 | 'numpy>=1.10.4', 'requests>=2.0', 'six', 'pyglet>=1.2.0', 36 | ], 37 | extras_require=extras, 38 | package_data={'gym': ['envs/mujoco/assets/*.xml', 'envs/classic_control/assets/*.png']}, 39 | tests_require=['nose2', 'mock'], 40 | ) 41 | -------------------------------------------------------------------------------- /src/gym-adv/test.dockerfile: -------------------------------------------------------------------------------- 1 | # A Dockerfile that sets up a full Gym install 2 | FROM quay.io/openai/gym:base 3 | 4 | RUN apt-get update \ 5 | && apt-get install -y libav-tools \ 6 | python-numpy \ 7 | python-scipy \ 8 | python-pyglet \ 9 | python-setuptools \ 10 | libpq-dev \ 11 | libjpeg-dev \ 12 | curl \ 13 | cmake \ 14 | swig \ 15 | python-opengl \ 16 | libboost-all-dev \ 17 | libsdl2-dev \ 18 | wget \ 19 | unzip \ 20 | git \ 21 | xpra \ 22 | libav-tools \ 23 | python3-dev \ 24 | && apt-get clean \ 25 | && rm -rf /var/lib/apt/lists/* \ 26 | && easy_install pip 27 | 28 | WORKDIR /usr/local/gym/ 29 | RUN mkdir -p gym && touch gym/__init__.py 30 | COPY ./gym/version.py ./gym/ 31 | COPY ./requirements.txt ./ 32 | COPY ./setup.py ./ 33 | COPY ./tox.ini ./ 34 | 35 | RUN pip install tox 36 | # Install the relevant dependencies. Keep printing so Travis knows we're alive. 37 | RUN ["bash", "-c", "( while true; do echo '.'; sleep 60; done ) & tox --notest"] 38 | 39 | # Finally, clean cached code (including dot files) and upload our actual code! 40 | RUN mv .tox /tmp/.tox && rm -rf .??* * && mv /tmp/.tox .tox 41 | COPY . 
/usr/local/gym/ 42 | 43 | ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] 44 | CMD ["tox"] 45 | -------------------------------------------------------------------------------- /src/gym-adv/tox.ini: -------------------------------------------------------------------------------- 1 | # Tox (http://tox.testrun.org/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | envlist = py27, py34 8 | 9 | [testenv:py34] 10 | whitelist_externals=make 11 | passenv=DISPLAY TRAVIS* 12 | deps = 13 | nose2 14 | mock 15 | atari_py>=0.0.17 16 | Pillow 17 | PyOpenGL 18 | pachi-py>=0.0.19 19 | box2d-py 20 | PyOpenGL 21 | doom_py>=0.0.11 22 | mujoco_py>=0.4.3 23 | keras 24 | theano 25 | numpy>=1.10.4 26 | requests>=2.0 27 | six 28 | pyglet>=1.2.0 29 | commands = 30 | nose2 {posargs} 31 | 32 | [testenv:py27] 33 | whitelist_externals=make 34 | passenv=DISPLAY TRAVIS* 35 | deps = 36 | nose2 37 | mock 38 | atari_py>=0.0.17 39 | Pillow 40 | PyOpenGL 41 | pachi-py>=0.0.19 42 | box2d-py 43 | PyOpenGL 44 | doom_py>=0.0.11 45 | mujoco_py>=0.4.3 46 | keras 47 | theano 48 | numpy>=1.10.4 49 | requests>=2.0 50 | six 51 | pyglet>=1.2.0 52 | commands = 53 | nose2 {posargs} 54 | -------------------------------------------------------------------------------- /src/gym-adv/unittest.cfg: -------------------------------------------------------------------------------- 1 | [log-capture] 2 | always-on = True 3 | clear-handlers = True 4 | date-format = None 5 | filter = -nose 6 | log-level = NOTSET 7 | 8 | [output-buffer] 9 | always-on = True 10 | stderr = True 11 | stdout = True 12 | -------------------------------------------------------------------------------- /src/mjkey.txt: -------------------------------------------------------------------------------- 1 | MuJoCo Pro Personal license activation key, number 37429, type 6. 2 | 3 | Issued to Jerin Philip . 4 | 5 | Expires October 22, 2018. 6 | 7 | This file activates MuJoCo Pro on a single computer with id: 8 | LINUX_65a0md_7fbvebb7561e480afe5d0bb656b747bc494 9 | 10 | Do not modify this file. Its entire content, including the 11 | plain text section, is used by the activation manager. 
12 | 13 | 5119dbd6032fbd63148d5c23eb534a06dd925202dd07c8c90439e1cb315f5f09 14 | 7b13fb1311b258d58df7afb8c209e3cc20a1baf8aa1e58e581a9f9de67975b39 15 | cc71df6f9a568900246659384cef881ace47f7eb00633069ebd53527873fb83e 16 | 7805ae2ba88d0ce98ccf0782d319d0d253403b0f7feaa864245b80eeca13721b 17 | b96d701956066ae1dfd3f2365df1bd40152223c61cb44fb4771163a661e11793 18 | 3c905661b0978800df6e1bbde8a02d3c4474251e48b74d2960d55e3b629387cc 19 | f5dbbdf8ba3628b9024527b6e2b3e16009703962aa9691871ff55efc3d8293d7 20 | 92bc468193f051baf48fac381d255d299c3608fe858f212bb2f3adf714a87f08 21 | -------------------------------------------------------------------------------- /src/mjpro131/bin/compile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131/bin/compile -------------------------------------------------------------------------------- /src/mjpro131/bin/libglfw.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131/bin/libglfw.so.3 -------------------------------------------------------------------------------- /src/mjpro131/bin/libmujoco131.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131/bin/libmujoco131.so -------------------------------------------------------------------------------- /src/mjpro131/bin/mjkey.txt: -------------------------------------------------------------------------------- 1 | MuJoCo Pro Personal license activation key, number 37429, type 6. 2 | 3 | Issued to Jerin Philip . 4 | 5 | Expires October 22, 2018. 6 | 7 | This file activates MuJoCo Pro on a single computer with id: 8 | LINUX_65a0md_7fbvebb7561e480afe5d0bb656b747bc494 9 | 10 | Do not modify this file. Its entire content, including the 11 | plain text section, is used by the activation manager. 
12 | 13 | 5119dbd6032fbd63148d5c23eb534a06dd925202dd07c8c90439e1cb315f5f09 14 | 7b13fb1311b258d58df7afb8c209e3cc20a1baf8aa1e58e581a9f9de67975b39 15 | cc71df6f9a568900246659384cef881ace47f7eb00633069ebd53527873fb83e 16 | 7805ae2ba88d0ce98ccf0782d319d0d253403b0f7feaa864245b80eeca13721b 17 | b96d701956066ae1dfd3f2365df1bd40152223c61cb44fb4771163a661e11793 18 | 3c905661b0978800df6e1bbde8a02d3c4474251e48b74d2960d55e3b629387cc 19 | f5dbbdf8ba3628b9024527b6e2b3e16009703962aa9691871ff55efc3d8293d7 20 | 92bc468193f051baf48fac381d255d299c3608fe858f212bb2f3adf714a87f08 21 | -------------------------------------------------------------------------------- /src/mjpro131/bin/simulate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131/bin/simulate -------------------------------------------------------------------------------- /src/mjpro131/bin/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131/bin/test -------------------------------------------------------------------------------- /src/mjpro131/doc/README.txt: -------------------------------------------------------------------------------- 1 | Welcome to MuJoCo Pro version 1.31. 2 | 3 | The full documentation is available at http://www.mujoco.org/book 4 | The most relevant chapters are Overview, MJCF Models, and MuJoCo Pro. 5 | 6 | Here we provide brief notes to get you started: 7 | 8 | 9 | The activation key (which you should have received with your license) is a 10 | plain-text file whose path must be passed to the mj_activate() function. 11 | The code samples assume that it is called mjkey.txt in the bin directory. 12 | 13 | Once you have mjkey.txt in the bin directory, run: 14 | simulate ../model/humanoid.xml (or ./simulate on Linux and OSX) 15 | to see MuJoCo Pro in action. 16 | 17 | On Linux, you can use LD_LIBRARY_PATH to point the dynamic linker to the 18 | .so files, or copy them to a directory that is already in the linker path. 19 | On OSX, the MuJoCo Pro dynamic library is compiled with @executable_path/ 20 | to avoid the need for installation in a predefined directory. 21 | 22 | In general, the directory structure we have provided is merely a suggestion; 23 | feel free to re-organize it if needed. MuJoCo Pro does not have an installer 24 | and does not write any files outside the executable directory. 25 | 26 | The makefile in the sample directory generates binaries in the bin directory. 27 | These binaries are pre-compiled and included in the software distribution. 28 | 29 | While the software distribution contains only one model (humanoid.xml), 30 | additional models are available at http://www.mujoco.org/forum under Resources. 
31 | -------------------------------------------------------------------------------- /src/mjpro131/sample/makefile: -------------------------------------------------------------------------------- 1 | all: 2 | g++ -O2 -I../include simulate.cpp -std=c++11 ../bin/libmujoco131.so ../bin/libglfw.so.3 -o ../bin/simulate 3 | g++ -O2 -I../include test.cpp -std=c++11 ../bin/libmujoco131.so -o ../bin/test 4 | g++ -O2 -I../include compile.cpp -std=c++11 ../bin/libmujoco131.so -o ../bin/compile 5 | -------------------------------------------------------------------------------- /src/mjpro131_linux.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro131_linux.zip -------------------------------------------------------------------------------- /src/mjpro150/bin/basic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/basic -------------------------------------------------------------------------------- /src/mjpro150/bin/compile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/compile -------------------------------------------------------------------------------- /src/mjpro150/bin/derivative: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/derivative -------------------------------------------------------------------------------- /src/mjpro150/bin/libglew.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libglew.so -------------------------------------------------------------------------------- /src/mjpro150/bin/libglewegl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libglewegl.so -------------------------------------------------------------------------------- /src/mjpro150/bin/libglewosmesa.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libglewosmesa.so -------------------------------------------------------------------------------- /src/mjpro150/bin/libglfw.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libglfw.so.3 -------------------------------------------------------------------------------- /src/mjpro150/bin/libmujoco150.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libmujoco150.so -------------------------------------------------------------------------------- 
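Editor's note: the MuJoCo Pro README.txt above (its 1.50 counterpart appears below) describes the quick-start procedure: place mjkey.txt in bin/ and launch the pre-built simulate binary against the bundled humanoid model, using LD_LIBRARY_PATH on Linux so the loader finds the shared libraries. A minimal sketch of those steps, assuming the unpacked src/mjpro131 layout shipped in this repo (the mjpro150 copy works the same way with its own bin/):

```sh
# Sketch based on the vendor README above; paths assume this repo's layout.
cd src/mjpro131/bin
# Let the dynamic linker find libmujoco131.so and libglfw.so.3 (Linux).
export LD_LIBRARY_PATH="$PWD:$LD_LIBRARY_PATH"
# mjkey.txt must already be present in this directory (see the license file above).
./simulate ../model/humanoid.xml
```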
/src/mjpro150/bin/libmujoco150nogl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/libmujoco150nogl.so -------------------------------------------------------------------------------- /src/mjpro150/bin/mjkey.txt: -------------------------------------------------------------------------------- 1 | MuJoCo Pro Personal license activation key, number 37429, type 6. 2 | 3 | Issued to Jerin Philip . 4 | 5 | Expires October 22, 2018. 6 | 7 | This file activates MuJoCo Pro on a single computer with id: 8 | LINUX_65a0md_7fbvebb7561e480afe5d0bb656b747bc494 9 | 10 | Do not modify this file. Its entire content, including the 11 | plain text section, is used by the activation manager. 12 | 13 | 5119dbd6032fbd63148d5c23eb534a06dd925202dd07c8c90439e1cb315f5f09 14 | 7b13fb1311b258d58df7afb8c209e3cc20a1baf8aa1e58e581a9f9de67975b39 15 | cc71df6f9a568900246659384cef881ace47f7eb00633069ebd53527873fb83e 16 | 7805ae2ba88d0ce98ccf0782d319d0d253403b0f7feaa864245b80eeca13721b 17 | b96d701956066ae1dfd3f2365df1bd40152223c61cb44fb4771163a661e11793 18 | 3c905661b0978800df6e1bbde8a02d3c4474251e48b74d2960d55e3b629387cc 19 | f5dbbdf8ba3628b9024527b6e2b3e16009703962aa9691871ff55efc3d8293d7 20 | 92bc468193f051baf48fac381d255d299c3608fe858f212bb2f3adf714a87f08 21 | -------------------------------------------------------------------------------- /src/mjpro150/bin/record: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/record -------------------------------------------------------------------------------- /src/mjpro150/bin/simulate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/simulate -------------------------------------------------------------------------------- /src/mjpro150/bin/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150/bin/test -------------------------------------------------------------------------------- /src/mjpro150/doc/README.txt: -------------------------------------------------------------------------------- 1 | Welcome to MuJoCo Pro version 1.50. 2 | 3 | The full documentation is available at http://www.mujoco.org/book 4 | The most relevant chapters are Overview, MJCF Models, and MuJoCo Pro. 5 | 6 | Here we provide brief notes to get you started: 7 | 8 | 9 | The activation key (which you should have received with your license) is a 10 | plain-text file whose path must be passed to the mj_activate() function. 11 | The code samples assume that it is called mjkey.txt in the bin directory. 12 | 13 | Once you have mjkey.txt in the bin directory, run: 14 | simulate ../model/humanoid.xml (or ./simulate on Linux and OSX) 15 | to see MuJoCo Pro in action. 16 | 17 | On Linux, you can use LD_LIBRARY_PATH to point the dynamic linker to the 18 | .so files, or copy them to a directory that is already in the linker path. 19 | On OSX, the MuJoCo Pro dynamic library is compiled with @executable_path/ 20 | to avoid the need for installation in a predefined directory. 
21 | 22 | In general, the directory structure we have provided is merely a suggestion; 23 | feel free to re-organize it if needed. MuJoCo Pro does not have an installer 24 | and does not write any files outside the executable directory. 25 | 26 | The makefile in the sample directory generates binaries in the bin directory. 27 | These binaries are pre-compiled and included in the software distribution. 28 | 29 | While the software distribution contains only one model (humanoid.xml), 30 | additional models are available at http://www.mujoco.org/forum under Resources. 31 | -------------------------------------------------------------------------------- /src/mjpro150/sample/makefile: -------------------------------------------------------------------------------- 1 | COMMON=-O2 -I../include -L../bin -std=c++11 -mavx 2 | 3 | default: 4 | g++ $(COMMON) test.cpp -lmujoco150nogl -o ../bin/test 5 | g++ $(COMMON) compile.cpp -lmujoco150nogl -o ../bin/compile 6 | g++ $(COMMON) derivative.cpp -lmujoco150nogl -fopenmp -o ../bin/derivative 7 | g++ $(COMMON) simulate.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/simulate 8 | g++ $(COMMON) record.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/record 9 | g++ $(COMMON) basic.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/basic 10 | 11 | egl: 12 | g++ $(COMMON) -DMJ_EGL record.cpp -lmujoco150 -lOpenGL -lEGL -lglewegl -o ../bin/recordegl 13 | 14 | osmesa: 15 | g++ $(COMMON) -DMJ_OSMESA record.cpp -lmujoco150 -lOSMesa -lglewosmesa -o ../bin/recordosmesa 16 | 17 | all: default egl osmesa 18 | -------------------------------------------------------------------------------- /src/mjpro150_linux.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/mjpro150_linux.zip -------------------------------------------------------------------------------- /src/rllab-adv/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | *.p 3 | *.pyc 4 | *-checkpoint.ipynb 5 | .DS_Store 6 | *.h5 7 | *.log 8 | *.npz 9 | secrets.py 10 | *.avi 11 | *.mp4 12 | build 13 | build_linux 14 | .idea 15 | .sublime-project 16 | run_experiment.sh 17 | scratch-notebooks 18 | launch_scripts 19 | *.sh.e* 20 | *.sh.o* 21 | MUJOCO_LOG.TXT 22 | vendor/mujoco 23 | .project 24 | .pydevproject 25 | *.pdf 26 | .env 27 | snippets 28 | private 29 | lua 30 | iterate.dat 31 | .env 32 | src/ 33 | .settings 34 | .pods 35 | docs/_build 36 | blackbox.zip 37 | blackbox 38 | rllab/config_personal.py 39 | *.swp 40 | sandbox 41 | adversarial/figs 42 | adversarial/figs/* 43 | -------------------------------------------------------------------------------- /src/rllab-adv/LICENSE1: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 rllab contributors 4 | 5 | rllab uses a shared copyright model: each contributor holds copyright over 6 | their contributions to rllab. The project versioning records all such 7 | contribution and copyright details. 8 | By contributing to the rllab repository through pull-request, comment, 9 | or otherwise, the contributor releases their content to the license and 10 | copyright terms herein. 
11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy 13 | of this software and associated documentation files (the "Software"), to deal 14 | in the Software without restriction, including without limitation the rights 15 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 | copies of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | -------------------------------------------------------------------------------- /src/rllab-adv/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include rllab 2 | -------------------------------------------------------------------------------- /src/rllab-adv/README.md: -------------------------------------------------------------------------------- 1 | >Under Development 2 | # Robust Adversarial Reinforcement Learning 3 | 4 | This repo contains code for training RL agents with adversarial disturbance agents in our work on Robust Adversarial Reinforcement Learning ([RARL](https://arxiv.org/abs/1703.02702)). We build heavily on the OpenAI rllab repo. 5 | 6 | ## Installation instructions 7 | 8 | Since we build upon the [rllab](https://github.com/openai/rllab) package for the optimizers, the installation process is similar to `rllab's` manual installation. Most of the packages are virtually installed in the anaconda `rllab3-adv` environment. 9 | 10 | - Dependencies for scipy: 11 | 12 | ``` 13 | sudo apt-get build-dep python-scipy 14 | ``` 15 | 16 | - Install python modules: 17 | 18 | ``` 19 | conda env create -f environment.yml 20 | ``` 21 | 22 | - [Install MuJoCo](https://github.com/openai/mujoco-py) 23 | 24 | - Add `rllab-adv` to your `PYTHONPATH`. 25 | 26 | ``` 27 | export PYTHONPATH=:$PYTHONPATH 28 | ``` 29 | 30 | ## Example 31 | 32 | ```python 33 | # Enter the anaconda virtual environment 34 | source activate rllab3-adv 35 | # Train on InvertedPendulum 36 | python adversarial/scripts/train_adversary.py --env InvertedPendulumAdv-v1 --folder ~/rllab-adv/results 37 | ``` 38 | 39 | ## Contact 40 | Lerrel Pinto -- lerrelpATcsDOTcmuDOTedu. 41 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_adv_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -n "$env" ] 4 | then 5 | echo "env UNDEFINED, not running anything" 6 | exit 7 | fi 8 | if [ ! -n "$adv_fraction" ] 9 | then 10 | echo "adv_fraction UNDEFINED, not running anything" 11 | exit 12 | fi 13 | if [ !
-n "$PROCSTRING" ] 14 | then 15 | echo "PROCSTRING UNDEFINED, not running anything" 16 | exit 17 | fi 18 | 19 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 20 | OUTPUT_FILER=/home/${USER}/tmpoutputs_adv/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 21 | 22 | cd /home/lerrelp/rllab/adversary_scripts 23 | 24 | #run a niced python script 25 | nice python train_trpo_var_adversary.py --env $env --adv_name adv --n_exps 3 --n_itr 500 --layer_size 100 50 25 --batch_size 25000 --if_render 0 --save_every 100 --adv_fraction $adv_fraction > $OUTPUT_FILER 26 | echo "Finished Without Problems" >> $OUTPUT_FILER 27 | echo "..::RL Solved::.." 28 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_adv_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LOGDIR=/home/${USER}/tmpoutputs_adv/ 4 | if [ ! -d ${LOGDIR} ]; then 5 | echo "Directory ${LOGDIR} not present, creating it" 6 | mkdir $LOGDIR 7 | fi 8 | 9 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 10 | 11 | for env in "HopperAdv-v1" "HalfCheetahAdv-v1" "Walker2dAdv-v1"; 12 | do 13 | for adv_fraction in "0.1" "0.25" "0.5" "1.0" "1.25"; 14 | do 15 | PROCSTRING="$env-$adv_fraction" 16 | echo $PROCSTRING 17 | qsub -N ${PROCSTRING} -q default -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v env=${env},adv_fraction=${adv_fraction},PROCSTRING=${PROCSTRING} yoda_rl_adv_driver.sh 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_adversary_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_var_adversary.py --env $ENV --adv_name adv --n_exps 5 --n_itr 500 --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_adversary_single_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_var_adversary.py --env $ENV --adv_name adv --n_exps 1 --n_itr $NITR --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction --folder $SAVEDIR> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_adversary_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ ! 
-n "$1" ] 3 | then 4 | echo "Usage: " $0 " ENV=1 QUEUE=2 STEP_SIZE=3 LAMBDA=4" 5 | echo " script_to_run is MANDTORY" 6 | echo " QUEUE options are default, reg-mem, big-mem, gpu" 7 | exit 8 | else 9 | ENV=$1 10 | echo "Running: " $ENV 11 | fi 12 | 13 | if [ ! -n "$2" ] 14 | then 15 | QUEUE="default" 16 | echo "Defaulting QUEUE to ${QUEUE}" 17 | else 18 | QUEUE=$2 19 | fi 20 | 21 | if [ ! -n "$3" ] 22 | then 23 | STEP_SIZE="0.01" 24 | echo "Defaulting STEP_SIZE to ${STEP_SIZE}" 25 | else 26 | STEP_SIZE=$3 27 | fi 28 | 29 | if [ ! -n "$4" ] 30 | then 31 | LAMBDA="0.97" 32 | echo "Defaulting LAMBDA to ${LAMBDA}" 33 | else 34 | LAMBDA=$4 35 | fi 36 | 37 | 38 | 39 | LOGDIR=/home/${USER}/tmpoutputs_adversary_${ENV}/ 40 | if [ ! -d ${LOGDIR} ]; then 41 | echo "Directory ${LOGDIR} not present, creating it" 42 | mkdir $LOGDIR 43 | fi 44 | 45 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 46 | 47 | for adv_fraction in "0.1" "0.25" "0.5" "1.0"; 48 | do 49 | for step_size in ${STEP_SIZE}; 50 | do 51 | for gae_lambda in ${LAMBDA}; 52 | do 53 | for batch_size in "25000"; 54 | do 55 | PROCSTRING="$ENV-$step_size-$gae_lambda-$batch_size-$adv_fraction" 56 | echo $PROCSTRING 57 | qsub -N ${PROCSTRING} -q ${QUEUE} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v LOGDIR=${LOGDIR},ENV=${ENV},step_size=${step_size},gae_lambda=${gae_lambda},batch_size=${batch_size},PROCSTRING=${PROCSTRING},adv_fraction=${adv_fraction} yoda_rl_adversary_driver.sh 58 | done 59 | done 60 | done 61 | done 62 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_baseline_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_baseline.py --env $ENV --n_exps 5 --n_itr 500 --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda > $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_baseline_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ ! -n "$1" ] 3 | then 4 | echo "Usage: " $0 " ENV=1 QUEUE=2 " 5 | echo " script_to_run is MANDTORY" 6 | echo " QUEUE options are default, reg-mem, big-mem, gpu" 7 | exit 8 | else 9 | ENV=$1 10 | echo "Running: " $ENV 11 | fi 12 | 13 | #the REPEAT is the number of times we will run this script 14 | if [ ! -n "$2" ] 15 | then 16 | QUEUE="default" 17 | echo "Defaulting QUEUE to ${QUEUE}" 18 | else 19 | QUEUE=$2 20 | fi 21 | 22 | 23 | LOGDIR=/home/${USER}/tmpoutputs_baseline_${ENV}/ 24 | if [ ! 
-d ${LOGDIR} ]; then 25 | echo "Directory ${LOGDIR} not present, creating it" 26 | mkdir $LOGDIR 27 | fi 28 | 29 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 30 | 31 | for step_size in "0.005" "0.01" "0.02"; 32 | do 33 | for gae_lambda in "0.95" "0.97" "1.0"; 34 | do 35 | for batch_size in "10000" "25000" "50000"; 36 | do 37 | PROCSTRING="$step_size-$gae_lambda-$batch_size" 38 | echo $PROCSTRING 39 | qsub -N ${PROCSTRING} -q ${QUEUE} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v LOGDIR=${LOGDIR},ENV=${ENV},step_size=${step_size},gae_lambda=${gae_lambda},batch_size=${batch_size},PROCSTRING=${PROCSTRING} yoda_rl_baseline_driver.sh 40 | done 41 | done 42 | done 43 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_baseline_stater.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LOGDIR=/home/${USER}/tmpoutputs_baseline/ 4 | if [ ! -d ${LOGDIR} ]; then 5 | echo "Directory ${LOGDIR} not present, creating it" 6 | mkdir $LOGDIR 7 | fi 8 | 9 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 10 | 11 | for env in "HopperAdv-v1" "HalfCheetahAdv-v1" "Walker2dAdv-v1" "InvertedPendulumAdv-v1"; 12 | do 13 | for adv_fraction in "0.0"; 14 | do 15 | PROCSTRING="$env-$adv_fraction" 16 | echo $PROCSTRING 17 | qsub -N ${PROCSTRING} -q reg-mem -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v env=${env},adv_fraction=${adv_fraction},PROCSTRING=${PROCSTRING} yoda_rl_baseline_driver.sh 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -n "$env" ] 4 | then 5 | echo "PROCSTRING UNDEFINED, not running anything" 6 | exit 7 | fi 8 | if [ ! -n "$adv_fraction" ] 9 | then 10 | echo "PROCSTRING UNDEFINED, not running anything" 11 | exit 12 | fi 13 | if [ ! -n "$PROCSTRING" ] 14 | then 15 | echo "PROCSTRING UNDEFINED, not running anything" 16 | exit 17 | fi 18 | 19 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 20 | OUTPUT_FILER=/home/${USER}/tmpoutputs/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 21 | 22 | cd /home/lerrelp/rllab/adversary_scripts 23 | 24 | #run a niced python script 25 | nice python train_trpo_var_adversary.py --env $env --adv_name adv --n_exps 3 --n_itr 500 --layer_size 64 64 --batch_size 25000 --if_render 0 --adv_fraction $adv_fraction > $OUTPUT_FILER 26 | echo "Finished Without Problems" >> $OUTPUT_FILER 27 | echo "..::RL Solved::.." 28 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_multiple_adversary_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_mult_var_adversary.py --env $ENV --adv_name adv --n_exps 5 --n_itr 500 --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 
12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_multiple_adversary_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ ! -n "$1" ] 3 | then 4 | echo "Usage: " $0 " ENV=1 QUEUE=2 STEP_SIZE=3 LAMBDA=4" 5 | echo " script_to_run is MANDTORY" 6 | echo " QUEUE options are default, reg-mem, big-mem, gpu" 7 | exit 8 | else 9 | ENV=$1 10 | echo "Running: " $ENV 11 | fi 12 | 13 | if [ ! -n "$2" ] 14 | then 15 | QUEUE="default" 16 | echo "Defaulting QUEUE to ${QUEUE}" 17 | else 18 | QUEUE=$2 19 | fi 20 | 21 | if [ ! -n "$3" ] 22 | then 23 | STEP_SIZE="0.01" 24 | echo "Defaulting STEP_SIZE to ${STEP_SIZE}" 25 | else 26 | STEP_SIZE=$3 27 | fi 28 | 29 | if [ ! -n "$4" ] 30 | then 31 | LAMBDA="0.97" 32 | echo "Defaulting LAMBDA to ${LAMBDA}" 33 | else 34 | LAMBDA=$4 35 | fi 36 | 37 | 38 | 39 | LOGDIR=/home/${USER}/tmpoutputs_multiple_adversary_${ENV}/ 40 | if [ ! -d ${LOGDIR} ]; then 41 | echo "Directory ${LOGDIR} not present, creating it" 42 | mkdir $LOGDIR 43 | fi 44 | 45 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 46 | 47 | for adv_fraction in "0.1" "0.25" "0.5" "1.0"; 48 | do 49 | for step_size in ${STEP_SIZE}; 50 | do 51 | for gae_lambda in ${LAMBDA}; 52 | do 53 | for batch_size in "25000"; 54 | do 55 | PROCSTRING="$ENV-$step_size-$gae_lambda-$batch_size-$adv_fraction" 56 | echo $PROCSTRING 57 | qsub -N ${PROCSTRING} -q ${QUEUE} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v LOGDIR=${LOGDIR},ENV=${ENV},step_size=${step_size},gae_lambda=${gae_lambda},batch_size=${batch_size},PROCSTRING=${PROCSTRING},adv_fraction=${adv_fraction} yoda_rl_multiple_adversary_driver.sh 58 | done 59 | done 60 | done 61 | done 62 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_no_adv_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! -n "$env" ] 4 | then 5 | echo "env UNDEFINED, not running anything" 6 | exit 7 | fi 8 | if [ ! -n "$adv_fraction" ] 9 | then 10 | echo "adv_fraction UNDEFINED, not running anything" 11 | exit 12 | fi 13 | if [ ! -n "$PROCSTRING" ] 14 | then 15 | echo "PROCSTRING UNDEFINED, not running anything" 16 | exit 17 | fi 18 | 19 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 20 | OUTPUT_FILER=/home/${USER}/tmpoutputs_no_adv/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 21 | 22 | cd /home/lerrelp/rllab/adversary_scripts 23 | 24 | #run a niced python script 25 | nice python train_trpo_var_adversary.py --env $env --adv_name no_adv --n_exps 3 --n_itr 500 --layer_size 64 64 --batch_size 25000 --if_render 0 --adv_fraction $adv_fraction > $OUTPUT_FILER 26 | echo "Finished Without Problems" >> $OUTPUT_FILER 27 | echo "..::RL Solved::.." 28 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_no_adv_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LOGDIR=/home/${USER}/tmpoutputs_no_adv/ 4 | if [ ! 
-d ${LOGDIR} ]; then 5 | echo "Directory ${LOGDIR} not present, creating it" 6 | mkdir $LOGDIR 7 | fi 8 | 9 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 10 | 11 | for env in "HopperAdv-v1" "HalfCheetahAdv-v1" "Walker2dAdv-v1"; 12 | do 13 | for adv_fraction in "0.1" "1.0" "10.0"; 14 | do 15 | PROCSTRING="$env-$adv_fraction" 16 | echo $PROCSTRING 17 | qsub -N ${PROCSTRING} -q reg-mem -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v env=${env},adv_fraction=${adv_fraction},PROCSTRING=${PROCSTRING} yoda_rl_no_adv_driver.sh 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_no_adversary_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_var_adversary.py --env $ENV --adv_name no_adv --n_exps 5 --n_itr 500 --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_no_adversary_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ ! -n "$1" ] 3 | then 4 | echo "Usage: " $0 " ENV=1 QUEUE=2 STEP_SIZE=3 LAMBDA=4" 5 | echo " script_to_run is MANDTORY" 6 | echo " QUEUE options are default, reg-mem, big-mem, gpu" 7 | exit 8 | else 9 | ENV=$1 10 | echo "Running: " $ENV 11 | fi 12 | 13 | if [ ! -n "$2" ] 14 | then 15 | QUEUE="default" 16 | echo "Defaulting QUEUE to ${QUEUE}" 17 | else 18 | QUEUE=$2 19 | fi 20 | 21 | if [ ! -n "$3" ] 22 | then 23 | STEP_SIZE="0.01" 24 | echo "Defaulting STEP_SIZE to ${STEP_SIZE}" 25 | else 26 | STEP_SIZE=$3 27 | fi 28 | 29 | if [ ! -n "$4" ] 30 | then 31 | LAMBDA="0.97" 32 | echo "Defaulting LAMBDA to ${LAMBDA}" 33 | else 34 | LAMBDA=$4 35 | fi 36 | 37 | 38 | 39 | LOGDIR=/home/${USER}/tmpoutputs_no_adversary_${ENV}/ 40 | if [ ! 
-d ${LOGDIR} ]; then 41 | echo "Directory ${LOGDIR} not present, creating it" 42 | mkdir $LOGDIR 43 | fi 44 | 45 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 46 | 47 | for adv_fraction in "0.1" "0.25" "0.5" "1.0"; 48 | do 49 | for step_size in ${STEP_SIZE}; 50 | do 51 | for gae_lambda in ${LAMBDA}; 52 | do 53 | for batch_size in "25000"; 54 | do 55 | PROCSTRING="$ENV-$step_size-$gae_lambda-$batch_size-$adv_fraction" 56 | echo $PROCSTRING 57 | qsub -N ${PROCSTRING} -q ${QUEUE} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v LOGDIR=${LOGDIR},ENV=${ENV},step_size=${step_size},gae_lambda=${gae_lambda},batch_size=${batch_size},PROCSTRING=${PROCSTRING},adv_fraction=${adv_fraction} yoda_rl_no_adversary_driver.sh 58 | done 59 | done 60 | done 61 | done 62 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_only_adversary_single_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_only_adversary.py --env $ENV --pro_path $PROPATH --adv_name adv --n_exps 1 --n_itr $NITR --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction --folder $SAVEDIR> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LOGDIR=/home/${USER}/tmpoutputs/ 4 | if [ ! -d ${LOGDIR} ]; then 5 | echo "Directory ${LOGDIR} not present, creating it" 6 | mkdir $LOGDIR 7 | fi 8 | 9 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 10 | 11 | for env in "HopperAdv-v1" "HalfCheetahAdv-v1" "Walker2dAdv-v1"; 12 | do 13 | for adv_fraction in "0.1" "0.25" "0.5" "1.0" "2.0" "5.0" "10.0"; 14 | do 15 | PROCSTRING="$env-$adv_fraction" 16 | echo $PROCSTRING 17 | qsub -N ${PROCSTRING} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v env=${env},adv_fraction=${adv_fraction},PROCSTRING=${PROCSTRING} yoda_rl_driver.sh 18 | done 19 | done 20 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_step_adversary_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_step_adversary.py --env $ENV --adv_step 100 --n_adv_itr 100 --adv_name adv --n_exps 5 --n_itr 500 --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 
12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_step_adversary_single_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_step_adversary.py --env $ENV --adv_name adv --n_exps 1 --n_itr $NITR --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction --folder $SAVEDIR> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_step_adversary_starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ ! -n "$1" ] 3 | then 4 | echo "Usage: " $0 " ENV=1 QUEUE=2 STEP_SIZE=3 LAMBDA=4" 5 | echo " script_to_run is MANDTORY" 6 | echo " QUEUE options are default, reg-mem, big-mem, gpu" 7 | exit 8 | else 9 | ENV=$1 10 | echo "Running: " $ENV 11 | fi 12 | 13 | if [ ! -n "$2" ] 14 | then 15 | QUEUE="default" 16 | echo "Defaulting QUEUE to ${QUEUE}" 17 | else 18 | QUEUE=$2 19 | fi 20 | 21 | if [ ! -n "$3" ] 22 | then 23 | STEP_SIZE="0.01" 24 | echo "Defaulting STEP_SIZE to ${STEP_SIZE}" 25 | else 26 | STEP_SIZE=$3 27 | fi 28 | 29 | if [ ! -n "$4" ] 30 | then 31 | LAMBDA="0.97" 32 | echo "Defaulting LAMBDA to ${LAMBDA}" 33 | else 34 | LAMBDA=$4 35 | fi 36 | 37 | 38 | 39 | LOGDIR=/home/${USER}/tmpoutputs_step_adversary_${ENV}/ 40 | if [ ! -d ${LOGDIR} ]; then 41 | echo "Directory ${LOGDIR} not present, creating it" 42 | mkdir $LOGDIR 43 | fi 44 | 45 | LOGSTRING="-e ${LOGDIR} -o ${LOGDIR} -j oe" 46 | 47 | for adv_fraction in "0.1" "0.25" "0.5" "0.75" "1.0" "1.25"; 48 | do 49 | for step_size in ${STEP_SIZE}; 50 | do 51 | for gae_lambda in ${LAMBDA}; 52 | do 53 | for batch_size in "25000"; 54 | do 55 | PROCSTRING="$ENV-$step_size-$gae_lambda-$batch_size-$adv_fraction" 56 | echo $PROCSTRING 57 | qsub -N ${PROCSTRING} -q ${QUEUE} -l nodes=1:ppn=8 -l walltime=99:99:99:99 ${LOGSTRING} -v LOGDIR=${LOGDIR},ENV=${ENV},step_size=${step_size},gae_lambda=${gae_lambda},batch_size=${batch_size},PROCSTRING=${PROCSTRING},adv_fraction=${adv_fraction} yoda_rl_step_adversary_driver.sh 58 | done 59 | done 60 | done 61 | done 62 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/experiments/yoda_rl_tandem_adversary_single_driver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PROCSTRING_BASENAME=`basename ${PROCSTRING}` 4 | OUTPUT_FILER=${LOGDIR}/${PROCSTRING_BASENAME}.${HOSTNAME}.$$.output 5 | 6 | cd /home/lerrelp/rllab/adversary_scripts 7 | 8 | #run a niced 9 | nice python train_trpo_var_adversary.py --env $ENV --adv_name adv --n_exps 1 --n_itr $NITR --n_pro_itr $NITRT --n_adv_itr $NITRT --layer_size 64 64 --batch_size $batch_size --step_size $step_size --gae_lambda $gae_lambda --adv_fraction $adv_fraction --folder $SAVEDIR> $OUTPUT_FILER 10 | echo "Finished Without Problems" >> $OUTPUT_FILER 11 | echo "..::RL Solved::.." 
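# --- Editor's note (illustrative; not part of the original script) ---
# Like the other *_driver.sh scripts above, this one takes all of its settings from
# environment variables that the starter scripts inject through `qsub -v VAR=value,...`.
# A hypothetical local invocation (variable values borrowed from the starter scripts;
# NITRT and the save/log paths are placeholders) might look like:
#   LOGDIR=$HOME/tmpoutputs ENV=HopperAdv-v1 NITR=500 NITRT=1 batch_size=25000 \
#     step_size=0.01 gae_lambda=0.97 adv_fraction=0.25 SAVEDIR=$HOME/results \
#     PROCSTRING=hopper-test sh yoda_rl_tandem_adversary_single_driver.sh
# Note the hard-coded `cd /home/lerrelp/rllab/adversary_scripts`, which would need adjusting.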
12 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/binu/trpo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | 7 | env = normalize(CartpoleEnv()) 8 | 9 | policy = GaussianMLPPolicy( 10 | env_spec=env.spec, 11 | # The neural network policy should have two hidden layers, each with 32 hidden units. 12 | hidden_sizes=(32, 32) 13 | ) 14 | 15 | baseline = LinearFeatureBaseline(env_spec=env.spec) 16 | 17 | algo = TRPO(env=env,policy=policy,baseline=baseline,batch_size=4000,whole_paths=True,max_path_length=100,n_itr=40,discount=0.99,step_size=0.01) 18 | algo.train() 19 | -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/BASELINE-env-HalfCheetahAdv-v1_no_adv_Exp3_Itr600_BS4000_Adv1.0_stp0.01_lam0.97.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/BASELINE-env-HalfCheetahAdv-v1_no_adv_Exp3_Itr600_BS4000_Adv1.0_stp0.01_lam0.97.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/BASELINE-env-HopperAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/BASELINE-env-HopperAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/BASELINE-env-InvertedPendulumAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/BASELINE-env-InvertedPendulumAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/BASELINE-env-SwimmerAdv-v1_no_adv_Exp3_Itr100_BS4000_Adv1.0_stp0.01_lam0.97.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/BASELINE-env-SwimmerAdv-v1_no_adv_Exp3_Itr100_BS4000_Adv1.0_stp0.01_lam0.97.p.png -------------------------------------------------------------------------------- 
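Editor's note: the *.p.png images listed here are plots rendered from the corresponding *.p result pickles; viz_results_const.py below shows the full filtering-and-plotting pipeline. As a minimal sketch of just the loading step, assuming the 'zero_test' key layout that script reads (the filename is only an example taken from the listings above):

```python
import pickle

import numpy as np

# Hypothetical results file; any of the *.p pickles written by the training scripts should work.
# If the pickle references rllab objects, rllab must be importable first (results/xyz.py does `import rllab`).
with open("env-HopperAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_364289.p", "rb") as f:
    res = pickle.load(f)

# viz_results_const.py reads the constant (zero) adversary test curves under this key:
# one row per experiment, one column per training iteration.
rewards = np.array(res["zero_test"])
print(rewards.shape, rewards.mean(axis=0)[-1])  # mean final test reward across experiments
```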
/src/rllab-adv/adversarial/scripts/results/BASELINE-env-Walker2dAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/BASELINE-env-Walker2dAdv-v1_no_adv_Exp3_Itr500_BS4000_Adv1.0_stp0.01_lam0.97.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/Waler Robust.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/Waler Robust.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/Walker2d - Robust.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/Walker2d - Robust.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/env-HopperAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_364289.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/env-HopperAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_364289.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/env-SwimmerAdv-v1_Exp3_Itr50_BS4000_Adv0.25_stp0.01_lam0.97_240911.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/env-SwimmerAdv-v1_Exp3_Itr50_BS4000_Adv0.25_stp0.01_lam0.97_240911.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/env-Walker2dAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_163843.p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/env-Walker2dAdv-v1_Exp1_Itr500_BS4000_Adv0.25_stp0.01_lam0.97_163843.p.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/results/hopper_robustness.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/adversarial/scripts/results/hopper_robustness.png -------------------------------------------------------------------------------- /src/rllab-adv/adversarial/scripts/viz_results_const.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib.patches as mpatches 4 | import pickle 5 | import scipy 6 | import argparse 7 | 8 | ## Pass arguments ## 9 
| parser = argparse.ArgumentParser() 10 | parser.add_argument('r', type=str, help='filepath to results') 11 | parser.add_argument('f', type=int, default=1, help='0 for no filtering. 1 for filtering') 12 | 13 | args = parser.parse_args() 14 | savename = args.r 15 | if_filtering = bool(args.f) 16 | 17 | res_D = pickle.load(open(savename,'rb')) 18 | const_test_rew_summary = res_D['zero_test'] 19 | 20 | all_patches = [] 21 | 22 | con_rew = np.array(const_test_rew_summary) 23 | mean_con = con_rew.mean(0) 24 | std_con = con_rew.std(0) 25 | if if_filtering==True: 26 | mean_window_size = 15 27 | mean_order = 3 28 | std_window_size = 45 29 | std_order = 2 30 | mean_con = scipy.signal.savgol_filter(mean_con, mean_window_size, mean_order) 31 | std_con = scipy.signal.savgol_filter(std_con, std_window_size, std_order) 32 | x = [i for i in range(len(mean_con))] 33 | plt.plot(x,mean_con,color=(0.5,0.1,0.1), linewidth=2.0) 34 | plt.fill_between(x, mean_con-std_con, mean_con+std_con,color=(0.5,0.1,0.1), alpha=0.5) 35 | all_patches.append(mpatches.Patch(color=(0.5,0.1,0.1), label='zero_test_rew_summary')) 36 | 37 | for l in con_rew: 38 | plt.plot(x,l,color=(0.1,0.5,0.1), linewidth=2.0) 39 | 40 | plt.legend(handles=all_patches) 41 | axes = plt.gca() 42 | axes.set_ylim([-500,6000]) 43 | plt.title(savename) 44 | plt.show() 45 | #from IPython import embed;embed() 46 | -------------------------------------------------------------------------------- /src/rllab-adv/bin/activate: -------------------------------------------------------------------------------- 1 | /home/baxter/anaconda2/bin/activate -------------------------------------------------------------------------------- /src/rllab-adv/bin/conda: -------------------------------------------------------------------------------- 1 | /home/baxter/anaconda2/bin/conda -------------------------------------------------------------------------------- /src/rllab-adv/bin/deactivate: -------------------------------------------------------------------------------- 1 | /home/baxter/anaconda2/bin/deactivate -------------------------------------------------------------------------------- /src/rllab-adv/circle.yml: -------------------------------------------------------------------------------- 1 | machine: 2 | services: 3 | - docker 4 | 5 | dependencies: 6 | cache_directories: 7 | - "~/docker" 8 | override: 9 | - docker info 10 | - if [[ -e ~/docker/image.tar ]]; then docker load -i ~/docker/image.tar; fi 11 | - docker build -t tester -f docker/tester_Dockerfile . 
12 | mkdir -p ~/docker; docker save tester > ~/docker/image.tar 13 | 14 | test: 15 | override: 16 | - docker run tester /bin/bash -li -c "CIRCLECI=true nose2" 17 | -------------------------------------------------------------------------------- /src/rllab-adv/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/contrib/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/contrib/alexbeloi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/contrib/alexbeloi/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/contrib/alexbeloi/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/contrib/alexbeloi/examples/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/contrib/alexbeloi/examples/trpois_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.trpo import TRPO 2 | from rllab.algos.tnpg import TNPG 3 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 4 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 5 | from rllab.envs.normalized_env import normalize 6 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 7 | from contrib.alexbeloi.is_sampler import ISSampler 8 | 9 | """ 10 | Example using TRPO with ISSampler, iterations alternate between live and 11 | importance sampled iterations. 12 | """ 13 | 14 | env = normalize(CartpoleEnv()) 15 | 16 | policy = GaussianMLPPolicy( 17 | env_spec=env.spec, 18 | # The neural network policy should have two hidden layers, each with 32 hidden units. 19 | hidden_sizes=(32, 32) 20 | ) 21 | 22 | baseline = LinearFeatureBaseline(env_spec=env.spec) 23 | 24 | optimizer_args = dict( 25 | # debug_nan=True, 26 | # reg_coeff=0.1, 27 | # cg_iters=2 28 | ) 29 | 30 | algo = TRPO( 31 | env=env, 32 | policy=policy, 33 | baseline=baseline, 34 | batch_size=4000, 35 | max_path_length=100, 36 | n_itr=200, 37 | discount=0.99, 38 | step_size=0.01, 39 | sampler_cls=ISSampler, 40 | sampler_args=dict(n_backtrack=1), 41 | optimizer_args=optimizer_args 42 | ) 43 | algo.train() 44 | -------------------------------------------------------------------------------- /src/rllab-adv/contrib/alexbeloi/examples/vpgis_cartpole.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline 3 | from rllab.envs.box2d.cartpole_env import CartpoleEnv 4 | from rllab.envs.normalized_env import normalize 5 | from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy 6 | from contrib.alexbeloi.is_sampler import ISSampler 7 | 8 | """ 9 | Example using VPG with ISSampler, iterations alternate between live and 10 | importance sampled iterations.
11 | """ 12 | 13 | env = normalize(CartpoleEnv()) 14 | 15 | policy = GaussianMLPPolicy( 16 | env_spec=env.spec, 17 | # The neural network policy should have two hidden layers, each with 32 hidden units. 18 | hidden_sizes=(32, 32) 19 | ) 20 | 21 | baseline = LinearFeatureBaseline(env_spec=env.spec) 22 | 23 | algo = VPG( 24 | env=env, 25 | policy=policy, 26 | baseline=baseline, 27 | batch_size=4000, 28 | max_path_length=100, 29 | n_itr=40, 30 | discount=0.99, 31 | step_size=0.01, 32 | sampler_cls=ISSampler, 33 | sampler_args=dict(n_backtrack=1), 34 | ) 35 | algo.train() 36 | -------------------------------------------------------------------------------- /src/rllab-adv/docker/tester_Dockerfile: -------------------------------------------------------------------------------- 1 | FROM neocxi/rllab_exp_gpu_tf:py3 2 | 3 | RUN bash -c 'source activate rllab3 && conda install -y nomkl && conda uninstall -y scipy && conda install -y scipy' 4 | 5 | ADD . /root/code/rllab 6 | WORKDIR /root/code/rllab 7 | -------------------------------------------------------------------------------- /src/rllab-adv/docs/index.rst: -------------------------------------------------------------------------------- 1 | .. rllab documentation master file, created by 2 | sphinx-quickstart on Mon Feb 15 20:07:12 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to rllab 7 | ================ 8 | 9 | rllab is a framework for developing and evaluating reinforcement learning algorithms. 10 | 11 | rllab is a work in progress, input is welcome. The available documentation is limited for now. 12 | 13 | User Guide 14 | ========== 15 | 16 | The rllab user guide explains how to install rllab, how to run experiments, and how to implement new MDPs and new algorithms. 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | 21 | user/installation 22 | user/experiments 23 | user/gym_integration 24 | user/implement_env 25 | user/implement_algo_basic 26 | user/implement_algo_advanced 27 | user/cluster 28 | 29 | 30 | Citing rllab 31 | ============ 32 | 33 | If you use rllab for academic research, you are highly encouraged to cite the following paper: 34 | 35 | - Yan Duan, Xi Chen, Rein Houthooft, John Schulman, Pieter Abbeel. "`Benchmarking Deep Reinforcement Learning for Continuous Control `_. 
*Proceedings of the 33rd International Conference on Machine Learning (ICML), 2016.* 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | * :ref:`genindex` 42 | * :ref:`modindex` 43 | * :ref:`search` 44 | 45 | -------------------------------------------------------------------------------- /src/rllab-adv/docs/user/cluster_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/docs/user/cluster_1.png -------------------------------------------------------------------------------- /src/rllab-adv/docs/user/cluster_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/docs/user/cluster_2.png -------------------------------------------------------------------------------- /src/rllab-adv/docs/user/cluster_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/docs/user/cluster_3.png -------------------------------------------------------------------------------- /src/rllab-adv/environment.yml: -------------------------------------------------------------------------------- 1 | name: rllab3-adv 2 | channels: 3 | - https://conda.anaconda.org/kne 4 | - https://conda.binstar.org/tlatorre 5 | - https://conda.anaconda.org/cjs14 6 | - https://conda.anaconda.org/menpo 7 | - jjhelmus 8 | dependencies: 9 | - python==3.5.2 10 | - numpy==1.10.4 11 | - scipy 12 | - path.py 13 | - python-dateutil 14 | - joblib==0.9.4 15 | - mako 16 | - ipywidgets 17 | - numba 18 | - flask 19 | - pybox2d 20 | - pygame 21 | - h5py 22 | - matplotlib 23 | - opencv3=3.1.0 24 | - scikit-learn 25 | - tensorflow=0.10.0rc0 26 | - pip: 27 | - Pillow 28 | - atari-py 29 | - pyprind 30 | - ipdb 31 | - boto3 32 | - PyOpenGL 33 | - nose2 34 | - pyzmq 35 | - msgpack-python 36 | - mujoco_py 37 | - cached_property 38 | - line_profiler 39 | - Cython 40 | - git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano 41 | - git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne 42 | - git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly 43 | - awscli 44 | - git+https://github.com/lerrel/gym-adv.git 45 | - pyglet 46 | - git+https://github.com/neocxi/prettytensor.git 47 | - jupyter 48 | - progressbar2 49 | - chainer==1.15.0 50 | -------------------------------------------------------------------------------- /src/rllab-adv/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | path.py 4 | python-dateutil 5 | joblib==0.9.4 6 | mako 7 | ipywidgets 8 | numba 9 | flask 10 | pypybox2d 11 | pygame 12 | h5py 13 | matplotlib 14 | scikit-learn 15 | Pillow 16 | atari-py 17 | pyprind 18 | ipdb 19 | boto3 20 | PyOpenGL 21 | nose2 22 | pyzmq 23 | msgpack-python 24 | cached_property 25 | line_profiler 26 | Cython 27 | git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano 28 | git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne 29 | git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly 
30 | awscli 31 | pyglet 32 | git+https://github.com/neocxi/prettytensor.git 33 | jupyter 34 | progressbar2 35 | chainer==1.15.0 36 | tensorflow==0.12.1 37 | 38 | -------------------------------------------------------------------------------- /src/rllab-adv/results/xyz.py: -------------------------------------------------------------------------------- 1 | from pprint import pprint 2 | import sys 3 | import pickle 4 | import rllab 5 | 6 | data = pickle.load(open(sys.argv[1], 'rb')) 7 | 8 | pprint(data) 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: rllab 3 | Version: 0.1.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description-Content-Type: UNKNOWN 10 | Description: UNKNOWN 11 | Platform: UNKNOWN 12 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | rllab 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/algos/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/base.py: -------------------------------------------------------------------------------- 1 | class Algorithm(object): 2 | pass 3 | 4 | 5 | class RLAlgorithm(Algorithm): 6 | 7 | def train(self): 8 | raise NotImplementedError 9 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/erwr.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.vpg import VPG 2 | from rllab.optimizers.lbfgs_optimizer import LbfgsOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class ERWR(VPG, Serializable): 7 | """ 8 | Episodic Reward Weighted Regression [1]_ 9 | 10 | Notes 11 | ----- 12 | This does not implement the original RwR [2]_ that deals with "immediate reward problems" since 13 | it doesn't find solutions that optimize for temporally delayed rewards. 14 | 15 | .. [1] Kober, Jens, and Jan R. Peters. "Policy search for motor primitives in robotics." Advances in neural information processing systems. 2009. 16 | .. [2] Peters, Jan, and Stefan Schaal. "Using reward-weighted regression for reinforcement learning of task space control." Approximate Dynamic Programming and Reinforcement Learning, 2007. ADPRL 2007. IEEE International Symposium on. IEEE, 2007. 
17 | """ 18 | 19 | def __init__( 20 | self, 21 | optimizer=None, 22 | optimizer_args=None, 23 | positive_adv=None, 24 | **kwargs): 25 | Serializable.quick_init(self, locals()) 26 | if optimizer is None: 27 | if optimizer_args is None: 28 | optimizer_args = dict() 29 | optimizer = LbfgsOptimizer(**optimizer_args) 30 | super(ERWR, self).__init__( 31 | optimizer=optimizer, 32 | positive_adv=True if positive_adv is None else positive_adv, 33 | **kwargs 34 | ) 35 | 36 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/nop.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.batch_polopt import BatchPolopt 2 | from rllab.misc.overrides import overrides 3 | 4 | 5 | class NOP(BatchPolopt): 6 | """ 7 | NOP (no optimization performed) policy search algorithm 8 | """ 9 | 10 | def __init__( 11 | self, 12 | **kwargs): 13 | super(NOP, self).__init__(**kwargs) 14 | 15 | @overrides 16 | def init_opt(self): 17 | pass 18 | 19 | @overrides 20 | def optimize_policy(self, itr, samples_data): 21 | pass 22 | 23 | @overrides 24 | def get_itr_snapshot(self, itr, samples_data): 25 | return dict() 26 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/ppo.py: -------------------------------------------------------------------------------- 1 | from rllab.optimizers.penalty_lbfgs_optimizer import PenaltyLbfgsOptimizer 2 | from rllab.algos.npo import NPO 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class PPO(NPO, Serializable): 7 | """ 8 | Penalized Policy Optimization. 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | Serializable.quick_init(self, locals()) 17 | if optimizer is None: 18 | if optimizer_args is None: 19 | optimizer_args = dict() 20 | optimizer = PenaltyLbfgsOptimizer(**optimizer_args) 21 | super(PPO, self).__init__(optimizer=optimizer, **kwargs) 22 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/tnpg.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.misc import ext 4 | 5 | 6 | class TNPG(NPO): 7 | """ 8 | Truncated Natural Policy Gradient. 
9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | default_args = dict(max_backtracks=1) 18 | if optimizer_args is None: 19 | optimizer_args = default_args 20 | else: 21 | optimizer_args = dict(default_args, **optimizer_args) 22 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 23 | super(TNPG, self).__init__(optimizer=optimizer, **kwargs) 24 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/algos/trpo.py: -------------------------------------------------------------------------------- 1 | from rllab.algos.npo import NPO 2 | from rllab.optimizers.conjugate_gradient_optimizer import ConjugateGradientOptimizer 3 | from rllab.core.serializable import Serializable 4 | 5 | 6 | class TRPO(NPO): 7 | """ 8 | Trust Region Policy Optimization 9 | """ 10 | 11 | def __init__( 12 | self, 13 | optimizer=None, 14 | optimizer_args=None, 15 | **kwargs): 16 | if optimizer is None: 17 | if optimizer_args is None: 18 | optimizer_args = dict() 19 | optimizer = ConjugateGradientOptimizer(**optimizer_args) 20 | super(TRPO, self).__init__(optimizer=optimizer, **kwargs) 21 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/baselines/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/base.py: -------------------------------------------------------------------------------- 1 | from rllab.misc import autoargs 2 | 3 | 4 | class Baseline(object): 5 | 6 | def __init__(self, env_spec): 7 | self._mdp_spec = env_spec 8 | 9 | @property 10 | def algorithm_parallelized(self): 11 | return False 12 | 13 | def get_param_values(self): 14 | raise NotImplementedError 15 | 16 | def set_param_values(self, val): 17 | raise NotImplementedError 18 | 19 | def fit(self, paths): 20 | raise NotImplementedError 21 | 22 | def predict(self, path): 23 | raise NotImplementedError 24 | 25 | @classmethod 26 | @autoargs.add_args 27 | def add_args(cls, parser): 28 | pass 29 | 30 | @classmethod 31 | @autoargs.new_from_args 32 | def new_from_args(cls, args, mdp): 33 | pass 34 | 35 | def log_diagnostics(self, paths): 36 | """ 37 | Log extra information per iteration based on the collected paths 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/gaussian_conv_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.misc.overrides import overrides 5 | from rllab.core.parameterized import Parameterized 6 | from rllab.baselines.base import Baseline 7 | from rllab.regressors.gaussian_conv_regressor import GaussianConvRegressor 8 | 9 | 10 | class GaussianConvBaseline(Baseline, Parameterized, Serializable): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | regressor_args=None, 17 | ): 18 | Serializable.quick_init(self, locals()) 19 | super(GaussianConvBaseline, self).__init__(env_spec) 20 | if regressor_args is None: 21 | regressor_args = dict() 22 | 23 | self._regressor = GaussianConvRegressor( 24 | 
input_shape=env_spec.observation_space.shape, 25 | output_dim=1, 26 | name="vf", 27 | **regressor_args 28 | ) 29 | 30 | @overrides 31 | def fit(self, paths): 32 | observations = np.concatenate([p["observations"] for p in paths]) 33 | returns = np.concatenate([p["returns"] for p in paths]) 34 | self._regressor.fit(observations, returns.reshape((-1, 1))) 35 | 36 | @overrides 37 | def predict(self, path): 38 | return self._regressor.predict(path["observations"]).flatten() 39 | 40 | @overrides 41 | def get_param_values(self, **tags): 42 | return self._regressor.get_param_values(**tags) 43 | 44 | @overrides 45 | def set_param_values(self, flattened_params, **tags): 46 | self._regressor.set_param_values(flattened_params, **tags) 47 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/gaussian_mlp_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.core.parameterized import Parameterized 5 | from rllab.baselines.base import Baseline 6 | from rllab.misc.overrides import overrides 7 | from rllab.regressors.gaussian_mlp_regressor import GaussianMLPRegressor 8 | 9 | 10 | class GaussianMLPBaseline(Baseline, Parameterized, Serializable): 11 | 12 | def __init__( 13 | self, 14 | env_spec, 15 | subsample_factor=1., 16 | num_seq_inputs=1, 17 | regressor_args=None, 18 | ): 19 | Serializable.quick_init(self, locals()) 20 | super(GaussianMLPBaseline, self).__init__(env_spec) 21 | if regressor_args is None: 22 | regressor_args = dict() 23 | 24 | self._regressor = GaussianMLPRegressor( 25 | input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,), 26 | output_dim=1, 27 | name="vf", 28 | **regressor_args 29 | ) 30 | 31 | @overrides 32 | def fit(self, paths): 33 | observations = np.concatenate([p["observations"] for p in paths]) 34 | returns = np.concatenate([p["returns"] for p in paths]) 35 | self._regressor.fit(observations, returns.reshape((-1, 1))) 36 | 37 | @overrides 38 | def predict(self, path): 39 | return self._regressor.predict(path["observations"]).flatten() 40 | 41 | @overrides 42 | def get_param_values(self, **tags): 43 | return self._regressor.get_param_values(**tags) 44 | 45 | @overrides 46 | def set_param_values(self, flattened_params, **tags): 47 | self._regressor.set_param_values(flattened_params, **tags) 48 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/linear_feature_baseline.py: -------------------------------------------------------------------------------- 1 | from rllab.baselines.base import Baseline 2 | from rllab.misc.overrides import overrides 3 | import numpy as np 4 | 5 | 6 | class LinearFeatureBaseline(Baseline): 7 | def __init__(self, env_spec, reg_coeff=1e-5): 8 | self._coeffs = None 9 | self._reg_coeff = reg_coeff 10 | 11 | @overrides 12 | def get_param_values(self, **tags): 13 | return self._coeffs 14 | 15 | @overrides 16 | def set_param_values(self, val, **tags): 17 | self._coeffs = val 18 | 19 | def _features(self, path): 20 | o = np.clip(path["observations"], -10, 10) 21 | l = len(path["rewards"]) 22 | al = np.arange(l).reshape(-1, 1) / 100.0 23 | return np.concatenate([o, o ** 2, al, al ** 2, al ** 3, np.ones((l, 1))], axis=1) 24 | 25 | @overrides 26 | def fit(self, paths): 27 | featmat = np.concatenate([self._features(path) for path in paths]) 28 | returns = np.concatenate([path["returns"] for path in 
paths]) 29 | reg_coeff = self._reg_coeff 30 | for _ in range(5): 31 | self._coeffs = np.linalg.lstsq( 32 | featmat.T.dot(featmat) + reg_coeff * np.identity(featmat.shape[1]), 33 | featmat.T.dot(returns) 34 | )[0] 35 | if not np.any(np.isnan(self._coeffs)): 36 | break 37 | reg_coeff *= 10 38 | 39 | @overrides 40 | def predict(self, path): 41 | if self._coeffs is None: 42 | return np.zeros(len(path["rewards"])) 43 | return self._features(path).dot(self._coeffs) 44 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/baselines/zero_baseline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rllab.baselines.base import Baseline 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class ZeroBaseline(Baseline): 7 | 8 | def __init__(self, env_spec): 9 | pass 10 | 11 | @overrides 12 | def get_param_values(self, **kwargs): 13 | return None 14 | 15 | @overrides 16 | def set_param_values(self, val, **kwargs): 17 | pass 18 | 19 | @overrides 20 | def fit(self, paths): 21 | pass 22 | 23 | @overrides 24 | def predict(self, path): 25 | return np.zeros_like(path["rewards"]) 26 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/config_personal_template.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | USE_GPU = False 4 | 5 | DOCKER_IMAGE = "rein/rllab-exp-new" 6 | 7 | KUBE_PREFIX = "template_" 8 | 9 | DOCKER_LOG_DIR = "/tmp/expt" 10 | 11 | AWS_IMAGE_ID = "ami-67c5d00d" 12 | 13 | if USE_GPU: 14 | AWS_INSTANCE_TYPE = "g2.2xlarge" 15 | else: 16 | AWS_INSTANCE_TYPE = "c4.2xlarge" 17 | 18 | AWS_KEY_NAME = "research_virginia" 19 | 20 | AWS_SPOT = True 21 | 22 | AWS_SPOT_PRICE = '10.0' 23 | 24 | AWS_IAM_INSTANCE_PROFILE_NAME = "rllab" 25 | 26 | AWS_SECURITY_GROUPS = ["rllab"] 27 | 28 | AWS_REGION_NAME = "us-west-2" 29 | 30 | AWS_CODE_SYNC_S3_PATH = "e" 31 | 32 | CODE_SYNC_IGNORES = ["*.git/*", "*data/*", "*src/*", 33 | "*.pods/*", "*tests/*", "*examples/*", "docs/*"] 34 | 35 | LOCAL_CODE_DIR = "" 36 | 37 | AWS_S3_PATH = "" 38 | 39 | LABEL = "template" 40 | 41 | DOCKER_CODE_DIR = "/root/code/rllab" 42 | 43 | AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY", "") 44 | 45 | AWS_ACCESS_SECRET = os.environ.get("AWS_ACCESS_SECRET", "") 46 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/core/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/core/lasagne_powered.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.misc.overrides import overrides 3 | import lasagne.layers as L 4 | 5 | 6 | class LasagnePowered(Parameterized): 7 | def __init__(self, output_layers): 8 | self._output_layers = output_layers 9 | super(LasagnePowered, self).__init__() 10 | 11 | @property 12 | def output_layers(self): 13 | return self._output_layers 14 | 15 | @overrides 16 | def get_params_internal(self, **tags): # this gives ALL the vars (not the params values) 17 | return L.get_all_params( # this lasagne function also returns all var below the passed layers 18 | 
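LinearFeatureBaseline above fits a ridge regression over handcrafted per-step features (clipped observations, their squares, and polynomial time features), retrying with a 10x larger regularization coefficient until the solution is free of NaNs. A toy sketch of the fit/predict contract; the path data below is made up purely for illustration:

```python
import numpy as np
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline

path = dict(
    observations=np.random.randn(20, 3),   # (T, obs_dim)
    rewards=np.zeros(20),                  # only its length is used by _features
    returns=np.linspace(1.0, 0.0, 20),     # regression targets
)
baseline = LinearFeatureBaseline(env_spec=None)  # env_spec is accepted but unused here
baseline.fit([path])
print(baseline.predict(path)[:3])                # fitted value estimates for the first steps
```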
L.concat(self._output_layers), 19 | **tags 20 | ) 21 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/core/serializable.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | 4 | class Serializable(object): 5 | 6 | def __init__(self, *args, **kwargs): 7 | self.__args = args 8 | self.__kwargs = kwargs 9 | 10 | def quick_init(self, locals_): 11 | if getattr(self, "_serializable_initialized", False): 12 | return 13 | spec = inspect.getargspec(self.__init__) 14 | # Exclude the first "self" parameter 15 | in_order_args = [locals_[arg] for arg in spec.args][1:] 16 | if spec.varargs: 17 | varargs = locals_[spec.varargs] 18 | else: 19 | varargs = tuple() 20 | if spec.keywords: 21 | kwargs = locals_[spec.keywords] 22 | else: 23 | kwargs = dict() 24 | self.__args = tuple(in_order_args) + varargs 25 | self.__kwargs = kwargs 26 | setattr(self, "_serializable_initialized", True) 27 | 28 | def __getstate__(self): 29 | return {"__args": self.__args, "__kwargs": self.__kwargs} 30 | 31 | def __setstate__(self, d): 32 | # convert all __args to keyword-based arguments 33 | in_order_args = inspect.getargspec(self.__init__).args[1:] 34 | out = type(self)(**dict(zip(in_order_args, d["__args"]), **d["__kwargs"])) 35 | self.__dict__.update(out.__dict__) 36 | 37 | @classmethod 38 | def clone(cls, obj, **kwargs): 39 | assert isinstance(obj, Serializable) 40 | d = obj.__getstate__() 41 | d["__kwargs"] = dict(d["__kwargs"], **kwargs) 42 | out = type(obj).__new__(type(obj)) 43 | out.__setstate__(d) 44 | return out 45 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/distributions/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/distributions/base.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | 3 | class Distribution(object): 4 | 5 | @property 6 | def dim(self): 7 | raise NotImplementedError 8 | 9 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 10 | """ 11 | Compute the symbolic KL divergence of two distributions 12 | """ 13 | raise NotImplementedError 14 | 15 | def kl(self, old_dist_info, new_dist_info): 16 | """ 17 | Compute the KL divergence of two distributions 18 | """ 19 | raise NotImplementedError 20 | 21 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 22 | raise NotImplementedError 23 | 24 | def entropy(self, dist_info): 25 | raise NotImplementedError 26 | 27 | def log_likelihood_sym(self, x_var, dist_info_vars): 28 | raise NotImplementedError 29 | 30 | def likelihood_sym(self, x_var, dist_info_vars): 31 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 32 | 33 | def log_likelihood(self, xs, dist_info): 34 | raise NotImplementedError 35 | 36 | @property 37 | def dist_info_keys(self): 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/distributions/delta.py: -------------------------------------------------------------------------------- 1 | from rllab.distributions.base import Distribution 2 | 3 | class Delta(Distribution): 4 | @property 
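Serializable.quick_init above records the arguments of the calling __init__, which is what makes rllab objects picklable via __getstate__/__setstate__ and re-buildable via Serializable.clone with selected arguments overridden. A small hypothetical class (name and fields made up) illustrating the contract:

```python
from rllab.core.serializable import Serializable


class Gains(Serializable):
    def __init__(self, kp, kd=0.1):
        Serializable.quick_init(self, locals())  # snapshot the ctor args (kp, kd)
        self.kp = kp
        self.kd = kd


g = Gains(2.0)
g2 = Serializable.clone(g, kd=0.5)  # re-invokes __init__ with kd overridden
assert (g2.kp, g2.kd) == (2.0, 0.5)
```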
5 | def dim(self): 6 | return 0 7 | 8 | def kl_sym(self, old_dist_info_vars, new_dist_info_vars): 9 | return None 10 | 11 | def kl(self, old_dist_info, new_dist_info): 12 | return None 13 | 14 | def likelihood_ratio_sym(self, x_var, old_dist_info_vars, new_dist_info_vars): 15 | raise NotImplementedError 16 | 17 | def entropy(self, dist_info): 18 | raise NotImplementedError 19 | 20 | def log_likelihood_sym(self, x_var, dist_info_vars): 21 | raise NotImplementedError 22 | 23 | def likelihood_sym(self, x_var, dist_info_vars): 24 | return TT.exp(self.log_likelihood_sym(x_var, dist_info_vars)) 25 | 26 | def log_likelihood(self, xs, dist_info): 27 | return None 28 | 29 | @property 30 | def dist_info_keys(self): 31 | return None 32 | 33 | def entropy(self,dist_info): 34 | return 0 35 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/distributions/recurrent_diagonal_gaussian.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as TT 2 | import numpy as np 3 | from rllab.distributions.base import Distribution 4 | from rllab.distributions.diagonal_gaussian import DiagonalGaussian 5 | 6 | RecurrentDiagonalGaussian = DiagonalGaussian 7 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.adversarial.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.adversarial.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.adversarial.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.adversarial.classic_control.pendulum import PendulumEnv 5 | from gym.envs.adversarial.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/adversarial/.classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/dist/gym-0.5.6-py2.7.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/adversarial/.classic_control/dist/gym-0.5.6-py2.7.egg -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: gym 3 | Version: 0.5.6 4 | Summary: The OpenAI Gym: A toolkit for developing and comparing your reinforcement learning agents. 
5 | Home-page: https://github.com/openai/gym 6 | Author: OpenAI 7 | Author-email: gym@openai.com 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | gym.egg-info/PKG-INFO 2 | gym.egg-info/SOURCES.txt 3 | gym.egg-info/dependency_links.txt 4 | gym.egg-info/not-zip-safe 5 | gym.egg-info/requires.txt 6 | gym.egg-info/top_level.txt -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/not-zip-safe: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.10.4 2 | requests>=2.0 3 | six 4 | pyglet>=1.2.0 5 | 6 | [all] 7 | PyOpenGL 8 | box2d-py 9 | keras 10 | theano 11 | atari_py>=0.0.17 12 | Pillow 13 | PyOpenGL 14 | pachi-py>=0.0.19 15 | mujoco_py>=0.4.3 16 | imageio 17 | 18 | [atari] 19 | atari_py>=0.0.17 20 | Pillow 21 | PyOpenGL 22 | 23 | [board_game] 24 | pachi-py>=0.0.19 25 | 26 | [box2d] 27 | box2d-py 28 | 29 | [classic_control] 30 | PyOpenGL 31 | 32 | [mujoco] 33 | mujoco_py>=0.4.3 34 | imageio 35 | 36 | [parameter_tuning] 37 | keras 38 | theano 39 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/.classic_control/gym.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/adversarial/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.adversarial.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.adversarial.mujoco.ant import AntEnv 5 | from gym.envs.adversarial.mujoco.ant_heel import AntHeelEnv 6 | from gym.envs.adversarial.mujoco.half_cheetah import HalfCheetahEnv 7 | from gym.envs.adversarial.mujoco.half_cheetah_heel import HalfCheetahHeelEnv 8 | from gym.envs.adversarial.mujoco.half_cheetah_torso import HalfCheetahTorsoEnv 9 | from gym.envs.adversarial.mujoco.hopper import HopperEnv 10 | from gym.envs.adversarial.mujoco.hopper_6 import Hopper6Env 11 | from gym.envs.adversarial.mujoco.hopper_heel import HopperHeelEnv 12 | from gym.envs.adversarial.mujoco.hopper_heel_6 import HopperHeel6Env 13 | from 
gym.envs.adversarial.mujoco.hopper_torso_6 import HopperTorso6Env 14 | from gym.envs.adversarial.mujoco.walker2d import Walker2dEnv 15 | from gym.envs.adversarial.mujoco.walker2d_heel import Walker2dHeelEnv 16 | from gym.envs.adversarial.mujoco.walker2d_torso import Walker2dTorsoEnv 17 | from gym.envs.adversarial.mujoco.humanoid import HumanoidEnv 18 | from gym.envs.adversarial.mujoco.humanoid_heel import HumanoidHeelEnv 19 | from gym.envs.adversarial.mujoco.inverted_pendulum import InvertedPendulumEnv 20 | from gym.envs.adversarial.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 21 | from gym.envs.adversarial.mujoco.reacher import ReacherEnv 22 | from gym.envs.adversarial.mujoco.swimmer import SwimmerEnv 23 | from gym.envs.adversarial.mujoco.humanoidstandup import HumanoidStandupEnv 24 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/adversarial/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/box2d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/box2d/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/box2d/models/double_pendulum.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | from rllab.misc.mako_utils import compute_rect_vertices 3 | link_len = opts['link_len'] 4 | link_width = 0.1 5 | %> 6 | 7 | 8 | 9 | 10 | 16 | 17 | 18 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/box2d/models/mountain_car.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | noise = opts.get("noise", False) 3 | track_width = 4 4 | if noise: 5 | import numpy as np 6 | track_width += np.random.uniform(-1, 1) 7 | %> 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/box2d/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .xml_box2d import world_from_xml, find_body, find_joint 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/env_spec.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.base import Space 3 | 4 | 5 | class EnvSpec(Serializable): 6 | 7 | def __init__( 8 | self, 9 | observation_space, 10 | pro_action_space, 11 | adv_action_space): 12 | """ 13 | :type observation_space: Space 14 | :type action_space: Space 15 | """ 16 | Serializable.quick_init(self, locals()) 17 | self._observation_space = observation_space 18 | self._pro_action_space = pro_action_space 19 | self._adv_action_space = adv_action_space 20 | 21 | @property 22 | def observation_space(self): 23 | return self._observation_space 24 | 25 | @property 26 | def pro_action_space(self): 27 | return self._pro_action_space 28 | 
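The package __init__ above re-exports every adversarial MuJoCo environment class, so they can be imported directly from gym.envs.adversarial.mujoco. A hypothetical smoke test, assuming the bundled gym-adv fork is installed and mujoco_py can find the MuJoCo binaries and license key shipped in this tree:

```python
from gym.envs.adversarial.mujoco import HopperEnv  # re-exported by the __init__ above

env = HopperEnv()   # constructed without arguments, like the stock gym MuJoCo envs
obs = env.reset()
print(obs.shape)
```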
29 | @property 30 | def adv_action_space(self): 31 | return self._adv_action_space 32 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/identification_env.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.envs.proxy_env import ProxyEnv 3 | from rllab.misc.overrides import overrides 4 | 5 | 6 | class IdentificationEnv(ProxyEnv, Serializable): 7 | 8 | def __init__(self, mdp_cls, mdp_args): 9 | Serializable.quick_init(self, locals()) 10 | self.mdp_cls = mdp_cls 11 | self.mdp_args = dict(mdp_args) 12 | self.mdp_args["template_args"] = dict(noise=True) 13 | mdp = self.gen_mdp() 14 | super(IdentificationEnv, self).__init__(mdp) 15 | 16 | def gen_mdp(self): 17 | return self.mdp_cls(**self.mdp_args) 18 | 19 | @overrides 20 | def reset(self): 21 | if getattr(self, "_mdp", None): 22 | if hasattr(self._wrapped_env, "release"): 23 | self._wrapped_env.release() 24 | self._wrapped_env = self.gen_mdp() 25 | return super(IdentificationEnv, self).reset() 26 | 27 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/mujoco/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/gather/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/mujoco/gather/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/gather/ant_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | from rllab.envs.mujoco.mujoco_env import q_mult, q_inv 4 | import math 5 | 6 | class AntGatherEnv(GatherEnv): 7 | 8 | MODEL_CLASS = AntEnv 9 | ORI_IND = 3 10 | 11 | def get_ori(self): 12 | ori = [0, 1, 0, 0] 13 | rot = self.inner_env.model.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND+4] # take the quaternion 14 | ori = q_mult(q_mult(rot,ori),q_inv(rot))[1:3] # project onto x-y plane 15 | ori = math.atan2(ori[1],ori[0]) 16 | return ori 17 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/gather/point_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/gather/swimmer_gather_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.gather.gather_env import GatherEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerGatherEnv(GatherEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | 
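EnvSpec above is the adversarial variant of rllab's environment spec: one observation space paired with separate protagonist and adversary action spaces. A hypothetical construction using the Box space exported by rllab.spaces (all shapes below are made up):

```python
from rllab.envs.env_spec import EnvSpec
from rllab.spaces import Box

spec = EnvSpec(
    observation_space=Box(low=-1, high=1, shape=(4,)),
    pro_action_space=Box(low=-1, high=1, shape=(2,)),  # protagonist's controls
    adv_action_space=Box(low=-1, high=1, shape=(2,)),  # adversary's disturbances
)
print(spec.observation_space.flat_dim)  # 4
print(spec.pro_action_space, spec.adv_action_space)
```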
-------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/humanoid_env.py: -------------------------------------------------------------------------------- 1 | from .simple_humanoid_env import SimpleHumanoidEnv 2 | 3 | 4 | # Taken from Wojciech's code 5 | class HumanoidEnv(SimpleHumanoidEnv): 6 | 7 | FILE = 'humanoid.xml' 8 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/maze/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/envs/mujoco/maze/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/maze/ant_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.ant_env import AntEnv 3 | from rllab.envs.mujoco.mujoco_env import q_mult, q_inv 4 | import math 5 | 6 | 7 | class AntMazeEnv(MazeEnv): 8 | 9 | MODEL_CLASS = AntEnv 10 | ORI_IND = 3 11 | 12 | MAZE_HEIGHT = 2 13 | MAZE_SIZE_SCALING = 3.0 14 | 15 | def get_ori(self): 16 | ori = [0, 1, 0, 0] 17 | rot = self.wrapped_env.model.data.qpos[self.__class__.ORI_IND:self.__class__.ORI_IND+4] # take the quaternion 18 | ori = q_mult(q_mult(rot,ori),q_inv(rot))[1:3] # project onto x-y plane 19 | ori = math.atan2(ori[1],ori[0]) 20 | return ori 21 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/maze/point_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.point_env import PointEnv 3 | 4 | 5 | class PointMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = PointEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 2 11 | MAZE_SIZE_SCALING = 3.0 12 | 13 | MANUAL_COLLISION = True 14 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/mujoco/maze/swimmer_maze_env.py: -------------------------------------------------------------------------------- 1 | from rllab.envs.mujoco.maze.maze_env import MazeEnv 2 | from rllab.envs.mujoco.swimmer_env import SwimmerEnv 3 | 4 | 5 | class SwimmerMazeEnv(MazeEnv): 6 | 7 | MODEL_CLASS = SwimmerEnv 8 | ORI_IND = 2 9 | 10 | MAZE_HEIGHT = 0.5 11 | MAZE_SIZE_SCALING = 4 12 | MAZE_MAKE_CONTACTS = True 13 | 14 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/proxy_env.py: -------------------------------------------------------------------------------- 1 | from .base import Env 2 | 3 | 4 | class ProxyEnv(Env): 5 | def __init__(self, wrapped_env): 6 | self._wrapped_env = wrapped_env 7 | 8 | @property 9 | def wrapped_env(self): 10 | return self._wrapped_env 11 | 12 | def reset(self): 13 | return self._wrapped_env.reset() 14 | 15 | @property 16 | def action_space(self): 17 | return self._wrapped_env.action_space 18 | 19 | @property 20 | def observation_space(self): 21 | return self._wrapped_env.observation_space 22 | 23 | def step(self, action): 24 | return self._wrapped_env.step(action) 25 | 26 | def render(self, *args, **kwargs): 27 | return self._wrapped_env.render(*args, **kwargs) 28 | 29 | def log_diagnostics(self, paths): 30 | 
self._wrapped_env.log_diagnostics(paths) 31 | 32 | @property 33 | def horizon(self): 34 | return self._wrapped_env.horizon 35 | 36 | def terminate(self): 37 | self._wrapped_env.terminate() 38 | 39 | def get_param_values(self): 40 | return self._wrapped_env.get_param_values() 41 | 42 | def set_param_values(self,params): 43 | self._wrapped_env.set_param_values(params) 44 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/envs/sliding_mem_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rllab.core.serializable import Serializable 4 | from rllab.envs.base import Step 5 | from rllab.envs.proxy_env import ProxyEnv 6 | from rllab.misc import autoargs 7 | from rllab.misc.overrides import overrides 8 | from rllab.spaces import Box 9 | 10 | 11 | class SlidingMemEnv(ProxyEnv, Serializable): 12 | 13 | def __init__( 14 | self, 15 | env, 16 | n_steps=4, 17 | axis=0, 18 | ): 19 | super().__init__(env) 20 | Serializable.quick_init(self, locals()) 21 | self.n_steps = n_steps 22 | self.axis = axis 23 | self.buffer = None 24 | 25 | def reset_buffer(self, new_): 26 | assert self.axis == 0 27 | self.buffer = np.zeros(self.observation_space.shape, dtype=np.float32) 28 | self.buffer[0:] = new_ 29 | 30 | def add_to_buffer(self, new_): 31 | assert self.axis == 0 32 | self.buffer[1:] = self.buffer[:-1] 33 | self.buffer[:1] = new_ 34 | 35 | @property 36 | def observation_space(self): 37 | origin = self._wrapped_env.observation_space 38 | return Box( 39 | *[ 40 | np.repeat(b, self.n_steps, axis=self.axis) 41 | for b in origin.bounds 42 | ] 43 | ) 44 | 45 | @overrides 46 | def reset(self): 47 | obs = self._wrapped_env.reset() 48 | self.reset_buffer(obs) 49 | return self.buffer 50 | 51 | @overrides 52 | def step(self, action): 53 | next_obs, reward, done, info = self._wrapped_env.step(action) 54 | self.add_to_buffer(next_obs) 55 | return Step(self.buffer, reward, done, **info) 56 | 57 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/exploration_strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/exploration_strategies/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/exploration_strategies/base.py: -------------------------------------------------------------------------------- 1 | class ExplorationStrategy(object): 2 | def get_action(self, t, observation, policy, **kwargs): 3 | raise NotImplementedError 4 | 5 | def reset(self): 6 | pass 7 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/exploration_strategies/gaussian_strategy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.serializable import Serializable 2 | from rllab.spaces.box import Box 3 | from rllab.exploration_strategies.base import ExplorationStrategy 4 | import numpy as np 5 | 6 | 7 | class GaussianStrategy(ExplorationStrategy, Serializable): 8 | """ 9 | This strategy adds Gaussian noise to the action taken by the deterministic policy. 
10 | """ 11 | 12 | def __init__(self, env_spec, max_sigma=1.0, min_sigma=0.1, decay_period=1000000): 13 | assert isinstance(env_spec.action_space, Box) 14 | assert len(env_spec.action_space.shape) == 1 15 | Serializable.quick_init(self, locals()) 16 | self._max_sigma = max_sigma 17 | self._min_sigma = min_sigma 18 | self._decay_period = decay_period 19 | self._action_space = env_spec.action_space 20 | 21 | def get_action(self, t, observation, policy, **kwargs): 22 | action, agent_info = policy.get_action(observation) 23 | sigma = self._max_sigma - (self._max_sigma - self._min_sigma) * min(1.0, t * 1.0) / self._decay_period 24 | return np.clip(action + np.random.normal(size=len(action)) * sigma, self._action_space.low, 25 | self._action_space.high) 26 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/misc/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/misc/mako_utils.py: -------------------------------------------------------------------------------- 1 | 2 | def compute_rect_vertices(fromp, to, radius): 3 | x1, y1 = fromp 4 | x2, y2 = to 5 | if abs(y1 - y2) < 1e-6: 6 | dx = 0 7 | dy = radius 8 | else: 9 | dx = radius * 1.0 / (((x1 - x2) / (y1 - y2)) ** 2 + 1) ** 0.5 10 | # equivalently dx = radius * (y2-y1).to_f / ((x2-x1)**2 + (y2-y1)**2)**0.5 11 | dy = (radius**2 - dx**2) ** 0.5 12 | dy *= -1 if (x1 - x2) * (y1 - y2) > 0 else 1 13 | 14 | return ";".join([",".join(map(str, r)) for r in [ 15 | [x1 + dx, y1 + dy], 16 | [x2 + dx, y2 + dy], 17 | [x2 - dx, y2 - dy], 18 | [x1 - dx, y1 - dy], 19 | ]]) 20 | 21 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/misc/meta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/misc/meta.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/.rvmrc: -------------------------------------------------------------------------------- 1 | rvm use 2.1.0@mjpy --create 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'pry' 4 | gem 'activesupport' 5 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (4.1.8) 5 | i18n (~> 0.6, >= 0.6.9) 6 | json (~> 1.7, >= 1.7.7) 7 | minitest (~> 5.1) 8 | thread_safe (~> 0.1) 9 | tzinfo (~> 1.1) 10 | coderay (1.1.0) 11 | i18n (0.7.0) 12 | json (1.8.1) 13 | method_source (0.8.2) 14 | minitest (5.5.1) 15 | pry (0.10.1) 16 | coderay (~> 1.1.0) 17 | method_source (~> 0.8.1) 18 | slop (~> 3.4) 19 | slop (3.6.0) 20 | thread_safe (0.3.4) 21 | tzinfo (1.2.2) 22 | thread_safe (~> 0.1) 23 | 24 | PLATFORMS 25 | ruby 26 | 27 | DEPENDENCIES 28 | activesupport 29 | pry 30 | 
-------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .mjviewer import MjViewer 2 | from .mjcore import MjModel 3 | from .mjcore import register_license 4 | import os 5 | from .mjconstants import * 6 | 7 | register_license(os.path.join(os.path.dirname(__file__), 8 | '../../vendor/mujoco/mjkey.txt')) 9 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/gen_binding.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | parent_path=$( cd "$(dirname "${BASH_SOURCE}")" ; pwd -P ) 3 | mujoco_path=$parent_path/../../vendor/mujoco 4 | rm /tmp/code_gen_mujoco.h 5 | cat $mujoco_path/mjdata.h >> /tmp/code_gen_mujoco.h && \ 6 | cat $mujoco_path/mjmodel.h >> /tmp/code_gen_mujoco.h && \ 7 | cat $mujoco_path/mjrender.h >> /tmp/code_gen_mujoco.h && \ 8 | cat $mujoco_path/mjvisualize.h >> /tmp/code_gen_mujoco.h && \ 9 | ruby $parent_path/codegen.rb /tmp/code_gen_mujoco.h $mujoco_path/mjxmacro.h > $parent_path/mjtypes.py 10 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/mjconstants.py: -------------------------------------------------------------------------------- 1 | MOUSE_ROTATE_V = 1 2 | MOUSE_ROTATE_H = 2 3 | MOUSE_MOVE_V = 3 4 | MOUSE_MOVE_H = 4 5 | MOUSE_ZOOM = 5 6 | 7 | mjOBJ_BODY = 1 8 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/mujoco_py/mjextra.py: -------------------------------------------------------------------------------- 1 | def append_objects(cur, extra): 2 | for i in range(cur.ngeom, cur.ngeom + extra.ngeom): 3 | cur.geoms[i] = extra.geoms[i - cur.ngeom] 4 | cur.ngeom = cur.ngeom + extra.ngeom 5 | if cur.ngeom > cur.maxgeom: 6 | raise ValueError("buffer limit exceeded!") 7 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/optimizers/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/optimizers/minibatch_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class BatchDataset(object): 5 | 6 | def __init__(self, inputs, batch_size, extra_inputs=None): 7 | self._inputs = [ 8 | i for i in inputs 9 | ] 10 | if extra_inputs is None: 11 | extra_inputs = [] 12 | self._extra_inputs = extra_inputs 13 | self._batch_size = batch_size 14 | if batch_size is not None: 15 | self._ids = np.arange(self._inputs[0].shape[0]) 16 | self.update() 17 | 18 | @property 19 | def number_batches(self): 20 | if self._batch_size is None: 21 | return 1 22 | return int(np.ceil(self._inputs[0].shape[0] * 1.0 / self._batch_size)) 23 | 24 | def iterate(self, update=True): 25 | if self._batch_size is None: 26 | yield list(self._inputs) + list(self._extra_inputs) 27 | else: 28 | for itr in range(self.number_batches): 29 | batch_start = itr * self._batch_size 30 | batch_end = (itr + 1) * self._batch_size 31 | batch_ids = self._ids[batch_start:batch_end] 32 | batch = [d[batch_ids] for d in self._inputs] 33 | yield 
list(batch) + list(self._extra_inputs) 34 | if update: 35 | self.update() 36 | 37 | def update(self): 38 | np.random.shuffle(self._ids) 39 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/plotter/__init__.py: -------------------------------------------------------------------------------- 1 | from .plotter import * 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/policies/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/policies/uniform_control_policy.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | from rllab.core.serializable import Serializable 3 | from rllab.distributions.delta import Delta 4 | from rllab.policies.base import Policy 5 | from rllab.misc.overrides import overrides 6 | 7 | 8 | class UniformControlPolicy(Policy, Serializable): 9 | def __init__( 10 | self, 11 | env_spec, 12 | ): 13 | Serializable.quick_init(self, locals()) 14 | super(UniformControlPolicy, self).__init__(env_spec=env_spec) 15 | 16 | @overrides 17 | def get_action(self, observation): 18 | return self.action_space.sample(), dict() 19 | 20 | def get_params_internal(self, **tags): 21 | return [] 22 | 23 | def get_param_values(self, **tags): 24 | return [] 25 | 26 | def get_actions(self, observations): 27 | return self.action_space.sample_n(len(observations)), dict() 28 | 29 | @property 30 | def vectorized(self): 31 | return True 32 | 33 | def reset(self, dones=None): 34 | pass 35 | 36 | @property 37 | def distribution(self): 38 | # Just a placeholder 39 | return Delta() 40 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/q_functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/q_functions/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/q_functions/base.py: -------------------------------------------------------------------------------- 1 | from rllab.core.parameterized import Parameterized 2 | 3 | 4 | class QFunction(Parameterized): 5 | pass 6 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/regressors/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/sampler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/rllab/sampler/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/rllab/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from .product import Product 2 | from .discrete import Discrete 3 | from .box import Box 4 | 5 
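BatchDataset above shuffles row indices and yields minibatches of every input array (any extra_inputs are passed through unchanged); with batch_size=None it yields the whole dataset once. A small hypothetical example:

```python
import numpy as np
from rllab.optimizers.minibatch_dataset import BatchDataset

observations = np.arange(10).reshape(10, 1)
actions = np.arange(10, 20).reshape(10, 1)
dataset = BatchDataset([observations, actions], batch_size=4)

for batch_obs, batch_act in dataset.iterate():
    # three shuffled minibatches of 4, 4 and 2 rows
    print(batch_obs.shape, batch_act.shape)
```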
| __all__ = ["Product", "Discrete", "Box"] -------------------------------------------------------------------------------- /src/rllab-adv/rllab/spaces/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Space(object): 5 | """ 6 | Provides a classification state spaces and action spaces, 7 | so you can write generic code that applies to any Environment. 8 | E.g. to choose a random action. 9 | """ 10 | 11 | def sample(self, seed=0): 12 | """ 13 | Uniformly randomly sample a random elemnt of this space 14 | """ 15 | raise NotImplementedError 16 | 17 | def contains(self, x): 18 | """ 19 | Return boolean specifying if x is a valid 20 | member of this space 21 | """ 22 | raise NotImplementedError 23 | 24 | def flatten(self, x): 25 | raise NotImplementedError 26 | 27 | def unflatten(self, x): 28 | raise NotImplementedError 29 | 30 | def flatten_n(self, xs): 31 | raise NotImplementedError 32 | 33 | def unflatten_n(self, xs): 34 | raise NotImplementedError 35 | 36 | @property 37 | def flat_dim(self): 38 | """ 39 | The dimension of the flattened vector of the tensor representation 40 | """ 41 | raise NotImplementedError 42 | 43 | def new_tensor_variable(self, name, extra_dims): 44 | """ 45 | Create a Theano tensor variable given the name and extra dimensions prepended 46 | :param name: name of the variable 47 | :param extra_dims: extra dimensions in the front 48 | :return: the created tensor variable 49 | """ 50 | raise NotImplementedError 51 | -------------------------------------------------------------------------------- /src/rllab-adv/rllab/viskit/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /src/rllab-adv/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/scripts/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/scripts/setup_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that conda is available 3 | 4 | hash conda 2>/dev/null || { 5 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 6 | exit 0 7 | } 8 | 9 | echo "Installing system dependencies" 10 | echo "You will probably be asked for your sudo password." 11 | sudo apt-get update 12 | sudo apt-get install -y python-pip python-dev swig cmake build-essential 13 | sudo apt-get build-dep -y python-pygame 14 | sudo apt-get build-dep -y python-scipy 15 | 16 | # Make sure that we're under the directory of the project 17 | cd "$(dirname "$0")/.." 18 | 19 | echo "Creating conda environment..." 20 | conda env create -f environment.yml 21 | conda env update 22 | 23 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 
24 | -------------------------------------------------------------------------------- /src/rllab-adv/scripts/setup_osx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Make sure that pip is available 3 | hash brew 2>/dev/null || { 4 | echo "Please install homebrew before continuing. You can use the following command to install:" 5 | echo "/usr/bin/ruby -e \"\$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)\"" 6 | exit 0 7 | } 8 | 9 | hash conda 2>/dev/null || { 10 | echo "Please install anaconda before continuing. You can download it at https://www.continuum.io/downloads. Please use the Python 2.7 installer." 11 | exit 0 12 | } 13 | 14 | 15 | echo "Installing system dependencies" 16 | echo "You will probably be asked for your sudo password." 17 | 18 | brew install swig sdl sdl_image sdl_mixer sdl_ttf portmidi 19 | 20 | # Make sure that we're under the directory of the project 21 | cd "$(dirname "$0")/.." 22 | echo "Creating conda environment..." 23 | conda env create -f environment.yml 24 | conda env update 25 | 26 | echo "Conda environment created! Make sure to run \`source activate rllab3\` whenever you open a new terminal and want to run programs under rllab." 27 | -------------------------------------------------------------------------------- /src/rllab-adv/scripts/sim_policy.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import joblib 4 | import tensorflow as tf 5 | 6 | from rllab.misc.console import query_yes_no 7 | from rllab.sampler.utils import rollout 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('file', type=str, 13 | help='path to the snapshot file') 14 | parser.add_argument('--max_path_length', type=int, default=1000, 15 | help='Max length of rollout') 16 | parser.add_argument('--speedup', type=float, default=1, 17 | help='Speedup') 18 | args = parser.parse_args() 19 | 20 | # If the snapshot file use tensorflow, do: 21 | # import tensorflow as tf 22 | # with tf.Session(): 23 | # [rest of the code] 24 | with tf.Session() as sess: 25 | data = joblib.load(args.file) 26 | policy = data['policy'] 27 | env = data['env'] 28 | while True: 29 | path = rollout(env, policy, max_path_length=args.max_path_length, 30 | animated=True, speedup=args.speedup) 31 | if not query_yes_no('Continue simulation?'): 32 | break 33 | -------------------------------------------------------------------------------- /src/rllab-adv/scripts/submit_gym.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import argparse 4 | import os 5 | import os.path as osp 6 | import gym 7 | from rllab.viskit.core import load_params 8 | 9 | if __name__ == "__main__": 10 | # rl_gym.api_key = 'g8JOpnNVmcjMShBiFtyji2VWX3P2uCzc' 11 | if 'OPENAI_GYM_API_KEY' not in os.environ: 12 | raise ValueError("OpenAi Gym API key not configured. 
Please register an account on https://gym.openai.com and" 13 | " set the OPENAI_GYM_API_KEY environment variable, and try the script again.") 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('log_dir', type=str, 17 | help='path to the logging directory') 18 | parser.add_argument('--algorithm_id', type=str, default=None, help='Algorithm ID') 19 | args = parser.parse_args() 20 | snapshot_dir = osp.abspath(osp.join(args.log_dir, "..")) 21 | params_file_path = osp.join(snapshot_dir, "params.json") 22 | params_json = load_params(params_file_path) 23 | gym.upload(args.log_dir, algorithm_id=args.algorithm_id) 24 | -------------------------------------------------------------------------------- /src/rllab-adv/scripts/sync_s3.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('.') 3 | from rllab import config 4 | import os 5 | import argparse 6 | import ast 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('folder', type=str, default=None, nargs='?') 11 | parser.add_argument('--dry', action='store_true', default=False) 12 | parser.add_argument('--bare', action='store_true', default=False) 13 | args = parser.parse_args() 14 | remote_dir = config.AWS_S3_PATH 15 | local_dir = os.path.join(config.LOG_DIR, "s3") 16 | if args.folder: 17 | remote_dir = os.path.join(remote_dir, args.folder) 18 | local_dir = os.path.join(local_dir, args.folder) 19 | if args.bare: 20 | command = (""" 21 | aws s3 sync {remote_dir} {local_dir} --exclude '*' --include '*.csv' --include '*.json' --content-type "UTF-8" 22 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 23 | else: 24 | command = (""" 25 | aws s3 sync {remote_dir} {local_dir} --exclude '*stdout.log' --exclude '*stdouterr.log' --content-type "UTF-8" 26 | """.format(local_dir=local_dir, remote_dir=remote_dir)) 27 | if args.dry: 28 | print(command) 29 | else: 30 | os.system(command) -------------------------------------------------------------------------------- /src/rllab-adv/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='rllab', 6 | version='0.1.0', 7 | packages= ['rllab'] 8 | ) 9 | -------------------------------------------------------------------------------- /src/rllab-adv/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/tests/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/tests/algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/rllab-adv/tests/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/src/rllab-adv/tests/envs/__init__.py -------------------------------------------------------------------------------- /src/rllab-adv/tests/envs/test_maze_env.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from rllab.envs.mujoco.maze.maze_env_utils import line_intersect, ray_segment_intersect 4 | 5 | 6 | def 
--------------------------------------------------------------------------------
/src/rllab-adv/tests/envs/test_maze_env.py:
--------------------------------------------------------------------------------
import math

from rllab.envs.mujoco.maze.maze_env_utils import line_intersect, ray_segment_intersect


def test_line_intersect():
    assert line_intersect((0, 0), (0, 1), (0, 0), (1, 0))[:2] == (0, 0)
    assert line_intersect((0, 0), (0, 1), (0, 0), (0, 1))[2] == 0
    assert ray_segment_intersect(ray=((0, 0), 0), segment=((1, -1), (1, 1))) == (1, 0)
    assert ray_segment_intersect(ray=((0, 0), math.pi), segment=((1, -1), (1, 1))) is None
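The geometry these assertions encode: a ray is a (point, angle) pair, a segment is a pair of endpoints, and the helper returns the intersection point or None. The standalone sketch below reimplements that contract with the usual parametric solve so the expected values are easy to check; it is illustrative only and is not rllab's maze_env_utils code.

# Illustrative reimplementation (not rllab's): solve origin + t*d = A + u*(B - A)
# for t >= 0 along the ray and 0 <= u <= 1 along the segment.
import math

def ray_segment_intersect(ray, segment):
    (ox, oy), theta = ray
    (ax, ay), (bx, by) = segment
    dx, dy = math.cos(theta), math.sin(theta)
    sx, sy = bx - ax, by - ay
    denom = dx * sy - dy * sx
    if abs(denom) < 1e-12:                              # ray parallel to the segment
        return None
    t = ((ax - ox) * sy - (ay - oy) * sx) / denom       # distance along the ray
    u = ((ax - ox) * dy - (ay - oy) * dx) / denom       # position along the segment
    if t >= 0 and 0 <= u <= 1:
        return (ox + t * dx, oy + t * dy)
    return None

print(ray_segment_intersect(((0, 0), 0), ((1, -1), (1, 1))))        # (1.0, 0.0)
print(ray_segment_intersect(((0, 0), math.pi), ((1, -1), (1, 1))))  # None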
--------------------------------------------------------------------------------
/src/rllab-adv/tests/regression_tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/rllab-adv/tests/regression_tests/test_issue_3.py:
--------------------------------------------------------------------------------
from nose2.tools import such
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
from rllab.algos.trpo import TRPO
from rllab.baselines.zero_baseline import ZeroBaseline

with such.A("Issue #3") as it:
    @it.should("be fixed")
    def test_issue_3():
        """
        As reported in https://github.com/rllab/rllab/issues/3, the
        adaptive_std parameter was not functioning properly.
        """
        env = CartpoleEnv()
        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            adaptive_std=True
        )
        baseline = ZeroBaseline(env_spec=env.spec)
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=100,
            n_itr=1
        )
        algo.train()

it.createTests(globals())
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_baselines.py:
--------------------------------------------------------------------------------
import os

os.environ['THEANO_FLAGS'] = 'mode=FAST_COMPILE,optimizer=None'

from rllab.algos.vpg import VPG
from rllab.envs.box2d.cartpole_env import CartpoleEnv
from rllab.baselines.zero_baseline import ZeroBaseline
from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline
from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
from nose2 import tools


baselines = [ZeroBaseline, LinearFeatureBaseline, GaussianMLPBaseline]


@tools.params(*baselines)
def test_baseline(baseline_cls):
    env = CartpoleEnv()
    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(6,))
    baseline = baseline_cls(env_spec=env.spec)
    algo = VPG(
        env=env, policy=policy, baseline=baseline,
        n_itr=1, batch_size=1000, max_path_length=100
    )
    algo.train()
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_networks.py:
--------------------------------------------------------------------------------
def test_gru_network():
    from rllab.core.network import GRUNetwork
    import lasagne.layers as L
    from rllab.misc import ext
    import numpy as np
    network = GRUNetwork(
        input_shape=(2, 3),
        output_dim=5,
        hidden_dim=4,
    )
    f_output = ext.compile_function(
        inputs=[network.input_layer.input_var],
        outputs=L.get_output(network.output_layer)
    )
    # input is (batch=6, time=8, *input_shape); output is (batch, time, output_dim)
    assert f_output(np.zeros((6, 8, 2, 3))).shape == (6, 8, 5)
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_sampler.py:
--------------------------------------------------------------------------------
import numpy as np


def test_truncate_paths():
    from rllab.sampler.parallel_sampler import truncate_paths

    paths = [
        dict(
            observations=np.zeros((100, 1)),
            actions=np.zeros((100, 1)),
            rewards=np.zeros(100),
            env_infos=dict(),
            agent_infos=dict(lala=np.zeros(100)),
        ),
        dict(
            observations=np.zeros((50, 1)),
            actions=np.zeros((50, 1)),
            rewards=np.zeros(50),
            env_infos=dict(),
            agent_infos=dict(lala=np.zeros(50)),
        ),
    ]

    # cap of 130 samples: the 100-step path is kept whole, the 50-step path is cut to 30
    truncated = truncate_paths(paths, 130)
    assert len(truncated) == 2
    assert len(truncated[-1]["observations"]) == 30
    assert len(truncated[0]["observations"]) == 100
    # make sure not to change the original one
    assert len(paths) == 2
    assert len(paths[-1]["observations"]) == 50
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_serializable.py:
--------------------------------------------------------------------------------
import tensorflow as tf

from rllab.core.serializable import Serializable
from sandbox.rocky.tf.core.parameterized import Parameterized, suppress_params_loading


class Simple(Parameterized, Serializable):
    def __init__(self, name):
        Serializable.quick_init(self, locals())
        with tf.variable_scope(name):
            self.w = tf.get_variable("w", [10, 10])

    def get_params_internal(self, **tags):
        return [self.w]


def test_serializable():
    with suppress_params_loading():
        obj = Simple(name="obj")
        obj1 = Serializable.clone(obj, name="obj1")
        assert obj.w.name.startswith('obj/')
        assert obj1.w.name.startswith('obj1/')


if __name__ == "__main__":
    test_serializable()
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_spaces.py:
--------------------------------------------------------------------------------
from rllab.spaces import Product, Discrete, Box
import numpy as np


def test_product_space():
    _ = Product([Discrete(3), Discrete(2)])
    product_space = Product(Discrete(3), Discrete(2))
    sample = product_space.sample()
    assert product_space.contains(sample)


def test_product_space_unflatten_n():
    space = Product([Discrete(3), Discrete(3)])
    np.testing.assert_array_equal(space.flatten((2, 2)), space.flatten_n([(2, 2)])[0])
    np.testing.assert_array_equal(
        space.unflatten(space.flatten((2, 2))),
        space.unflatten_n(space.flatten_n([(2, 2)]))[0]
    )


def test_box():
    space = Box(low=-1, high=1, shape=(2, 2))
    np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]), [1, 2, 3, 4])
    np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]), [[1, 2, 3, 4]])
    np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]), [[1, 2], [3, 4]])
    np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]), [[[1, 2], [3, 4]]])
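A note on what flatten is exercising in the Product tests: assuming the usual convention that Discrete(n) flattens to a length-n one-hot vector and Product concatenates its components, flatten((2, 2)) on Product([Discrete(3), Discrete(3)]) should give [0, 0, 1, 0, 0, 1]. The snippet below is a plain-numpy illustration of that assumed encoding, not rllab's implementation.

# Illustration of the assumed one-hot + concatenate encoding (not rllab's code).
import numpy as np

def flatten_discrete(x, n):
    onehot = np.zeros(n)
    onehot[x] = 1.0
    return onehot

def flatten_product(xs, ns):
    return np.concatenate([flatten_discrete(x, n) for x, n in zip(xs, ns)])

print(flatten_product((2, 2), (3, 3)))  # [0. 0. 1. 0. 0. 1.]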
--------------------------------------------------------------------------------
/src/rllab-adv/tests/test_stateful_pool.py:
--------------------------------------------------------------------------------
def _worker_collect_once(_):
    return 'a', 1


def test_stateful_pool():
    from rllab.sampler import stateful_pool
    stateful_pool.singleton_pool.initialize(n_parallel=3)
    results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False)
    assert tuple(results) == ('a', 'a', 'a')


def test_stateful_pool_over_capacity():
    from rllab.sampler import stateful_pool
    stateful_pool.singleton_pool.initialize(n_parallel=4)
    results = stateful_pool.singleton_pool.run_collect(_worker_collect_once, 3, show_prog_bar=False)
    assert len(results) >= 3
--------------------------------------------------------------------------------
/src/rllab-adv/vendor/mujoco_models/green_ball.xml:
--------------------------------------------------------------------------------
(XML markup not captured in this dump)
--------------------------------------------------------------------------------
/src/rllab-adv/vendor/mujoco_models/red_ball.xml:
--------------------------------------------------------------------------------
(XML markup not captured in this dump)
--------------------------------------------------------------------------------
/temp.py:
--------------------------------------------------------------------------------
from pprint import pprint
from gym import envs
# xs = envs.registry.all()
# for x in xs:
#     print(x)

# import logging
# logger = logging.getLogger(__name__)
# logger.setLevel(logging.INFO)

import gym
# from gym_recording.wrappers import TraceRecordingWrapper
env = gym.make('InvertedPendulumAdv-v1')
# env = gym.make('Phoenix-v0')

# env = TraceRecordingWrapper(env)
# env.directory = "./"
# print(env.directory)

for i_episode in range(20):
    observation = env.reset()
    for t in range(10):
        env.render()
        print(observation)
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break

# print(env.directory)
--------------------------------------------------------------------------------
/temp2.py:
--------------------------------------------------------------------------------
from IPython import display
import gym
import matplotlib.pyplot as plt
# %matplotlib inline is an IPython magic; it only works when this file is pasted into a Jupyter/IPython cell
%matplotlib inline

env = gym.make('Breakout-v0')  # insert your favorite environment
render = lambda: plt.imshow(env.render(mode='rgb_array'))
env.reset()
render()
--------------------------------------------------------------------------------
/temp_results/env-InvertedPendulumAdv-v1_Exp2_Itr5_BS4000_Adv0.25_stp0.01_lam0.97_816990.p.temp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jerinphilip/robust-adversarial-rl/a31671ab876664d5e62add81bf99740cf3e35dbf/temp_results/env-InvertedPendulumAdv-v1_Exp2_Itr5_BS4000_Adv0.25_stp0.01_lam0.97_816990.p.temp
--------------------------------------------------------------------------------
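temp2.py only draws a single rgb_array frame. For an animated view inside a notebook, the frame can be redrawn each step; the sketch below is one common way to do that with IPython.display and is an illustration, not part of the repo.

# Hypothetical follow-up to temp2.py: redraw the rendered frame every step (Jupyter only).
import gym
import matplotlib.pyplot as plt
from IPython import display

env = gym.make('Breakout-v0')
env.reset()
img = plt.imshow(env.render(mode='rgb_array'))
for _ in range(100):
    img.set_data(env.render(mode='rgb_array'))
    display.display(plt.gcf())
    display.clear_output(wait=True)
    observation, reward, done, info = env.step(env.action_space.sample())
    if done:
        env.reset()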