├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── baselines ├── __init__.py ├── a2c │ ├── README.md │ ├── __init__.py │ ├── a2c.py │ ├── a2c_sil.py │ ├── policies.py │ ├── run_atari.py │ ├── run_atari_sil.py │ └── utils.py ├── acer │ ├── README.md │ ├── __init__.py │ ├── acer_simple.py │ ├── buffer.py │ ├── policies.py │ └── run_atari.py ├── acktr │ ├── README.md │ ├── __init__.py │ ├── acktr_cont.py │ ├── acktr_disc.py │ ├── kfac.py │ ├── kfac_utils.py │ ├── policies.py │ ├── run_atari.py │ ├── run_mujoco.py │ ├── utils.py │ └── value_functions.py ├── bench │ ├── __init__.py │ ├── benchmarks.py │ └── monitor.py ├── common │ ├── __init__.py │ ├── atari_wrappers.py │ ├── cg.py │ ├── cmd_util.py │ ├── console_util.py │ ├── dataset.py │ ├── distributions.py │ ├── filters.py │ ├── identity_env.py │ ├── input.py │ ├── math_util.py │ ├── misc_util.py │ ├── mpi_adam.py │ ├── mpi_fork.py │ ├── mpi_moments.py │ ├── mpi_running_mean_std.py │ ├── runners.py │ ├── running_mean_std.py │ ├── running_stat.py │ ├── schedules.py │ ├── segment_tree.py │ ├── self_imitation.py │ ├── test_identity.py │ ├── tests │ │ ├── test_schedules.py │ │ ├── test_segment_tree.py │ │ └── test_tf_util.py │ ├── tf_util.py │ └── vec_env │ │ ├── __init__.py │ │ ├── dummy_vec_env.py │ │ ├── subproc_vec_env.py │ │ ├── vec_frame_stack.py │ │ └── vec_normalize.py ├── ddpg │ ├── README.md │ ├── __init__.py │ ├── ddpg.py │ ├── main.py │ ├── memory.py │ ├── models.py │ ├── noise.py │ └── training.py ├── deepq │ ├── README.md │ ├── __init__.py │ ├── build_graph.py │ ├── experiments │ │ ├── __init__.py │ │ ├── custom_cartpole.py │ │ ├── enjoy_cartpole.py │ │ ├── enjoy_mountaincar.py │ │ ├── enjoy_pong.py │ │ ├── run_atari.py │ │ ├── train_cartpole.py │ │ └── train_mountaincar.py │ ├── models.py │ ├── replay_buffer.py │ ├── simple.py │ ├── test_identity.py │ └── utils.py ├── gail │ ├── README.md │ ├── __init__.py │ ├── adversary.py │ ├── behavior_clone.py │ ├── dataset │ │ ├── __init__.py │ │ └── mujoco_dset.py │ ├── gail-eval.py │ ├── mlp_policy.py │ ├── result │ │ ├── HalfCheetah-normalized-deterministic-scores.png │ │ ├── HalfCheetah-normalized-stochastic-scores.png │ │ ├── HalfCheetah-unnormalized-deterministic-scores.png │ │ ├── HalfCheetah-unnormalized-stochastic-scores.png │ │ ├── Hopper-normalized-deterministic-scores.png │ │ ├── Hopper-normalized-stochastic-scores.png │ │ ├── Hopper-unnormalized-deterministic-scores.png │ │ ├── Hopper-unnormalized-stochastic-scores.png │ │ ├── Humanoid-normalized-deterministic-scores.png │ │ ├── Humanoid-normalized-stochastic-scores.png │ │ ├── Humanoid-unnormalized-deterministic-scores.png │ │ ├── Humanoid-unnormalized-stochastic-scores.png │ │ ├── HumanoidStandup-normalized-deterministic-scores.png │ │ ├── HumanoidStandup-normalized-stochastic-scores.png │ │ ├── HumanoidStandup-unnormalized-deterministic-scores.png │ │ ├── HumanoidStandup-unnormalized-stochastic-scores.png │ │ ├── Walker2d-normalized-deterministic-scores.png │ │ ├── Walker2d-normalized-stochastic-scores.png │ │ ├── Walker2d-unnormalized-deterministic-scores.png │ │ ├── Walker2d-unnormalized-stochastic-scores.png │ │ ├── gail-result.md │ │ ├── halfcheetah-training.png │ │ ├── hopper-training.png │ │ ├── humanoid-training.png │ │ ├── humanoidstandup-training.png │ │ └── walker2d-training.png │ ├── run_mujoco.py │ ├── statistics.py │ └── trpo_mpi.py ├── her │ ├── README.md │ ├── __init__.py │ ├── actor_critic.py │ ├── ddpg.py │ ├── experiment │ │ ├── __init__.py │ │ ├── config.py │ │ ├── play.py │ │ ├── plot.py │ │ └── train.py │ ├── her.py │ ├── normalizer.py │ ├── replay_buffer.py │ ├── rollout.py │ └── util.py ├── logger.py ├── ppo1 │ ├── README.md │ ├── __init__.py │ ├── cnn_policy.py │ ├── mlp_policy.py │ ├── pposgd_simple.py │ ├── run_atari.py │ └── run_mujoco.py ├── ppo2 │ ├── README.md │ ├── __init__.py │ ├── policies.py │ ├── ppo2.py │ ├── ppo2_sil.py │ ├── run_atari.py │ ├── run_mujoco.py │ └── run_mujoco_sil.py ├── results_plotter.py └── trpo_mpi │ ├── README.md │ ├── __init__.py │ ├── nosharing_cnn_policy.py │ ├── run_atari.py │ ├── run_mujoco.py │ └── trpo_mpi.py ├── data ├── cartpole.gif └── logo.jpg └── setup.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/.travis.yml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/README.md -------------------------------------------------------------------------------- /baselines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/a2c/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/README.md -------------------------------------------------------------------------------- /baselines/a2c/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/a2c/a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/a2c.py -------------------------------------------------------------------------------- /baselines/a2c/a2c_sil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/a2c_sil.py -------------------------------------------------------------------------------- /baselines/a2c/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/policies.py -------------------------------------------------------------------------------- /baselines/a2c/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/run_atari.py -------------------------------------------------------------------------------- /baselines/a2c/run_atari_sil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/run_atari_sil.py -------------------------------------------------------------------------------- /baselines/a2c/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/a2c/utils.py -------------------------------------------------------------------------------- /baselines/acer/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acer/README.md -------------------------------------------------------------------------------- /baselines/acer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/acer/acer_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acer/acer_simple.py -------------------------------------------------------------------------------- /baselines/acer/buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acer/buffer.py -------------------------------------------------------------------------------- /baselines/acer/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acer/policies.py -------------------------------------------------------------------------------- /baselines/acer/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acer/run_atari.py -------------------------------------------------------------------------------- /baselines/acktr/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/README.md -------------------------------------------------------------------------------- /baselines/acktr/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/acktr/acktr_cont.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/acktr_cont.py -------------------------------------------------------------------------------- /baselines/acktr/acktr_disc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/acktr_disc.py -------------------------------------------------------------------------------- /baselines/acktr/kfac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/kfac.py -------------------------------------------------------------------------------- /baselines/acktr/kfac_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/kfac_utils.py -------------------------------------------------------------------------------- /baselines/acktr/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/policies.py -------------------------------------------------------------------------------- /baselines/acktr/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/run_atari.py -------------------------------------------------------------------------------- /baselines/acktr/run_mujoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/run_mujoco.py -------------------------------------------------------------------------------- /baselines/acktr/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/utils.py -------------------------------------------------------------------------------- /baselines/acktr/value_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/acktr/value_functions.py -------------------------------------------------------------------------------- /baselines/bench/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/bench/__init__.py -------------------------------------------------------------------------------- /baselines/bench/benchmarks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/bench/benchmarks.py -------------------------------------------------------------------------------- /baselines/bench/monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/bench/monitor.py -------------------------------------------------------------------------------- /baselines/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/__init__.py -------------------------------------------------------------------------------- /baselines/common/atari_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/atari_wrappers.py -------------------------------------------------------------------------------- /baselines/common/cg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/cg.py -------------------------------------------------------------------------------- /baselines/common/cmd_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/cmd_util.py -------------------------------------------------------------------------------- /baselines/common/console_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/console_util.py -------------------------------------------------------------------------------- /baselines/common/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/dataset.py -------------------------------------------------------------------------------- /baselines/common/distributions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/distributions.py -------------------------------------------------------------------------------- /baselines/common/filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/filters.py -------------------------------------------------------------------------------- /baselines/common/identity_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/identity_env.py -------------------------------------------------------------------------------- /baselines/common/input.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/input.py -------------------------------------------------------------------------------- /baselines/common/math_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/math_util.py -------------------------------------------------------------------------------- /baselines/common/misc_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/misc_util.py -------------------------------------------------------------------------------- /baselines/common/mpi_adam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/mpi_adam.py -------------------------------------------------------------------------------- /baselines/common/mpi_fork.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/mpi_fork.py -------------------------------------------------------------------------------- /baselines/common/mpi_moments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/mpi_moments.py -------------------------------------------------------------------------------- /baselines/common/mpi_running_mean_std.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/mpi_running_mean_std.py -------------------------------------------------------------------------------- /baselines/common/runners.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/runners.py -------------------------------------------------------------------------------- /baselines/common/running_mean_std.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/running_mean_std.py -------------------------------------------------------------------------------- /baselines/common/running_stat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/running_stat.py -------------------------------------------------------------------------------- /baselines/common/schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/schedules.py -------------------------------------------------------------------------------- /baselines/common/segment_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/segment_tree.py -------------------------------------------------------------------------------- /baselines/common/self_imitation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/self_imitation.py -------------------------------------------------------------------------------- /baselines/common/test_identity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/test_identity.py -------------------------------------------------------------------------------- /baselines/common/tests/test_schedules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/tests/test_schedules.py -------------------------------------------------------------------------------- /baselines/common/tests/test_segment_tree.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/tests/test_segment_tree.py -------------------------------------------------------------------------------- /baselines/common/tests/test_tf_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/tests/test_tf_util.py -------------------------------------------------------------------------------- /baselines/common/tf_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/tf_util.py -------------------------------------------------------------------------------- /baselines/common/vec_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/vec_env/__init__.py -------------------------------------------------------------------------------- /baselines/common/vec_env/dummy_vec_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/vec_env/dummy_vec_env.py -------------------------------------------------------------------------------- /baselines/common/vec_env/subproc_vec_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/vec_env/subproc_vec_env.py -------------------------------------------------------------------------------- /baselines/common/vec_env/vec_frame_stack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/vec_env/vec_frame_stack.py -------------------------------------------------------------------------------- /baselines/common/vec_env/vec_normalize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/common/vec_env/vec_normalize.py -------------------------------------------------------------------------------- /baselines/ddpg/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/README.md -------------------------------------------------------------------------------- /baselines/ddpg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/ddpg/ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/ddpg.py -------------------------------------------------------------------------------- /baselines/ddpg/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/main.py -------------------------------------------------------------------------------- /baselines/ddpg/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/memory.py -------------------------------------------------------------------------------- /baselines/ddpg/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/models.py -------------------------------------------------------------------------------- /baselines/ddpg/noise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/noise.py -------------------------------------------------------------------------------- /baselines/ddpg/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ddpg/training.py -------------------------------------------------------------------------------- /baselines/deepq/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/README.md -------------------------------------------------------------------------------- /baselines/deepq/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/__init__.py -------------------------------------------------------------------------------- /baselines/deepq/build_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/build_graph.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/deepq/experiments/custom_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/custom_cartpole.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/enjoy_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/enjoy_cartpole.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/enjoy_mountaincar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/enjoy_mountaincar.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/enjoy_pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/enjoy_pong.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/run_atari.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/train_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/train_cartpole.py -------------------------------------------------------------------------------- /baselines/deepq/experiments/train_mountaincar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/experiments/train_mountaincar.py -------------------------------------------------------------------------------- /baselines/deepq/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/models.py -------------------------------------------------------------------------------- /baselines/deepq/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/replay_buffer.py -------------------------------------------------------------------------------- /baselines/deepq/simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/simple.py -------------------------------------------------------------------------------- /baselines/deepq/test_identity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/test_identity.py -------------------------------------------------------------------------------- /baselines/deepq/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/deepq/utils.py -------------------------------------------------------------------------------- /baselines/gail/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/README.md -------------------------------------------------------------------------------- /baselines/gail/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/gail/adversary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/adversary.py -------------------------------------------------------------------------------- /baselines/gail/behavior_clone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/behavior_clone.py -------------------------------------------------------------------------------- /baselines/gail/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/gail/dataset/mujoco_dset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/dataset/mujoco_dset.py -------------------------------------------------------------------------------- /baselines/gail/gail-eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/gail-eval.py -------------------------------------------------------------------------------- /baselines/gail/mlp_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/mlp_policy.py -------------------------------------------------------------------------------- /baselines/gail/result/HalfCheetah-normalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HalfCheetah-normalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HalfCheetah-normalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HalfCheetah-normalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HalfCheetah-unnormalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HalfCheetah-unnormalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HalfCheetah-unnormalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HalfCheetah-unnormalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Hopper-normalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Hopper-normalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Hopper-normalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Hopper-normalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Hopper-unnormalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Hopper-unnormalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Hopper-unnormalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Hopper-unnormalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Humanoid-normalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Humanoid-normalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Humanoid-normalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Humanoid-normalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Humanoid-unnormalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Humanoid-unnormalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Humanoid-unnormalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Humanoid-unnormalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HumanoidStandup-normalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HumanoidStandup-normalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HumanoidStandup-normalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HumanoidStandup-normalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HumanoidStandup-unnormalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HumanoidStandup-unnormalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/HumanoidStandup-unnormalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/HumanoidStandup-unnormalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Walker2d-normalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Walker2d-normalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Walker2d-normalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Walker2d-normalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Walker2d-unnormalized-deterministic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Walker2d-unnormalized-deterministic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/Walker2d-unnormalized-stochastic-scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/Walker2d-unnormalized-stochastic-scores.png -------------------------------------------------------------------------------- /baselines/gail/result/gail-result.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/gail-result.md -------------------------------------------------------------------------------- /baselines/gail/result/halfcheetah-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/halfcheetah-training.png -------------------------------------------------------------------------------- /baselines/gail/result/hopper-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/hopper-training.png -------------------------------------------------------------------------------- /baselines/gail/result/humanoid-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/humanoid-training.png -------------------------------------------------------------------------------- /baselines/gail/result/humanoidstandup-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/humanoidstandup-training.png -------------------------------------------------------------------------------- /baselines/gail/result/walker2d-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/result/walker2d-training.png -------------------------------------------------------------------------------- /baselines/gail/run_mujoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/run_mujoco.py -------------------------------------------------------------------------------- /baselines/gail/statistics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/statistics.py -------------------------------------------------------------------------------- /baselines/gail/trpo_mpi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/gail/trpo_mpi.py -------------------------------------------------------------------------------- /baselines/her/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/README.md -------------------------------------------------------------------------------- /baselines/her/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/her/actor_critic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/actor_critic.py -------------------------------------------------------------------------------- /baselines/her/ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/ddpg.py -------------------------------------------------------------------------------- /baselines/her/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/her/experiment/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/experiment/config.py -------------------------------------------------------------------------------- /baselines/her/experiment/play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/experiment/play.py -------------------------------------------------------------------------------- /baselines/her/experiment/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/experiment/plot.py -------------------------------------------------------------------------------- /baselines/her/experiment/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/experiment/train.py -------------------------------------------------------------------------------- /baselines/her/her.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/her.py -------------------------------------------------------------------------------- /baselines/her/normalizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/normalizer.py -------------------------------------------------------------------------------- /baselines/her/replay_buffer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/replay_buffer.py -------------------------------------------------------------------------------- /baselines/her/rollout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/rollout.py -------------------------------------------------------------------------------- /baselines/her/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/her/util.py -------------------------------------------------------------------------------- /baselines/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/logger.py -------------------------------------------------------------------------------- /baselines/ppo1/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/README.md -------------------------------------------------------------------------------- /baselines/ppo1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/ppo1/cnn_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/cnn_policy.py -------------------------------------------------------------------------------- /baselines/ppo1/mlp_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/mlp_policy.py -------------------------------------------------------------------------------- /baselines/ppo1/pposgd_simple.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/pposgd_simple.py -------------------------------------------------------------------------------- /baselines/ppo1/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/run_atari.py -------------------------------------------------------------------------------- /baselines/ppo1/run_mujoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo1/run_mujoco.py -------------------------------------------------------------------------------- /baselines/ppo2/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/README.md -------------------------------------------------------------------------------- /baselines/ppo2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/ppo2/policies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/policies.py -------------------------------------------------------------------------------- /baselines/ppo2/ppo2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/ppo2.py -------------------------------------------------------------------------------- /baselines/ppo2/ppo2_sil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/ppo2_sil.py -------------------------------------------------------------------------------- /baselines/ppo2/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/run_atari.py -------------------------------------------------------------------------------- /baselines/ppo2/run_mujoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/run_mujoco.py -------------------------------------------------------------------------------- /baselines/ppo2/run_mujoco_sil.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/ppo2/run_mujoco_sil.py -------------------------------------------------------------------------------- /baselines/results_plotter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/results_plotter.py -------------------------------------------------------------------------------- /baselines/trpo_mpi/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/trpo_mpi/README.md -------------------------------------------------------------------------------- /baselines/trpo_mpi/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /baselines/trpo_mpi/nosharing_cnn_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/trpo_mpi/nosharing_cnn_policy.py -------------------------------------------------------------------------------- /baselines/trpo_mpi/run_atari.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/trpo_mpi/run_atari.py -------------------------------------------------------------------------------- /baselines/trpo_mpi/run_mujoco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/trpo_mpi/run_mujoco.py -------------------------------------------------------------------------------- /baselines/trpo_mpi/trpo_mpi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/baselines/trpo_mpi/trpo_mpi.py -------------------------------------------------------------------------------- /data/cartpole.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/data/cartpole.gif -------------------------------------------------------------------------------- /data/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/data/logo.jpg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junhyukoh/self-imitation-learning/HEAD/setup.py --------------------------------------------------------------------------------