├── .gitignore ├── README.md ├── output └── weights │ ├── CartPole │ ├── CartPole-v0.ckpt.data-00000-of-00001 │ ├── CartPole-v0.ckpt.index │ └── CartPole-v0.ckpt.meta │ └── LunarLander │ ├── 1 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 2 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 3 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 4 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 5 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 6 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 7 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ ├── 8 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint │ └── 9 │ ├── LunarLander-v2.ckpt.data-00000-of-00001 │ ├── LunarLander-v2.ckpt.index │ ├── LunarLander-v2.ckpt.meta │ └── checkpoint ├── policy_gradient.py ├── policy_gradient_layers.py ├── run_acrobot.py ├── run_carracing.py ├── run_cartpole.py ├── run_lunarlander.py └── run_mountaincar.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/* 2 | __pycache__/* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/README.md -------------------------------------------------------------------------------- /output/weights/CartPole/CartPole-v0.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/CartPole/CartPole-v0.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/CartPole/CartPole-v0.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/CartPole/CartPole-v0.ckpt.index -------------------------------------------------------------------------------- /output/weights/CartPole/CartPole-v0.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/CartPole/CartPole-v0.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/1/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/1/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/1/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/1/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/1/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/1/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/1/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/1/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/2/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/2/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/2/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/2/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/2/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/2/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/2/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/2/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/3/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/3/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/3/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/3/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/3/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/3/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/3/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/3/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/4/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/4/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/4/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/4/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/4/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/4/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/4/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/4/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/5/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/5/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/5/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/5/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/5/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/5/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/5/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/5/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/6/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/6/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/6/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/6/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/6/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/6/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/6/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/6/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/7/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/7/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/7/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/7/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/7/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/7/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/7/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/7/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/8/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/8/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/8/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/8/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/8/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/8/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/8/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/8/checkpoint -------------------------------------------------------------------------------- /output/weights/LunarLander/9/LunarLander-v2.ckpt.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/9/LunarLander-v2.ckpt.data-00000-of-00001 -------------------------------------------------------------------------------- /output/weights/LunarLander/9/LunarLander-v2.ckpt.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/9/LunarLander-v2.ckpt.index -------------------------------------------------------------------------------- /output/weights/LunarLander/9/LunarLander-v2.ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/9/LunarLander-v2.ckpt.meta -------------------------------------------------------------------------------- /output/weights/LunarLander/9/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/output/weights/LunarLander/9/checkpoint -------------------------------------------------------------------------------- /policy_gradient.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/policy_gradient.py -------------------------------------------------------------------------------- /policy_gradient_layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/policy_gradient_layers.py -------------------------------------------------------------------------------- /run_acrobot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/run_acrobot.py -------------------------------------------------------------------------------- /run_carracing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/run_carracing.py -------------------------------------------------------------------------------- /run_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/run_cartpole.py -------------------------------------------------------------------------------- /run_lunarlander.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/run_lunarlander.py -------------------------------------------------------------------------------- /run_mountaincar.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabrielgarza/openai-gym-policy-gradient/HEAD/run_mountaincar.py --------------------------------------------------------------------------------