├── .gitignore ├── LICENSE ├── README.md ├── img ├── blackjack_scores.PNG ├── ddpg.png ├── dqn.png ├── dsbkk_soai.png ├── ed_thorp.PNG ├── foundational_flaw.PNG ├── mc_control_glie.PNG ├── mc_control_glie_constant.PNG ├── mc_predict_q.PNG ├── mc_predict_v.PNG ├── mdp_weather.png ├── monte_carlo.jpg ├── neural_nets.png ├── optimizers.gif ├── policygradient.png ├── pong_pg.png ├── pong_progress.png ├── qlearning.png ├── reinforce_derivation1.PNG ├── reinforce_derivation2.PNG ├── rl_framework.gif ├── rl_framework.png ├── taxi_scores.PNG └── xkcd_ml.png ├── model_compile.ipynb ├── models ├── bj_optimal_q.pkl ├── ddpg.pth ├── dqn.pth ├── pg.pth └── taxi_optimal_q.pkl ├── session0.ipynb ├── session1.ipynb ├── session2.ipynb ├── session2b.ipynb ├── session3.ipynb ├── session3b.ipynb ├── session3b_solution.ipynb ├── session4.ipynb ├── session5-tf.ipynb ├── session5.ipynb ├── session6.ipynb ├── solutions ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── agents.cpython-36.pyc │ └── environments.cpython-36.pyc ├── agents.py ├── environments.py ├── memory.py ├── networks.py └── utils.py └── tf2_tutorial.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/README.md -------------------------------------------------------------------------------- /img/blackjack_scores.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/blackjack_scores.PNG -------------------------------------------------------------------------------- /img/ddpg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/ddpg.png -------------------------------------------------------------------------------- /img/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/dqn.png -------------------------------------------------------------------------------- /img/dsbkk_soai.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/dsbkk_soai.png -------------------------------------------------------------------------------- /img/ed_thorp.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/ed_thorp.PNG -------------------------------------------------------------------------------- /img/foundational_flaw.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/foundational_flaw.PNG -------------------------------------------------------------------------------- /img/mc_control_glie.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/mc_control_glie.PNG -------------------------------------------------------------------------------- /img/mc_control_glie_constant.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/mc_control_glie_constant.PNG -------------------------------------------------------------------------------- /img/mc_predict_q.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/mc_predict_q.PNG -------------------------------------------------------------------------------- /img/mc_predict_v.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/mc_predict_v.PNG -------------------------------------------------------------------------------- /img/mdp_weather.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/mdp_weather.png -------------------------------------------------------------------------------- /img/monte_carlo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/monte_carlo.jpg -------------------------------------------------------------------------------- /img/neural_nets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/neural_nets.png -------------------------------------------------------------------------------- /img/optimizers.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/optimizers.gif -------------------------------------------------------------------------------- /img/policygradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/policygradient.png -------------------------------------------------------------------------------- /img/pong_pg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/pong_pg.png -------------------------------------------------------------------------------- /img/pong_progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/pong_progress.png -------------------------------------------------------------------------------- /img/qlearning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/qlearning.png -------------------------------------------------------------------------------- /img/reinforce_derivation1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/reinforce_derivation1.PNG -------------------------------------------------------------------------------- /img/reinforce_derivation2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/reinforce_derivation2.PNG -------------------------------------------------------------------------------- /img/rl_framework.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/rl_framework.gif -------------------------------------------------------------------------------- /img/rl_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/rl_framework.png -------------------------------------------------------------------------------- /img/taxi_scores.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/taxi_scores.PNG -------------------------------------------------------------------------------- /img/xkcd_ml.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/img/xkcd_ml.png -------------------------------------------------------------------------------- /model_compile.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/model_compile.ipynb -------------------------------------------------------------------------------- /models/bj_optimal_q.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/models/bj_optimal_q.pkl -------------------------------------------------------------------------------- /models/ddpg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/models/ddpg.pth -------------------------------------------------------------------------------- /models/dqn.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/models/dqn.pth -------------------------------------------------------------------------------- /models/pg.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/models/pg.pth -------------------------------------------------------------------------------- /models/taxi_optimal_q.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/models/taxi_optimal_q.pkl -------------------------------------------------------------------------------- /session0.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session0.ipynb -------------------------------------------------------------------------------- /session1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session1.ipynb -------------------------------------------------------------------------------- /session2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session2.ipynb -------------------------------------------------------------------------------- /session2b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session2b.ipynb -------------------------------------------------------------------------------- /session3.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session3.ipynb -------------------------------------------------------------------------------- /session3b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session3b.ipynb -------------------------------------------------------------------------------- /session3b_solution.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session3b_solution.ipynb -------------------------------------------------------------------------------- /session4.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session4.ipynb -------------------------------------------------------------------------------- /session5-tf.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session5-tf.ipynb -------------------------------------------------------------------------------- /session5.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session5.ipynb -------------------------------------------------------------------------------- /session6.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/session6.ipynb -------------------------------------------------------------------------------- /solutions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /solutions/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /solutions/__pycache__/agents.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/__pycache__/agents.cpython-36.pyc -------------------------------------------------------------------------------- /solutions/__pycache__/environments.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/__pycache__/environments.cpython-36.pyc -------------------------------------------------------------------------------- /solutions/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/agents.py -------------------------------------------------------------------------------- /solutions/environments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/environments.py -------------------------------------------------------------------------------- /solutions/memory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/memory.py -------------------------------------------------------------------------------- /solutions/networks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/networks.py -------------------------------------------------------------------------------- /solutions/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/solutions/utils.py -------------------------------------------------------------------------------- /tf2_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Datatouille/rl-workshop/HEAD/tf2_tutorial.ipynb --------------------------------------------------------------------------------