├── .vscode ├── launch.json └── settings.json ├── README.md ├── data ├── processed │ ├── test.pt │ └── training.pt └── raw │ ├── t10k-images-idx3-ubyte │ ├── t10k-labels-idx1-ubyte │ ├── train-images-idx3-ubyte │ └── train-labels-idx1-ubyte ├── javascript ├── PuckWorldC.html ├── README.md ├── _config.yml ├── agentzoo │ ├── puckagent.json │ └── wateragent.json ├── car_rent.html ├── demo_iteration.html ├── external │ ├── .DS_Store │ ├── bootstrap.min.css │ ├── bootstrap.min.js │ ├── d3.min.js │ ├── highlight.pack.js │ ├── highlight_default.css │ ├── images │ │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ │ ├── ui-bg_flat_10_000000_40x100.png │ │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ │ ├── ui-bg_highlight-soft_100_eeeeee_1x100.png │ │ ├── ui-bg_highlight-soft_75_ffe45c_1x100.png │ │ ├── ui-icons_222222_256x240.png │ │ ├── ui-icons_228ef1_256x240.png │ │ ├── ui-icons_ef8c08_256x240.png │ │ ├── ui-icons_ffd27a_256x240.png │ │ └── ui-icons_ffffff_256x240.png │ ├── jquery-1.11.2.min.js │ ├── jquery-2.1.3.min.js │ ├── jquery-ui.min.css │ ├── jquery-ui.min.js │ ├── jquery.flot.min.js │ ├── marked.js │ ├── mathjax.js │ └── underscore-min.js ├── gridworld.js ├── gridworld_dp.html ├── gridworld_td.html ├── img │ ├── dpsolved.jpeg │ ├── lambda.png │ ├── policyiter.png │ ├── qsa.jpeg │ ├── sarsa.png │ └── traces.png ├── index.html ├── lib │ ├── car_rent.js │ ├── car_rent_old.js │ ├── myRL.js │ ├── rl.js │ └── smallGrid.js ├── loop.svg ├── myPuckWorld.html ├── puckworld.html ├── test.html ├── value_policy_iteration.md ├── waterworld.html └── waterworld.js ├── main-RL-QiangYe.pdf ├── reinforce ├── .project ├── .pydevproject ├── MNIST_data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── __init__.py ├── __pycache__ │ ├── agents.cpython-35.pyc │ ├── approximator.cpython-35.pyc │ ├── core.cpython-35.pyc │ ├── dqn.cpython-35.pyc │ ├── gridworld.cpython-35.pyc │ └── puckworld.cpython-35.pyc ├── agents.py ├── approx_q_agent.py ├── approximator.py ├── codes_for_book │ ├── c01 │ │ ├── Practice01_Basic_model.ipynb │ │ ├── basic_concept.py │ │ └── introduction.py │ ├── c02 │ │ ├── Practice02_Markov_Decision_Process.ipynb │ │ ├── __pycache__ │ │ │ └── utils.cpython-35.pyc │ │ └── utils.py │ ├── c03 │ │ ├── Practice03_Dynamic_Programming.ipynb │ │ ├── Practice03_Jack_Car_Rental.ipynb │ │ ├── __pycache__ │ │ │ └── utils.cpython-35.pyc │ │ └── utils.py │ ├── c04 │ │ ├── Practice04_MC_control_BlackJack.ipynb │ │ ├── Practice04_NEW_MC_control_BlackJack.ipynb │ │ ├── __pycache__ │ │ │ └── utils.cpython-35.pyc │ │ ├── graph_for_lambda_weight.ipynb │ │ └── utils.py │ ├── c05 │ │ ├── 01_blackjack │ │ │ ├── MC_control_BlackJack.ipynb │ │ │ ├── Practice04_NEW_MC_control_BlackJack.ipynb │ │ │ ├── __pycache__ │ │ │ │ ├── blackjack.cpython-35.pyc │ │ │ │ └── utils.cpython-35.pyc │ │ │ ├── blackjack.py │ │ │ └── utils.py │ │ ├── 02_windy_grid │ │ │ ├── Practice05_Cliff_Walk.ipynb │ │ │ ├── Practice05_Q_learning.ipynb │ │ │ ├── Practice05_sarsa_agent.ipynb │ │ │ ├── Practice05_sarsa_lambda_agent.ipynb │ │ │ ├── __pycache__ │ │ │ │ ├── core.cpython-35.pyc │ │ │ │ ├── gridworld.cpython-35.pyc │ │ │ │ └── utils.cpython-35.pyc │ │ │ ├── agents.py │ │ │ ├── approximator.py │ │ │ ├── core.py │ │ │ ├── gridworld.py │ │ │ ├── test_grid_world_env.ipynb │ │ │ └── utils.py │ │ └── __pycache__ │ │ │ ├── agents.cpython-35.pyc │ │ │ ├── approximator.cpython-35.pyc │ │ │ ├── core.cpython-35.pyc │ │ │ ├── gridworld.cpython-35.pyc │ │ │ └── utils.cpython-35.pyc │ ├── c06 │ │ ├── __pycache__ │ │ │ ├── agents.cpython-35.pyc │ │ │ ├── approximator.cpython-35.pyc │ │ │ ├── core.cpython-35.pyc │ │ │ ├── gridworld.cpython-35.pyc │ │ │ ├── mountain_car.cpython-35.pyc │ │ │ ├── puckworld.cpython-35.pyc │ │ │ └── utils.cpython-35.pyc │ │ ├── agents.py │ │ ├── approximator.py │ │ ├── autograd_tutorial.ipynb │ │ ├── core.py │ │ ├── gradient_intro.ipynb │ │ ├── gridworld.py │ │ ├── linear_sasa_lambda.ipynb │ │ ├── models │ │ │ └── model1.pt │ │ ├── mountain_car.py │ │ ├── neural_networks_tutorial.ipynb │ │ ├── puckworld.py │ │ ├── reinforcement_q_learning.ipynb │ │ ├── tensor_tutorial.ipynb │ │ ├── test_dqn.ipynb │ │ └── utils.py │ ├── c07 │ │ ├── Models │ │ │ ├── 100_actor.pt │ │ │ ├── 100_critic.pt │ │ │ ├── 200_actor.pt │ │ │ ├── 200_critic.pt │ │ │ ├── 3000_actor.pt │ │ │ └── 3000_critic.pt │ │ ├── __pycache__ │ │ │ ├── approximator.cpython-35.pyc │ │ │ ├── core.cpython-35.pyc │ │ │ ├── ddpg_agent.cpython-35.pyc │ │ │ ├── puckworld.cpython-35.pyc │ │ │ ├── puckworld_con_enemy.cpython-35.pyc │ │ │ ├── puckworld_continuous.cpython-35.pyc │ │ │ └── utils.cpython-35.pyc │ │ ├── approximator.py │ │ ├── core.py │ │ ├── ddpg_agent.py │ │ ├── puckworld_con_enemy.py │ │ ├── puckworld_continuous.py │ │ ├── softmax_policy.ipynb │ │ ├── test_ddpg.ipynb │ │ ├── test_ou_noise.ipynb │ │ ├── test_puckword_con_pentagon.ipynb │ │ └── utils.py │ ├── c09 │ │ ├── Practice09_Exploration_and_Expoitation.ipynb │ │ └── total_regret.ipynb │ ├── c10 │ │ └── Practice10_Monte_Carlo_Tree_Search.ipynb │ └── readme.md ├── core.py ├── examples │ ├── SarsaAgent.py │ ├── SarsaLambdaAgent.py │ ├── SimplifiedSarsaAgent.py │ ├── SimplifiedSarsaLambdaAgent.py │ ├── self_grid_env.py │ ├── small_grid_world.py │ └── windy_grid_world.py ├── gridworld.py ├── gym-results │ ├── openaigym.episode_batch.0.7202.stats.json │ ├── openaigym.manifest.0.7202.manifest.json │ ├── openaigym.video.0.7202.video000000.meta.json │ ├── openaigym.video.0.7202.video000000.mp4 │ ├── openaigym.video.0.7202.video000001.meta.json │ ├── openaigym.video.0.7202.video000001.mp4 │ ├── openaigym.video.0.7202.video000008.meta.json │ ├── openaigym.video.0.7202.video000008.mp4 │ ├── openaigym.video.0.7202.video000027.meta.json │ ├── openaigym.video.0.7202.video000027.mp4 │ ├── openaigym.video.0.7202.video000064.meta.json │ ├── openaigym.video.0.7202.video000064.mp4 │ ├── openaigym.video.0.7202.video000125.meta.json │ ├── openaigym.video.0.7202.video000125.mp4 │ ├── openaigym.video.0.7202.video000216.meta.json │ ├── openaigym.video.0.7202.video000216.mp4 │ ├── openaigym.video.0.7202.video000343.meta.json │ └── openaigym.video.0.7202.video000343.mp4 ├── playground.py ├── puckworld.py ├── readme.md └── references │ ├── DDPG_example.py │ ├── MountainCarEnv.py │ ├── PDAgent.py │ ├── ddpg.py │ ├── dqn.py │ ├── dqn_example1.py │ ├── keras_playground.py │ ├── puckworld_agent.py │ └── rendering.py ├── setup.py └── videos ├── CartPole.mp4 ├── PuckWorld.mp4 └── puckworld2.mp4 /.vscode/launch.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/.vscode/launch.json -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/.vscode/settings.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/README.md -------------------------------------------------------------------------------- /data/processed/test.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/processed/test.pt -------------------------------------------------------------------------------- /data/processed/training.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/processed/training.pt -------------------------------------------------------------------------------- /data/raw/t10k-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/raw/t10k-images-idx3-ubyte -------------------------------------------------------------------------------- /data/raw/t10k-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/raw/t10k-labels-idx1-ubyte -------------------------------------------------------------------------------- /data/raw/train-images-idx3-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/raw/train-images-idx3-ubyte -------------------------------------------------------------------------------- /data/raw/train-labels-idx1-ubyte: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/data/raw/train-labels-idx1-ubyte -------------------------------------------------------------------------------- /javascript/PuckWorldC.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/PuckWorldC.html -------------------------------------------------------------------------------- /javascript/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/README.md -------------------------------------------------------------------------------- /javascript/_config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/_config.yml -------------------------------------------------------------------------------- /javascript/agentzoo/puckagent.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/agentzoo/puckagent.json -------------------------------------------------------------------------------- /javascript/agentzoo/wateragent.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/agentzoo/wateragent.json -------------------------------------------------------------------------------- /javascript/car_rent.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/car_rent.html -------------------------------------------------------------------------------- /javascript/demo_iteration.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/demo_iteration.html -------------------------------------------------------------------------------- /javascript/external/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/.DS_Store -------------------------------------------------------------------------------- /javascript/external/bootstrap.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/bootstrap.min.css -------------------------------------------------------------------------------- /javascript/external/bootstrap.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/bootstrap.min.js -------------------------------------------------------------------------------- /javascript/external/d3.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/d3.min.js -------------------------------------------------------------------------------- /javascript/external/highlight.pack.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/highlight.pack.js -------------------------------------------------------------------------------- /javascript/external/highlight_default.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/highlight_default.css -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_diagonals-thick_18_b81900_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_diagonals-thick_18_b81900_40x40.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_diagonals-thick_20_666666_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_diagonals-thick_20_666666_40x40.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_flat_10_000000_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_flat_10_000000_40x100.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_glass_100_f6f6f6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_glass_100_f6f6f6_1x400.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_glass_100_fdf5ce_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_glass_100_fdf5ce_1x400.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_gloss-wave_35_f6a828_500x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_gloss-wave_35_f6a828_500x100.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_highlight-soft_100_eeeeee_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_highlight-soft_100_eeeeee_1x100.png -------------------------------------------------------------------------------- /javascript/external/images/ui-bg_highlight-soft_75_ffe45c_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-bg_highlight-soft_75_ffe45c_1x100.png -------------------------------------------------------------------------------- /javascript/external/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-icons_222222_256x240.png -------------------------------------------------------------------------------- /javascript/external/images/ui-icons_228ef1_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-icons_228ef1_256x240.png -------------------------------------------------------------------------------- /javascript/external/images/ui-icons_ef8c08_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-icons_ef8c08_256x240.png -------------------------------------------------------------------------------- /javascript/external/images/ui-icons_ffd27a_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-icons_ffd27a_256x240.png -------------------------------------------------------------------------------- /javascript/external/images/ui-icons_ffffff_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/images/ui-icons_ffffff_256x240.png -------------------------------------------------------------------------------- /javascript/external/jquery-1.11.2.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/jquery-1.11.2.min.js -------------------------------------------------------------------------------- /javascript/external/jquery-2.1.3.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/jquery-2.1.3.min.js -------------------------------------------------------------------------------- /javascript/external/jquery-ui.min.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/jquery-ui.min.css -------------------------------------------------------------------------------- /javascript/external/jquery-ui.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/jquery-ui.min.js -------------------------------------------------------------------------------- /javascript/external/jquery.flot.min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/jquery.flot.min.js -------------------------------------------------------------------------------- /javascript/external/marked.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/marked.js -------------------------------------------------------------------------------- /javascript/external/mathjax.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/mathjax.js -------------------------------------------------------------------------------- /javascript/external/underscore-min.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/external/underscore-min.js -------------------------------------------------------------------------------- /javascript/gridworld.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/gridworld.js -------------------------------------------------------------------------------- /javascript/gridworld_dp.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/gridworld_dp.html -------------------------------------------------------------------------------- /javascript/gridworld_td.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/gridworld_td.html -------------------------------------------------------------------------------- /javascript/img/dpsolved.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/dpsolved.jpeg -------------------------------------------------------------------------------- /javascript/img/lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/lambda.png -------------------------------------------------------------------------------- /javascript/img/policyiter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/policyiter.png -------------------------------------------------------------------------------- /javascript/img/qsa.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/qsa.jpeg -------------------------------------------------------------------------------- /javascript/img/sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/sarsa.png -------------------------------------------------------------------------------- /javascript/img/traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/img/traces.png -------------------------------------------------------------------------------- /javascript/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/index.html -------------------------------------------------------------------------------- /javascript/lib/car_rent.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/lib/car_rent.js -------------------------------------------------------------------------------- /javascript/lib/car_rent_old.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/lib/car_rent_old.js -------------------------------------------------------------------------------- /javascript/lib/myRL.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/lib/myRL.js -------------------------------------------------------------------------------- /javascript/lib/rl.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/lib/rl.js -------------------------------------------------------------------------------- /javascript/lib/smallGrid.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/lib/smallGrid.js -------------------------------------------------------------------------------- /javascript/loop.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/loop.svg -------------------------------------------------------------------------------- /javascript/myPuckWorld.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/myPuckWorld.html -------------------------------------------------------------------------------- /javascript/puckworld.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/puckworld.html -------------------------------------------------------------------------------- /javascript/test.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/test.html -------------------------------------------------------------------------------- /javascript/value_policy_iteration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/value_policy_iteration.md -------------------------------------------------------------------------------- /javascript/waterworld.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/waterworld.html -------------------------------------------------------------------------------- /javascript/waterworld.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/javascript/waterworld.js -------------------------------------------------------------------------------- /main-RL-QiangYe.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/main-RL-QiangYe.pdf -------------------------------------------------------------------------------- /reinforce/.project: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/.project -------------------------------------------------------------------------------- /reinforce/.pydevproject: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/.pydevproject -------------------------------------------------------------------------------- /reinforce/MNIST_data/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/MNIST_data/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /reinforce/MNIST_data/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/MNIST_data/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /reinforce/MNIST_data/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/MNIST_data/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /reinforce/MNIST_data/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/MNIST_data/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /reinforce/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__init__.py -------------------------------------------------------------------------------- /reinforce/__pycache__/agents.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/agents.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/__pycache__/approximator.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/approximator.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/__pycache__/dqn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/dqn.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/__pycache__/gridworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/gridworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/__pycache__/puckworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/__pycache__/puckworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/agents.py -------------------------------------------------------------------------------- /reinforce/approx_q_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/approx_q_agent.py -------------------------------------------------------------------------------- /reinforce/approximator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/approximator.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c01/Practice01_Basic_model.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c01/Practice01_Basic_model.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c01/basic_concept.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c01/basic_concept.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c01/introduction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c01/introduction.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c02/Practice02_Markov_Decision_Process.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c02/Practice02_Markov_Decision_Process.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c02/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c02/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c02/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c02/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c03/Practice03_Dynamic_Programming.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c03/Practice03_Dynamic_Programming.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c03/Practice03_Jack_Car_Rental.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c03/Practice03_Jack_Car_Rental.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c03/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c03/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c03/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c03/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c04/Practice04_MC_control_BlackJack.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c04/Practice04_MC_control_BlackJack.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c04/Practice04_NEW_MC_control_BlackJack.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c04/Practice04_NEW_MC_control_BlackJack.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c04/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c04/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c04/graph_for_lambda_weight.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c04/graph_for_lambda_weight.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c04/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c04/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/MC_control_BlackJack.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/MC_control_BlackJack.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/Practice04_NEW_MC_control_BlackJack.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/Practice04_NEW_MC_control_BlackJack.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/__pycache__/blackjack.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/__pycache__/blackjack.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/blackjack.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/blackjack.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/01_blackjack/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/01_blackjack/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/Practice05_Cliff_Walk.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/Practice05_Cliff_Walk.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/Practice05_Q_learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/Practice05_Q_learning.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/Practice05_sarsa_agent.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/Practice05_sarsa_agent.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/Practice05_sarsa_lambda_agent.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/Practice05_sarsa_lambda_agent.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/__pycache__/gridworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/__pycache__/gridworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/agents.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/approximator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/approximator.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/core.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/gridworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/gridworld.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/test_grid_world_env.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/test_grid_world_env.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/02_windy_grid/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/02_windy_grid/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/__pycache__/agents.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/__pycache__/agents.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/__pycache__/approximator.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/__pycache__/approximator.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/__pycache__/gridworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/__pycache__/gridworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c05/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c05/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/agents.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/agents.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/approximator.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/approximator.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/gridworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/gridworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/mountain_car.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/mountain_car.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/puckworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/puckworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/agents.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/approximator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/approximator.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/autograd_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/autograd_tutorial.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/core.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/gradient_intro.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/gradient_intro.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/gridworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/gridworld.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/linear_sasa_lambda.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/linear_sasa_lambda.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/models/model1.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/models/model1.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/mountain_car.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/mountain_car.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/neural_networks_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/neural_networks_tutorial.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/puckworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/puckworld.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/reinforcement_q_learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/reinforcement_q_learning.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/tensor_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/tensor_tutorial.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/test_dqn.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/test_dqn.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c06/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c06/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/100_actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/100_actor.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/100_critic.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/100_critic.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/200_actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/200_actor.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/200_critic.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/200_critic.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/3000_actor.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/3000_actor.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/Models/3000_critic.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/Models/3000_critic.pt -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/approximator.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/approximator.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/core.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/core.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/ddpg_agent.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/ddpg_agent.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/puckworld.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/puckworld.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/puckworld_con_enemy.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/puckworld_con_enemy.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/puckworld_continuous.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/puckworld_continuous.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/approximator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/approximator.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/core.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/ddpg_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/ddpg_agent.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/puckworld_con_enemy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/puckworld_con_enemy.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/puckworld_continuous.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/puckworld_continuous.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/softmax_policy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/softmax_policy.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/test_ddpg.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/test_ddpg.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/test_ou_noise.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/test_ou_noise.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/test_puckword_con_pentagon.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/test_puckword_con_pentagon.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c07/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c07/utils.py -------------------------------------------------------------------------------- /reinforce/codes_for_book/c09/Practice09_Exploration_and_Expoitation.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c09/Practice09_Exploration_and_Expoitation.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c09/total_regret.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c09/total_regret.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/c10/Practice10_Monte_Carlo_Tree_Search.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/codes_for_book/c10/Practice10_Monte_Carlo_Tree_Search.ipynb -------------------------------------------------------------------------------- /reinforce/codes_for_book/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /reinforce/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/core.py -------------------------------------------------------------------------------- /reinforce/examples/SarsaAgent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/SarsaAgent.py -------------------------------------------------------------------------------- /reinforce/examples/SarsaLambdaAgent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/SarsaLambdaAgent.py -------------------------------------------------------------------------------- /reinforce/examples/SimplifiedSarsaAgent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/SimplifiedSarsaAgent.py -------------------------------------------------------------------------------- /reinforce/examples/SimplifiedSarsaLambdaAgent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/SimplifiedSarsaLambdaAgent.py -------------------------------------------------------------------------------- /reinforce/examples/self_grid_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/self_grid_env.py -------------------------------------------------------------------------------- /reinforce/examples/small_grid_world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/small_grid_world.py -------------------------------------------------------------------------------- /reinforce/examples/windy_grid_world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/examples/windy_grid_world.py -------------------------------------------------------------------------------- /reinforce/gridworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gridworld.py -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.episode_batch.0.7202.stats.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.episode_batch.0.7202.stats.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.manifest.0.7202.manifest.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.manifest.0.7202.manifest.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000000.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000000.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000000.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000001.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000001.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000001.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000001.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000008.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000008.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000008.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000008.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000027.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000027.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000027.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000027.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000064.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000064.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000064.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000125.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000125.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000125.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000125.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000216.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000216.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000216.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000216.mp4 -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000343.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000343.meta.json -------------------------------------------------------------------------------- /reinforce/gym-results/openaigym.video.0.7202.video000343.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/gym-results/openaigym.video.0.7202.video000343.mp4 -------------------------------------------------------------------------------- /reinforce/playground.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/playground.py -------------------------------------------------------------------------------- /reinforce/puckworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/puckworld.py -------------------------------------------------------------------------------- /reinforce/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/readme.md -------------------------------------------------------------------------------- /reinforce/references/DDPG_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/DDPG_example.py -------------------------------------------------------------------------------- /reinforce/references/MountainCarEnv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/MountainCarEnv.py -------------------------------------------------------------------------------- /reinforce/references/PDAgent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/PDAgent.py -------------------------------------------------------------------------------- /reinforce/references/ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/ddpg.py -------------------------------------------------------------------------------- /reinforce/references/dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/dqn.py -------------------------------------------------------------------------------- /reinforce/references/dqn_example1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/dqn_example1.py -------------------------------------------------------------------------------- /reinforce/references/keras_playground.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/keras_playground.py -------------------------------------------------------------------------------- /reinforce/references/puckworld_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/puckworld_agent.py -------------------------------------------------------------------------------- /reinforce/references/rendering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/reinforce/references/rendering.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/setup.py -------------------------------------------------------------------------------- /videos/CartPole.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/videos/CartPole.mp4 -------------------------------------------------------------------------------- /videos/PuckWorld.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/videos/PuckWorld.mp4 -------------------------------------------------------------------------------- /videos/puckworld2.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qqiang00/Reinforce/HEAD/videos/puckworld2.mp4 --------------------------------------------------------------------------------