├── .gitignore ├── 1-gym_developing ├── README.md ├── core.py ├── grid_game.py ├── maze_game.py └── suceess.png ├── 2-markov_decision_process ├── __init__.py ├── game.py └── our_life.py ├── 3-dynamic_program ├── grid_game_with_average_policy.py ├── grid_game_with_policy_iterate.py ├── grid_game_with_value_iterate.py ├── maze_game_with_dynamic_program.py ├── policy_iteration_algorithm.png └── value_iteration_algorithm.png ├── 4-monte_carlo ├── monte_carlo_evaluate.py └── monte_carlo_sample.py ├── 5-temporal_difference ├── README.md ├── push_box_game.py ├── push_box_game │ ├── agent.py │ ├── main.py │ └── q_table.pkl ├── q_learning_algortihm.png ├── sarsa_algorithm.png └── sarsa_lambda_algorithm.png ├── 6-value_function_approximate ├── deep_learning_flappy_bird │ ├── .gitignore │ ├── README.md │ ├── assets │ │ ├── audio │ │ │ ├── die.ogg │ │ │ ├── die.wav │ │ │ ├── hit.ogg │ │ │ ├── hit.wav │ │ │ ├── point.ogg │ │ │ ├── point.wav │ │ │ ├── swoosh.ogg │ │ │ ├── swoosh.wav │ │ │ ├── wing.ogg │ │ │ └── wing.wav │ │ └── sprites │ │ │ ├── 0.png │ │ │ ├── 1.png │ │ │ ├── 2.png │ │ │ ├── 3.png │ │ │ ├── 4.png │ │ │ ├── 5.png │ │ │ ├── 6.png │ │ │ ├── 7.png │ │ │ ├── 8.png │ │ │ ├── 9.png │ │ │ ├── background-black.png │ │ │ ├── base.png │ │ │ ├── pipe-green.png │ │ │ ├── redbird-downflap.png │ │ │ ├── redbird-midflap.png │ │ │ └── redbird-upflap.png │ ├── deep_q_network.py │ ├── game │ │ ├── flappy_bird_utils.py │ │ └── wrapped_flappy_bird.py │ ├── images │ │ ├── flappy_bird_demp.gif │ │ ├── network.png │ │ └── preprocess.png │ ├── logs_bird │ │ ├── hidden.txt │ │ └── readout.txt │ └── saved_networks │ │ ├── bird-dqn-2880000 │ │ ├── bird-dqn-2880000.meta │ │ ├── bird-dqn-2890000 │ │ ├── bird-dqn-2890000.meta │ │ ├── bird-dqn-2900000 │ │ ├── bird-dqn-2900000.meta │ │ ├── bird-dqn-2910000 │ │ ├── bird-dqn-2910000.meta │ │ ├── bird-dqn-2920000 │ │ ├── bird-dqn-2920000.meta │ │ ├── checkpoint │ │ └── pretrained_model │ │ └── bird-dqn-policy ├── deep_q_network_algortihm.png └── deep_q_network_template.py └── README.md /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/.gitignore -------------------------------------------------------------------------------- /1-gym_developing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/1-gym_developing/README.md -------------------------------------------------------------------------------- /1-gym_developing/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/1-gym_developing/core.py -------------------------------------------------------------------------------- /1-gym_developing/grid_game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/1-gym_developing/grid_game.py -------------------------------------------------------------------------------- /1-gym_developing/maze_game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/1-gym_developing/maze_game.py -------------------------------------------------------------------------------- /1-gym_developing/suceess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/1-gym_developing/suceess.png -------------------------------------------------------------------------------- /2-markov_decision_process/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/2-markov_decision_process/__init__.py -------------------------------------------------------------------------------- /2-markov_decision_process/game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/2-markov_decision_process/game.py -------------------------------------------------------------------------------- /2-markov_decision_process/our_life.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/2-markov_decision_process/our_life.py -------------------------------------------------------------------------------- /3-dynamic_program/grid_game_with_average_policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/grid_game_with_average_policy.py -------------------------------------------------------------------------------- /3-dynamic_program/grid_game_with_policy_iterate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/grid_game_with_policy_iterate.py -------------------------------------------------------------------------------- /3-dynamic_program/grid_game_with_value_iterate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/grid_game_with_value_iterate.py -------------------------------------------------------------------------------- /3-dynamic_program/maze_game_with_dynamic_program.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/maze_game_with_dynamic_program.py -------------------------------------------------------------------------------- /3-dynamic_program/policy_iteration_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/policy_iteration_algorithm.png -------------------------------------------------------------------------------- /3-dynamic_program/value_iteration_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/3-dynamic_program/value_iteration_algorithm.png -------------------------------------------------------------------------------- /4-monte_carlo/monte_carlo_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/4-monte_carlo/monte_carlo_evaluate.py -------------------------------------------------------------------------------- /4-monte_carlo/monte_carlo_sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/4-monte_carlo/monte_carlo_sample.py -------------------------------------------------------------------------------- /5-temporal_difference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/README.md -------------------------------------------------------------------------------- /5-temporal_difference/push_box_game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/push_box_game.py -------------------------------------------------------------------------------- /5-temporal_difference/push_box_game/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/push_box_game/agent.py -------------------------------------------------------------------------------- /5-temporal_difference/push_box_game/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/push_box_game/main.py -------------------------------------------------------------------------------- /5-temporal_difference/push_box_game/q_table.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/push_box_game/q_table.pkl -------------------------------------------------------------------------------- /5-temporal_difference/q_learning_algortihm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/q_learning_algortihm.png -------------------------------------------------------------------------------- /5-temporal_difference/sarsa_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/sarsa_algorithm.png -------------------------------------------------------------------------------- /5-temporal_difference/sarsa_lambda_algorithm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/5-temporal_difference/sarsa_lambda_algorithm.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/.gitignore: -------------------------------------------------------------------------------- 1 | # ignore all pyc files. 2 | *.pyc 3 | 4 | -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/README.md -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/die.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/die.ogg -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/die.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/die.wav -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/hit.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/hit.ogg -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/hit.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/hit.wav -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/point.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/point.ogg -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/point.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/point.wav -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/swoosh.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/swoosh.ogg -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/swoosh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/swoosh.wav -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/wing.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/wing.ogg -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/audio/wing.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/audio/wing.wav -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/0.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/1.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/2.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/3.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/4.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/5.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/6.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/7.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/8.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/9.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/background-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/background-black.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/base.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/pipe-green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/pipe-green.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-downflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-downflap.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-midflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-midflap.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-upflap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/assets/sprites/redbird-upflap.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/deep_q_network.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/deep_q_network.py -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/game/flappy_bird_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/game/flappy_bird_utils.py -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/game/wrapped_flappy_bird.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/game/wrapped_flappy_bird.py -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/images/flappy_bird_demp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/images/flappy_bird_demp.gif -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/images/network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/images/network.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/images/preprocess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/images/preprocess.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/logs_bird/hidden.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/logs_bird/readout.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2880000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2880000 -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2880000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2880000.meta -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2890000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2890000 -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2890000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2890000.meta -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2900000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2900000 -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2900000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2900000.meta -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2910000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2910000 -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2910000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2910000.meta -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2920000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2920000 -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2920000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/bird-dqn-2920000.meta -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/checkpoint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/checkpoint -------------------------------------------------------------------------------- /6-value_function_approximate/deep_learning_flappy_bird/saved_networks/pretrained_model/bird-dqn-policy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_learning_flappy_bird/saved_networks/pretrained_model/bird-dqn-policy -------------------------------------------------------------------------------- /6-value_function_approximate/deep_q_network_algortihm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_q_network_algortihm.png -------------------------------------------------------------------------------- /6-value_function_approximate/deep_q_network_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/6-value_function_approximate/deep_q_network_template.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuliquan/reinforcement_learning_basic_book/HEAD/README.md --------------------------------------------------------------------------------