├── .github └── workflows │ ├── deploy-to-gh-pages.yml │ └── release-pdf.yml ├── .gitignore ├── .nojekyll ├── .travis.yml ├── Makefile ├── README.md ├── requirements.txt ├── smallbookcover.gif └── source ├── _static └── css │ └── custom.css ├── _templates └── layout.html ├── chapter1 ├── images │ ├── figure-1.1.png │ └── tic-tac-toe.png └── introduction.rst ├── conf.py ├── index.rst ├── notation.rst ├── partI ├── chapter2 │ ├── images │ │ ├── figure-2.1.png │ │ ├── figure-2.2.png │ │ ├── figure-2.3.png │ │ ├── figure-2.4.png │ │ ├── figure-2.5.png │ │ └── figure-2.6.png │ └── multi_armed_bandits.rst ├── chapter3 │ ├── finite_markov_decision_process.rst │ └── images │ │ ├── backup_diagram_for_v_pi.png │ │ ├── exercise-3.18.png │ │ ├── exercise-3.19.png │ │ ├── exercise-3.22.png │ │ ├── figure-3.1.png │ │ ├── figure-3.2.png │ │ ├── figure-3.3.png │ │ ├── figure-3.4.png │ │ ├── figure-3.5.png │ │ ├── pole_balancing.png │ │ ├── q_pi_backup_diagram.png │ │ ├── state_transition_diagram.png │ │ └── table_figure.png ├── chapter4 │ ├── dynamic_programming.rst │ └── images │ │ ├── figure-4.0.png │ │ ├── figure-4.1.png │ │ ├── figure-4.2.png │ │ ├── figure-4.3.png │ │ ├── generalized_policy_iteration.png │ │ └── two_lines.png ├── chapter5 │ ├── images │ │ ├── GPI_chp5.3.png │ │ ├── backup_diagrams_of_MC.png │ │ ├── bubble.png │ │ ├── figure-5.1.png │ │ ├── figure-5.2.png │ │ ├── figure-5.3.png │ │ ├── figure-5.4.png │ │ └── figure-5.5.png │ └── monte_carlo_methods.rst ├── chapter6 │ ├── images │ │ ├── TD(0).png │ │ ├── backup_of_sarsa.png │ │ ├── figure-6.1.png │ │ ├── figure-6.2.png │ │ ├── figure-6.3.png │ │ ├── figure-6.4.png │ │ ├── figure-6.5.png │ │ ├── performance_of_Sarsa_and_Q-learning.png │ │ ├── random_walk_comparison.png │ │ ├── random_walk_markov_reward_process.png │ │ ├── sarsa_for_windy_gridworld.png │ │ ├── sequence_of_states_and_state-action_pairs.png │ │ ├── the_cliff_gridworld.png │ │ ├── tic-tac-toe.png │ │ └── you_are_the_predictor.png │ └── temporal_difference_learning.rst ├── chapter7 │ ├── images │ │ ├── 4-step-TD-and-4-step-Q-sigma.png │ │ ├── figure-7.1.png │ │ ├── figure-7.2.png │ │ ├── figure-7.3.png │ │ ├── figure-7.4.png │ │ ├── figure-7.5.png │ │ └── the-3-step-tree-backup-update.png │ └── n_step_bootstrapping.rst ├── chapter8 │ ├── images │ │ ├── example-8.4-prioritized-sweeping-on-mazes.png │ │ ├── example-8.5-prioritized-sweeping-for-rod-maneuvering.png │ │ ├── figure-8.1.png │ │ ├── figure-8.10.png │ │ ├── figure-8.11.png │ │ ├── figure-8.2.png │ │ ├── figure-8.3.png │ │ ├── figure-8.4.png │ │ ├── figure-8.5.png │ │ ├── figure-8.6.png │ │ ├── figure-8.7.png │ │ ├── figure-8.8.png │ │ ├── figure-8.9.png │ │ ├── relationships-between-experience-model-values-and-policy.png │ │ └── start-relevant-irrelevant-states.png │ └── planning_and_learning_with_tabular_methods.rst └── index.rst ├── partII ├── chapter10 │ ├── images │ │ ├── figure-10.1.png │ │ ├── figure-10.2.png │ │ ├── figure-10.3.png │ │ ├── figure-10.4.png │ │ └── figure-10.5.png │ └── on-policy_control_with_approximation.rst ├── chapter11 │ ├── images │ │ ├── example-11.1.png │ │ ├── example-11.2.png │ │ ├── example-11.3.png │ │ ├── example-11.4.png │ │ ├── figure-11.1.png │ │ ├── figure-11.2.png │ │ ├── figure-11.3.png │ │ ├── figure-11.4.png │ │ ├── figure-11.5.png │ │ ├── figure-11.6.png │ │ ├── simple_MDP.png │ │ └── two-markov-reward-processes.png │ └── off-policy_methods_with_approximation.rst ├── chapter12 │ ├── eligibility_traces.rst │ └── images │ │ ├── figure-12.1.png │ │ ├── figure-12.2.png │ │ ├── figure-12.3.png │ │ ├── figure-12.4.png │ │ └── the-compound-update.png ├── chapter13 │ ├── images │ │ └── example-13.1.png │ └── policy_gradient_methods.rst ├── chapter9 │ ├── images │ │ ├── figure-9.1.png │ │ ├── figure-9.10.png │ │ ├── figure-9.11.png │ │ ├── figure-9.12.png │ │ ├── figure-9.13.png │ │ ├── figure-9.14.png │ │ ├── figure-9.15.png │ │ ├── figure-9.2.png │ │ ├── figure-9.3.png │ │ ├── figure-9.4.png │ │ ├── figure-9.5.png │ │ ├── figure-9.6.png │ │ ├── figure-9.7.png │ │ ├── figure-9.8.png │ │ ├── figure-9.9.png │ │ ├── four-state-markov-reward-process.png │ │ └── one-tile-with-four-subtiles.png │ └── on-policy_prediction_with_approximation.rst └── index.rst ├── partIII ├── chapter14 │ ├── images │ │ └── delay_conditioning_trace_conditioning.png │ └── psychology.rst ├── chapter15 │ ├── images │ │ ├── figure-15.1.png │ │ ├── figure-15.2.png │ │ ├── figure-15.3.png │ │ ├── figure-15.4.png │ │ ├── figure-15.5.png │ │ └── single_neuron_producing_dopamine.png │ └── neuroscience.rst ├── chapter16 │ └── applications_and_case_studies.rst ├── chapter17 │ └── frontiers.rst └── index.rst ├── preface1st.rst ├── preface2nd.rst └── references.rst /.github/workflows/deploy-to-gh-pages.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/.github/workflows/deploy-to-gh-pages.yml -------------------------------------------------------------------------------- /.github/workflows/release-pdf.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/.github/workflows/release-pdf.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | build/ -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/.travis.yml -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/requirements.txt -------------------------------------------------------------------------------- /smallbookcover.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/smallbookcover.gif -------------------------------------------------------------------------------- /source/_static/css/custom.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/_static/css/custom.css -------------------------------------------------------------------------------- /source/_templates/layout.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/_templates/layout.html -------------------------------------------------------------------------------- /source/chapter1/images/figure-1.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/chapter1/images/figure-1.1.png -------------------------------------------------------------------------------- /source/chapter1/images/tic-tac-toe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/chapter1/images/tic-tac-toe.png -------------------------------------------------------------------------------- /source/chapter1/introduction.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/chapter1/introduction.rst -------------------------------------------------------------------------------- /source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/conf.py -------------------------------------------------------------------------------- /source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/index.rst -------------------------------------------------------------------------------- /source/notation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/notation.rst -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.1.png -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.2.png -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.3.png -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.4.png -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.5.png -------------------------------------------------------------------------------- /source/partI/chapter2/images/figure-2.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/images/figure-2.6.png -------------------------------------------------------------------------------- /source/partI/chapter2/multi_armed_bandits.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter2/multi_armed_bandits.rst -------------------------------------------------------------------------------- /source/partI/chapter3/finite_markov_decision_process.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/finite_markov_decision_process.rst -------------------------------------------------------------------------------- /source/partI/chapter3/images/backup_diagram_for_v_pi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/backup_diagram_for_v_pi.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/exercise-3.18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/exercise-3.18.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/exercise-3.19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/exercise-3.19.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/exercise-3.22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/exercise-3.22.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/figure-3.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/figure-3.1.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/figure-3.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/figure-3.2.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/figure-3.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/figure-3.3.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/figure-3.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/figure-3.4.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/figure-3.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/figure-3.5.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/pole_balancing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/pole_balancing.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/q_pi_backup_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/q_pi_backup_diagram.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/state_transition_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/state_transition_diagram.png -------------------------------------------------------------------------------- /source/partI/chapter3/images/table_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter3/images/table_figure.png -------------------------------------------------------------------------------- /source/partI/chapter4/dynamic_programming.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/dynamic_programming.rst -------------------------------------------------------------------------------- /source/partI/chapter4/images/figure-4.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/figure-4.0.png -------------------------------------------------------------------------------- /source/partI/chapter4/images/figure-4.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/figure-4.1.png -------------------------------------------------------------------------------- /source/partI/chapter4/images/figure-4.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/figure-4.2.png -------------------------------------------------------------------------------- /source/partI/chapter4/images/figure-4.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/figure-4.3.png -------------------------------------------------------------------------------- /source/partI/chapter4/images/generalized_policy_iteration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/generalized_policy_iteration.png -------------------------------------------------------------------------------- /source/partI/chapter4/images/two_lines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter4/images/two_lines.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/GPI_chp5.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/GPI_chp5.3.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/backup_diagrams_of_MC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/backup_diagrams_of_MC.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/bubble.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/bubble.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/figure-5.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/figure-5.1.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/figure-5.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/figure-5.2.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/figure-5.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/figure-5.3.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/figure-5.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/figure-5.4.png -------------------------------------------------------------------------------- /source/partI/chapter5/images/figure-5.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/images/figure-5.5.png -------------------------------------------------------------------------------- /source/partI/chapter5/monte_carlo_methods.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter5/monte_carlo_methods.rst -------------------------------------------------------------------------------- /source/partI/chapter6/images/TD(0).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/TD(0).png -------------------------------------------------------------------------------- /source/partI/chapter6/images/backup_of_sarsa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/backup_of_sarsa.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/figure-6.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/figure-6.1.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/figure-6.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/figure-6.2.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/figure-6.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/figure-6.3.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/figure-6.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/figure-6.4.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/figure-6.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/figure-6.5.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/performance_of_Sarsa_and_Q-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/performance_of_Sarsa_and_Q-learning.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/random_walk_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/random_walk_comparison.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/random_walk_markov_reward_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/random_walk_markov_reward_process.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/sarsa_for_windy_gridworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/sarsa_for_windy_gridworld.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/sequence_of_states_and_state-action_pairs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/sequence_of_states_and_state-action_pairs.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/the_cliff_gridworld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/the_cliff_gridworld.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/tic-tac-toe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/tic-tac-toe.png -------------------------------------------------------------------------------- /source/partI/chapter6/images/you_are_the_predictor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/images/you_are_the_predictor.png -------------------------------------------------------------------------------- /source/partI/chapter6/temporal_difference_learning.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter6/temporal_difference_learning.rst -------------------------------------------------------------------------------- /source/partI/chapter7/images/4-step-TD-and-4-step-Q-sigma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/4-step-TD-and-4-step-Q-sigma.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/figure-7.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/figure-7.1.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/figure-7.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/figure-7.2.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/figure-7.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/figure-7.3.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/figure-7.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/figure-7.4.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/figure-7.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/figure-7.5.png -------------------------------------------------------------------------------- /source/partI/chapter7/images/the-3-step-tree-backup-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/images/the-3-step-tree-backup-update.png -------------------------------------------------------------------------------- /source/partI/chapter7/n_step_bootstrapping.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter7/n_step_bootstrapping.rst -------------------------------------------------------------------------------- /source/partI/chapter8/images/example-8.4-prioritized-sweeping-on-mazes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/example-8.4-prioritized-sweeping-on-mazes.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/example-8.5-prioritized-sweeping-for-rod-maneuvering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/example-8.5-prioritized-sweeping-for-rod-maneuvering.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.1.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.10.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.11.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.2.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.3.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.4.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.5.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.6.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.7.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.8.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/figure-8.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/figure-8.9.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/relationships-between-experience-model-values-and-policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/relationships-between-experience-model-values-and-policy.png -------------------------------------------------------------------------------- /source/partI/chapter8/images/start-relevant-irrelevant-states.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/images/start-relevant-irrelevant-states.png -------------------------------------------------------------------------------- /source/partI/chapter8/planning_and_learning_with_tabular_methods.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/chapter8/planning_and_learning_with_tabular_methods.rst -------------------------------------------------------------------------------- /source/partI/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partI/index.rst -------------------------------------------------------------------------------- /source/partII/chapter10/images/figure-10.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/images/figure-10.1.png -------------------------------------------------------------------------------- /source/partII/chapter10/images/figure-10.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/images/figure-10.2.png -------------------------------------------------------------------------------- /source/partII/chapter10/images/figure-10.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/images/figure-10.3.png -------------------------------------------------------------------------------- /source/partII/chapter10/images/figure-10.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/images/figure-10.4.png -------------------------------------------------------------------------------- /source/partII/chapter10/images/figure-10.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/images/figure-10.5.png -------------------------------------------------------------------------------- /source/partII/chapter10/on-policy_control_with_approximation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter10/on-policy_control_with_approximation.rst -------------------------------------------------------------------------------- /source/partII/chapter11/images/example-11.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/example-11.1.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/example-11.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/example-11.2.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/example-11.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/example-11.3.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/example-11.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/example-11.4.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.1.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.2.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.3.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.4.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.5.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/figure-11.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/figure-11.6.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/simple_MDP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/simple_MDP.png -------------------------------------------------------------------------------- /source/partII/chapter11/images/two-markov-reward-processes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/images/two-markov-reward-processes.png -------------------------------------------------------------------------------- /source/partII/chapter11/off-policy_methods_with_approximation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter11/off-policy_methods_with_approximation.rst -------------------------------------------------------------------------------- /source/partII/chapter12/eligibility_traces.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/eligibility_traces.rst -------------------------------------------------------------------------------- /source/partII/chapter12/images/figure-12.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/images/figure-12.1.png -------------------------------------------------------------------------------- /source/partII/chapter12/images/figure-12.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/images/figure-12.2.png -------------------------------------------------------------------------------- /source/partII/chapter12/images/figure-12.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/images/figure-12.3.png -------------------------------------------------------------------------------- /source/partII/chapter12/images/figure-12.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/images/figure-12.4.png -------------------------------------------------------------------------------- /source/partII/chapter12/images/the-compound-update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter12/images/the-compound-update.png -------------------------------------------------------------------------------- /source/partII/chapter13/images/example-13.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter13/images/example-13.1.png -------------------------------------------------------------------------------- /source/partII/chapter13/policy_gradient_methods.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter13/policy_gradient_methods.rst -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.1.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.10.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.11.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.12.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.13.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.14.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.15.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.2.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.3.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.4.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.5.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.6.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.7.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.8.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/figure-9.9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/figure-9.9.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/four-state-markov-reward-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/four-state-markov-reward-process.png -------------------------------------------------------------------------------- /source/partII/chapter9/images/one-tile-with-four-subtiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/images/one-tile-with-four-subtiles.png -------------------------------------------------------------------------------- /source/partII/chapter9/on-policy_prediction_with_approximation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/chapter9/on-policy_prediction_with_approximation.rst -------------------------------------------------------------------------------- /source/partII/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partII/index.rst -------------------------------------------------------------------------------- /source/partIII/chapter14/images/delay_conditioning_trace_conditioning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter14/images/delay_conditioning_trace_conditioning.png -------------------------------------------------------------------------------- /source/partIII/chapter14/psychology.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter14/psychology.rst -------------------------------------------------------------------------------- /source/partIII/chapter15/images/figure-15.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/figure-15.1.png -------------------------------------------------------------------------------- /source/partIII/chapter15/images/figure-15.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/figure-15.2.png -------------------------------------------------------------------------------- /source/partIII/chapter15/images/figure-15.3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/figure-15.3.png -------------------------------------------------------------------------------- /source/partIII/chapter15/images/figure-15.4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/figure-15.4.png -------------------------------------------------------------------------------- /source/partIII/chapter15/images/figure-15.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/figure-15.5.png -------------------------------------------------------------------------------- /source/partIII/chapter15/images/single_neuron_producing_dopamine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/images/single_neuron_producing_dopamine.png -------------------------------------------------------------------------------- /source/partIII/chapter15/neuroscience.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter15/neuroscience.rst -------------------------------------------------------------------------------- /source/partIII/chapter16/applications_and_case_studies.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter16/applications_and_case_studies.rst -------------------------------------------------------------------------------- /source/partIII/chapter17/frontiers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/chapter17/frontiers.rst -------------------------------------------------------------------------------- /source/partIII/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/partIII/index.rst -------------------------------------------------------------------------------- /source/preface1st.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/preface1st.rst -------------------------------------------------------------------------------- /source/preface2nd.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/preface2nd.rst -------------------------------------------------------------------------------- /source/references.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qiwihui/reinforcement-learning-an-introduction-chinese/HEAD/source/references.rst --------------------------------------------------------------------------------