├── .gitignore ├── Chapter1-初识强化学习 └── 1.6-案例:基于Gym库的智能体-环境交互.ipynb ├── Chapter2-Markov决策过程 ├── 2.2-Bellman期望方程.ipynb ├── 2.3-最优策略及其性质.ipynb └── 2.4-案例:悬崖寻路.ipynb ├── Chapter3-有模型数值迭代 └── 3.5-案例:冰面滑行.ipynb ├── Chapter4-回合更新价值迭代 └── 4.3-案例:21点游戏.ipynb ├── Chapter5-时序差分价值迭代 └── 5.4-案例:出租车调度.ipynb ├── Chapter6-函数近似方法 └── 6.5-案例:小车上山.ipynb ├── Chapter7-回合更新策略梯度方法 └── 7.5-案例:车杆平衡.ipynb └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Chapter1-初识强化学习/1.6-案例:基于Gym库的智能体-环境交互.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gym\n", 10 | "import warnings\n", 11 | "import numpy as np\n", 12 | "\n", 13 | "warnings.filterwarnings('ignore')" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "观测空间 = Box(2,)\n", 26 | "动作空间 = Discrete(3)\n", 27 | "观测范围 = [-1.2 -0.07] ~ [0.6 0.07]\n", 28 | "动作数 = 3\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "# 导入环境并查看观测空间和动作空间\n", 34 | "\n", 35 | "env = gym.make('MountainCar-v0')\n", 36 | "print('观测空间 = {}'.format(env.observation_space))\n", 37 | "print('动作空间 = {}'.format(env.action_space))\n", 38 | "print('观测范围 = {} ~ {}'.format(env.observation_space.low, env.observation_space.high))\n", 39 | "print('动作数 = {}'.format(env.action_space.n))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# 根据指定确定性策略决定动作的智能体\n", 49 | "\n", 50 | "class BespokeAgent(object):\n", 51 | " def __init__(self, env):\n", 52 
| " pass\n", 53 | " \n", 54 | " def decide(self, observation):\n", 55 | " position, velocity = observation\n", 56 | " lb = min(-0.09 * (position + 0.25) ** 2 + 0.03, 0.3 * (position + 0.9) ** 4 - 0.008)\n", 57 | " ub = -0.07 * (position + 0.38) ** 2 + 0.06\n", 58 | " \n", 59 | " if lb < velocity < ub:\n", 60 | " action = 2\n", 61 | " else:\n", 62 | " action = 0\n", 63 | " return action\n", 64 | "\n", 65 | " def learn(self, *args):\n", 66 | " pass" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# 智能体和环境交互一个回合的代码\n", 76 | "\n", 77 | "def play_montecarlo(env, agent, render=False, train=False):\n", 78 | " episode_reward = 0.0\n", 79 | " observation = env.reset()\n", 80 | " \n", 81 | " while True:\n", 82 | " if render:\n", 83 | " env.render()\n", 84 | " \n", 85 | " action = agent.decide(observation)\n", 86 | " next_observation, reward, done, _ = env.step(action)\n", 87 | " episode_reward += reward\n", 88 | " \n", 89 | " if train:\n", 90 | " agent.learn(observation, action, reward, done)\n", 91 | " if done:\n", 92 | " break\n", 93 | " \n", 94 | " observation = next_observation\n", 95 | " \n", 96 | " return episode_reward" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "agent = BespokeAgent(env)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "回合奖励 = -113.0\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "env.seed(0)\n", 123 | "episode_reward = play_montecarlo(env, agent, render=True)\n", 124 | "print('回合奖励 = {}'.format(episode_reward))\n", 125 | "env.close()" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 7, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | 
"output_type": "stream", 136 | "text": [ 137 | "平均回合奖励 = -108.26\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "# 运行100回合求平均以测试性能\n", 143 | "\n", 144 | "episode_rewards = [play_montecarlo(env, agent) for _ in range(100)]\n", 145 | "print('平均回合奖励 = {}'.format(np.mean(episode_rewards)))" 146 | ] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 3", 152 | "language": "python", 153 | "name": "python3" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 3 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython3", 165 | "version": "3.7.6" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 4 170 | } 171 | -------------------------------------------------------------------------------- /Chapter2-Markov决策过程/2.2-Bellman期望方程.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sympy\n", 10 | "\n", 11 | "from sympy import symbols\n", 12 | "\n", 13 | "sympy.init_printing()" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "v_hungry, v_full = symbols('v_hungry v_full')\n", 23 | "q_hungry_eat, q_hungry_none, q_full_eat, q_full_none = symbols('q_hungry_eat q_hungry_none q_full_eat q_full_none')\n", 24 | "alpha, beta, x, y, gamma = symbols('alpha, beta, x, y, gamma')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# 求解示例Bellman期望方程\n", 34 | "\n", 35 | "system = sympy.Matrix((\n", 36 | " (1, 0, x - 1, -x, 0, 0, 0),\n", 37 | " (0, 1, 0, 0, -y, y - 1, 0),\n", 38 | " (-gamma, 0, 1, 0, 0, 0, -2),\n", 39 | " ((alpha 
- 1) * gamma, -alpha * gamma, 0, 1, 0, 0, 4 * alpha - 3),\n", 40 | " (-beta * gamma, (beta - 1) * gamma, 0, 0, 1, 0, -4 * beta + 2),\n", 41 | " (0, -gamma, 0, 0, 0, 1, 1)\n", 42 | "))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/latex": [ 53 | "$\\displaystyle \\left\\{ q_{full eat} : \\frac{- \\alpha \\gamma^{2} x y + \\alpha \\gamma^{2} x - 2 \\alpha \\gamma x + \\beta \\gamma^{2} x y - \\beta \\gamma^{2} x - \\beta \\gamma^{2} y + \\beta \\gamma^{2} + \\beta \\gamma x + 3 \\beta \\gamma y - 5 \\beta \\gamma + 4 \\beta + \\gamma^{2} y - \\gamma^{2} - \\gamma y + 3 \\gamma - 2}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{full none} : \\frac{- \\alpha \\gamma^{2} x y - \\alpha \\gamma x + \\beta \\gamma^{2} x y - \\beta \\gamma^{2} y + 3 \\beta \\gamma y + \\gamma^{2} y - \\gamma y + \\gamma - 1}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry eat} : \\frac{- \\alpha \\gamma^{2} x y + \\alpha \\gamma^{2} x - 2 \\alpha \\gamma x + \\beta \\gamma^{2} x y + 2 \\beta \\gamma y - \\gamma^{2} x + \\gamma x - 2 \\gamma + 2}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry none} : \\frac{- \\alpha \\gamma^{2} x y + \\alpha \\gamma^{2} x + \\alpha \\gamma^{2} y - \\alpha \\gamma^{2} - 2 \\alpha \\gamma x - \\alpha \\gamma y + 5 \\alpha \\gamma - 4 \\alpha + \\beta \\gamma^{2} x y - \\beta \\gamma^{2} y + 3 \\beta \\gamma y - \\gamma^{2} x + \\gamma^{2} + \\gamma x - 4 \\gamma + 3}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}, \\ v_{full} : \\frac{- \\alpha \\gamma x y - \\alpha \\gamma x + \\beta \\gamma x y - 2 \\beta \\gamma y + 4 \\beta y + \\gamma y + \\gamma - y - 
1}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}, \\ v_{hungry} : \\frac{- \\alpha \\gamma x y + 3 \\alpha \\gamma x - 4 \\alpha x + \\beta \\gamma x y + 2 \\beta \\gamma y - \\gamma x - 2 \\gamma + x + 2}{\\alpha \\gamma^{2} x - \\alpha \\gamma x + \\beta \\gamma^{2} y - \\beta \\gamma y - \\gamma^{2} + 2 \\gamma - 1}\\right\\}$" 54 | ], 55 | "text/plain": [ 56 | "⎧ 2 2 2 2 2 2\n", 57 | "⎪ - α⋅γ ⋅x⋅y + α⋅γ ⋅x - 2⋅α⋅γ⋅x + β⋅γ ⋅x⋅y - β⋅γ ⋅x - β⋅γ ⋅y + β⋅γ \n", 58 | "⎨q_full_eat: ─────────────────────────────────────────────────────────────────\n", 59 | "⎪ 2 2 \n", 60 | "⎩ α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅y -\n", 61 | "\n", 62 | " 2 2 \n", 63 | " + β⋅γ⋅x + 3⋅β⋅γ⋅y - 5⋅β⋅γ + 4⋅β + γ ⋅y - γ - γ⋅y + 3⋅γ - 2 - α\n", 64 | "────────────────────────────────────────────────────────────, q_full_none: ───\n", 65 | " 2 \n", 66 | " β⋅γ⋅y - γ + 2⋅γ - 1 \n", 67 | "\n", 68 | " 2 2 2 2 \n", 69 | "⋅γ ⋅x⋅y - α⋅γ⋅x + β⋅γ ⋅x⋅y - β⋅γ ⋅y + 3⋅β⋅γ⋅y + γ ⋅y - γ⋅y + γ - 1 \n", 70 | "──────────────────────────────────────────────────────────────────, q_hungry_e\n", 71 | " 2 2 2 \n", 72 | " α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅y - β⋅γ⋅y - γ + 2⋅γ - 1 \n", 73 | "\n", 74 | " 2 2 2 2 \n", 75 | " - α⋅γ ⋅x⋅y + α⋅γ ⋅x - 2⋅α⋅γ⋅x + β⋅γ ⋅x⋅y + 2⋅β⋅γ⋅y - γ ⋅x + γ⋅x - 2⋅γ + 2 \n", 76 | "at: ─────────────────────────────────────────────────────────────────────────,\n", 77 | " 2 2 2 \n", 78 | " α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅y - β⋅γ⋅y - γ + 2⋅γ - 1 \n", 79 | "\n", 80 | " 2 2 2 2 \n", 81 | " - α⋅γ ⋅x⋅y + α⋅γ ⋅x + α⋅γ ⋅y - α⋅γ - 2⋅α⋅γ⋅x - α⋅γ⋅y + 5⋅α⋅γ \n", 82 | " q_hungry_none: ──────────────────────────────────────────────────────────────\n", 83 | " 2 2 \n", 84 | " α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅\n", 85 | "\n", 86 | " 2 2 2 2 \n", 87 | "- 4⋅α + β⋅γ ⋅x⋅y - β⋅γ ⋅y + 3⋅β⋅γ⋅y - γ ⋅x + γ + γ⋅x - 4⋅γ + 3 -α⋅γ⋅\n", 88 | "───────────────────────────────────────────────────────────────, v_full: ─────\n", 89 | " 2 \n", 90 | "y - β⋅γ⋅y - γ + 2⋅γ - 1 \n", 91 | "\n", 92 | " \n", 93 | "x⋅y - 
α⋅γ⋅x + β⋅γ⋅x⋅y - 2⋅β⋅γ⋅y + 4⋅β⋅y + γ⋅y + γ - y - 1 -α⋅γ⋅x⋅y \n", 94 | "─────────────────────────────────────────────────────────, v_hungry: ─────────\n", 95 | " 2 2 2 \n", 96 | " α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅y - β⋅γ⋅y - γ + 2⋅γ - 1 \n", 97 | "\n", 98 | " ⎫\n", 99 | "+ 3⋅α⋅γ⋅x - 4⋅α⋅x + β⋅γ⋅x⋅y + 2⋅β⋅γ⋅y - γ⋅x - 2⋅γ + x + 2⎪\n", 100 | "─────────────────────────────────────────────────────────⎬\n", 101 | " 2 2 2 ⎪\n", 102 | " α⋅γ ⋅x - α⋅γ⋅x + β⋅γ ⋅y - β⋅γ⋅y - γ + 2⋅γ - 1 ⎭" 103 | ] 104 | }, 105 | "execution_count": 4, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "sympy.solve_linear_system(system, v_hungry, v_full, q_hungry_eat, q_hungry_none, q_full_eat, q_full_none)" 112 | ] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.7.6" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 4 136 | } 137 | -------------------------------------------------------------------------------- /Chapter2-Markov决策过程/2.3-最优策略及其性质.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import sympy\n", 10 | "\n", 11 | "from sympy import symbols\n", 12 | "\n", 13 | "sympy.init_printing()" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "alpha, beta, gamma = symbols('alpha beta gamma')\n", 23 | "v_hungry, v_full = symbols('v_hungry v_full')\n", 24 | "q_hungry_eat, q_hungry_none, q_full_eat, 
q_full_none = symbols('q_hungry_eat q_hungry_none q_full_eat q_full_none')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "==== v(饿) = q(饿, 不吃), v(饱) = q(饱, 吃) ==== x = 0, y = 0 ====\n" 37 | ] 38 | }, 39 | { 40 | "data": { 41 | "text/latex": [ 42 | "$\\displaystyle \\left\\{ q_{full eat} : \\frac{- \\beta \\gamma + 4 \\beta + \\gamma - 2}{\\gamma - 1}, \\ q_{full none} : - \\frac{1}{\\gamma - 1}, \\ q_{hungry eat} : \\frac{2}{\\gamma - 1}, \\ q_{hungry none} : \\frac{\\alpha \\gamma - 4 \\alpha - \\gamma + 3}{\\gamma - 1}, \\ v_{full} : - \\frac{1}{\\gamma - 1}, \\ v_{hungry} : \\frac{2}{\\gamma - 1}\\right\\}$" 43 | ], 44 | "text/plain": [ 45 | "⎧ -β⋅γ + 4⋅β + γ - 2 -1 2 \n", 46 | "⎨q_full_eat: ──────────────────, q_full_none: ─────, q_hungry_eat: ─────, q_hu\n", 47 | "⎩ γ - 1 γ - 1 γ - 1 \n", 48 | "\n", 49 | " α⋅γ - 4⋅α - γ + 3 -1 2 ⎫\n", 50 | "ngry_none: ─────────────────, v_full: ─────, v_hungry: ─────⎬\n", 51 | " γ - 1 γ - 1 γ - 1⎭" 52 | ] 53 | }, 54 | "metadata": {}, 55 | "output_type": "display_data" 56 | }, 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "==== v(饿) = q(饿, 吃), v(饱) = q(饱, 吃) ==== x = 1, y = 0 ====\n" 62 | ] 63 | }, 64 | { 65 | "data": { 66 | "text/latex": [ 67 | "$\\displaystyle \\left\\{ q_{full eat} : \\frac{\\alpha \\gamma^{2} - 2 \\alpha \\gamma - 4 \\beta \\gamma + 4 \\beta - \\gamma^{2} + 3 \\gamma - 2}{\\alpha \\gamma^{2} - \\alpha \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{full none} : - \\frac{1}{\\gamma - 1}, \\ q_{hungry eat} : \\frac{\\alpha \\gamma^{2} - 2 \\alpha \\gamma - \\gamma^{2} - \\gamma + 2}{\\alpha \\gamma^{2} - \\alpha \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry none} : \\frac{- \\alpha \\gamma + \\left(4 \\alpha - 3\\right) \\left(\\gamma - 1\\right)}{\\gamma^{2} \\left(\\alpha - 1\\right) - \\gamma \\left(\\alpha - 1\\right) + 
\\gamma - 1}, \\ v_{full} : - \\frac{1}{\\gamma - 1}, \\ v_{hungry} : \\frac{- \\alpha \\gamma + \\left(4 \\alpha - 3\\right) \\left(\\gamma - 1\\right)}{\\gamma^{2} \\left(\\alpha - 1\\right) - \\gamma \\left(\\alpha - 1\\right) + \\gamma - 1}\\right\\}$" 68 | ], 69 | "text/plain": [ 70 | "⎧ 2 2 \n", 71 | "⎪ α⋅γ - 2⋅α⋅γ - 4⋅β⋅γ + 4⋅β - γ + 3⋅γ - 2 -1 \n", 72 | "⎨q_full_eat: ─────────────────────────────────────────, q_full_none: ─────, q_\n", 73 | "⎪ 2 2 γ - 1 \n", 74 | "⎩ α⋅γ - α⋅γ - γ + 2⋅γ - 1 \n", 75 | "\n", 76 | " 2 2 \n", 77 | " α⋅γ - 2⋅α⋅γ - γ - γ + 2 -α⋅γ + (4⋅α - 3)⋅(γ -\n", 78 | "hungry_eat: ─────────────────────────, q_hungry_none: ────────────────────────\n", 79 | " 2 2 2 \n", 80 | " α⋅γ - α⋅γ - γ + 2⋅γ - 1 γ ⋅(α - 1) - γ⋅(α - 1) +\n", 81 | "\n", 82 | " ⎫\n", 83 | " 1) -1 -α⋅γ + (4⋅α - 3)⋅(γ - 1) ⎪\n", 84 | "──────, v_full: ─────, v_hungry: ──────────────────────────────⎬\n", 85 | " γ - 1 2 ⎪\n", 86 | " γ - 1 γ ⋅(α - 1) - γ⋅(α - 1) + γ - 1⎭" 87 | ] 88 | }, 89 | "metadata": {}, 90 | "output_type": "display_data" 91 | }, 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "==== v(饿) = q(饿, 不吃), v(饱) = q(饱, 不吃) ==== x = 0, y = 1 ====\n" 97 | ] 98 | }, 99 | { 100 | "data": { 101 | "text/latex": [ 102 | "$\\displaystyle \\left\\{ q_{full eat} : \\frac{2 \\left(\\beta \\gamma - \\left(2 \\beta - 1\\right) \\left(\\gamma - 1\\right)\\right)}{\\gamma^{2} \\left(\\beta - 1\\right) - \\gamma \\left(\\beta - 1\\right) + \\gamma - 1}, \\ q_{full none} : \\frac{- \\beta \\gamma^{2} + 3 \\beta \\gamma + \\gamma^{2} - 1}{\\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry eat} : \\frac{2}{\\gamma - 1}, \\ q_{hungry none} : \\frac{4 \\alpha \\gamma - 4 \\alpha - \\beta \\gamma^{2} + 3 \\beta \\gamma + \\gamma^{2} - 4 \\gamma + 3}{\\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ v_{full} : \\frac{2 \\left(\\beta \\gamma - \\left(2 \\beta - 1\\right) \\left(\\gamma - 
1\\right)\\right)}{\\gamma^{2} \\left(\\beta - 1\\right) - \\gamma \\left(\\beta - 1\\right) + \\gamma - 1}, \\ v_{hungry} : \\frac{2}{\\gamma - 1}\\right\\}$" 103 | ], 104 | "text/plain": [ 105 | "⎧ 2 2\n", 106 | "⎪ 2⋅(β⋅γ - (2⋅β - 1)⋅(γ - 1)) - β⋅γ + 3⋅β⋅γ + γ \n", 107 | "⎨q_full_eat: ──────────────────────────────, q_full_none: ────────────────────\n", 108 | "⎪ 2 2 2 \n", 109 | "⎩ γ ⋅(β - 1) - γ⋅(β - 1) + γ - 1 β⋅γ - β⋅γ - γ + 2⋅\n", 110 | "\n", 111 | " 2 2 \n", 112 | " - 1 2 4⋅α⋅γ - 4⋅α - β⋅γ + 3⋅β⋅γ + γ - 4\n", 113 | "─────, q_hungry_eat: ─────, q_hungry_none: ───────────────────────────────────\n", 114 | " γ - 1 2 2 \n", 115 | "γ - 1 β⋅γ - β⋅γ - γ + 2⋅γ - 1 \n", 116 | "\n", 117 | " ⎫\n", 118 | "⋅γ + 3 2⋅(β⋅γ - (2⋅β - 1)⋅(γ - 1)) 2 ⎪\n", 119 | "──────, v_full: ──────────────────────────────, v_hungry: ─────⎬\n", 120 | " 2 γ - 1⎪\n", 121 | " γ ⋅(β - 1) - γ⋅(β - 1) + γ - 1 ⎭" 122 | ] 123 | }, 124 | "metadata": {}, 125 | "output_type": "display_data" 126 | }, 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "==== v(饿) = q(饿, 吃), v(饱) = q(饱, 不吃) ==== x = 1, y = 1 ====\n" 132 | ] 133 | }, 134 | { 135 | "data": { 136 | "text/latex": [ 137 | "$\\displaystyle \\left\\{ q_{full eat} : \\frac{- 2 \\alpha \\gamma - \\beta \\gamma + 4 \\beta + 2 \\gamma - 2}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{full none} : \\frac{- \\alpha \\gamma^{2} - \\alpha \\gamma + 3 \\beta \\gamma + \\gamma^{2} - 1}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry eat} : \\frac{- 2 \\alpha \\gamma + \\beta \\gamma^{2} + 2 \\beta \\gamma - \\gamma^{2} - \\gamma + 2}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ q_{hungry none} : \\frac{2 \\alpha \\gamma - 4 \\alpha + 3 \\beta \\gamma - 3 \\gamma + 3}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta 
\\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ v_{full} : \\frac{- 2 \\alpha \\gamma - \\beta \\gamma + 4 \\beta + 2 \\gamma - 2}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}, \\ v_{hungry} : \\frac{2 \\alpha \\gamma - 4 \\alpha + 3 \\beta \\gamma - 3 \\gamma + 3}{\\alpha \\gamma^{2} - \\alpha \\gamma + \\beta \\gamma^{2} - \\beta \\gamma - \\gamma^{2} + 2 \\gamma - 1}\\right\\}$" 138 | ], 139 | "text/plain": [ 140 | "⎧ 2 \n", 141 | "⎪ -2⋅α⋅γ - β⋅γ + 4⋅β + 2⋅γ - 2 - α⋅γ -\n", 142 | "⎨q_full_eat: ──────────────────────────────────────, q_full_none: ────────────\n", 143 | "⎪ 2 2 2 2 \n", 144 | "⎩ α⋅γ - α⋅γ + β⋅γ - β⋅γ - γ + 2⋅γ - 1 α⋅γ - α⋅γ +\n", 145 | "\n", 146 | " 2 2 2 \n", 147 | " α⋅γ + 3⋅β⋅γ + γ - 1 -2⋅α⋅γ + β⋅γ + 2⋅β⋅γ - γ - γ + 2\n", 148 | "──────────────────────────, q_hungry_eat: ────────────────────────────────────\n", 149 | " 2 2 2 2 2 \n", 150 | " β⋅γ - β⋅γ - γ + 2⋅γ - 1 α⋅γ - α⋅γ + β⋅γ - β⋅γ - γ + 2⋅γ -\n", 151 | "\n", 152 | " \n", 153 | " 2⋅α⋅γ - 4⋅α + 3⋅β⋅γ - 3⋅γ + 3 -2⋅α⋅γ\n", 154 | "──, q_hungry_none: ──────────────────────────────────────, v_full: ───────────\n", 155 | " 2 2 2 2 \n", 156 | " 1 α⋅γ - α⋅γ + β⋅γ - β⋅γ - γ + 2⋅γ - 1 α⋅γ - α⋅γ \n", 157 | "\n", 158 | " ⎫\n", 159 | " - β⋅γ + 4⋅β + 2⋅γ - 2 2⋅α⋅γ - 4⋅α + 3⋅β⋅γ - 3⋅γ + 3 ⎪\n", 160 | "───────────────────────────, v_hungry: ──────────────────────────────────────⎬\n", 161 | " 2 2 2 2 2 ⎪\n", 162 | "+ β⋅γ - β⋅γ - γ + 2⋅γ - 1 α⋅γ - α⋅γ + β⋅γ - β⋅γ - γ + 2⋅γ - 1⎭" 163 | ] 164 | }, 165 | "metadata": {}, 166 | "output_type": "display_data" 167 | } 168 | ], 169 | "source": [ 170 | "# 求解示例Bellman最优方程\n", 171 | "\n", 172 | "xy_tuples = ((0, 0), (1, 0), (0, 1), (1, 1))\n", 173 | "for x, y in xy_tuples:\n", 174 | " system = sympy.Matrix((\n", 175 | " (1, 0, x - 1, -x, 0, 0, 0),\n", 176 | " (0, 1, 0, 0, -y, y - 1, 0),\n", 177 | " (-gamma, 0, 1, 0, 0, 0, -2),\n", 178 | " ((alpha - 1) * gamma, -alpha * gamma, 0, 1, 0, 0, 4 * 
alpha - 3),\n", 179 | " (-beta * gamma, (beta - 1) * gamma, 0, 0, 1, 0, -4 * beta + 2),\n", 180 | " (0, -gamma, 0, 0, 0, 1, 1)\n", 181 | " ))\n", 182 | " \n", 183 | " result = sympy.solve_linear_system(system, v_hungry, v_full, q_hungry_eat, q_hungry_none, q_full_eat, q_full_none)\n", 184 | " msgx = 'v(饿) = q(饿, {}吃)'.format('' if x else '不')\n", 185 | " msgy = 'v(饱) = q(饱, {}吃)'.format('不' if y else '')\n", 186 | " print('==== {}, {} ==== x = {}, y = {} ===='.format(msgx, msgy, x, y))\n", 187 | " display(result)" 188 | ] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.7.6" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 4 212 | } 213 | -------------------------------------------------------------------------------- /Chapter2-Markov决策过程/2.4-案例:悬崖寻路.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gym\n", 10 | "import scipy\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "观测空间 = Discrete(48)\n", 24 | "动作空间 = Discrete(4)\n", 25 | "观测数量 = 48, 动作数量 = 4\n", 26 | "地图大小 = (4, 12)\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "# 导入‘CliffWalking-v0’环境\n", 32 | "\n", 33 | "env = gym.make('CliffWalking-v0')\n", 34 | "print('观测空间 = {}'.format(env.observation_space))\n", 35 | "print('动作空间 = 
{}'.format(env.action_space))\n", 36 | "print('观测数量 = {}, 动作数量 = {}'.format(env.nS, env.nA))\n", 37 | "print('地图大小 = {}'.format(env.shape))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# 运行一个回合\n", 47 | "\n", 48 | "def play_once(env, policy):\n", 49 | " total_reward = 0\n", 50 | " state = env.reset()\n", 51 | " \n", 52 | " while True:\n", 53 | " loc = np.unravel_index(state, env.shape)\n", 54 | " print('状态 = {}, 位置 = {}'.format(state, loc), end = ' | ')\n", 55 | " action = np.random.choice(env.nA, p=policy[state])\n", 56 | " state, reward, done, _ = env.step(action)\n", 57 | " print('动作 = {}, 奖励 = {}'.format(action, reward))\n", 58 | " total_reward += reward\n", 59 | " if done:\n", 60 | " break\n", 61 | " \n", 62 | " return total_reward" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "最优策略 = \n", 75 | "[[1 1 1 1 1 1 1 1 1 1 1 2]\n", 76 | " [1 1 1 1 1 1 1 1 1 1 1 2]\n", 77 | " [1 1 1 1 1 1 1 1 1 1 1 2]\n", 78 | " [0 0 0 0 0 0 0 0 0 0 0 2]]\n", 79 | "\n", 80 | "状态 = 36, 位置 = (3, 0) | 动作 = 0, 奖励 = -1\n", 81 | "状态 = 24, 位置 = (2, 0) | 动作 = 1, 奖励 = -1\n", 82 | "状态 = 25, 位置 = (2, 1) | 动作 = 1, 奖励 = -1\n", 83 | "状态 = 26, 位置 = (2, 2) | 动作 = 1, 奖励 = -1\n", 84 | "状态 = 27, 位置 = (2, 3) | 动作 = 1, 奖励 = -1\n", 85 | "状态 = 28, 位置 = (2, 4) | 动作 = 1, 奖励 = -1\n", 86 | "状态 = 29, 位置 = (2, 5) | 动作 = 1, 奖励 = -1\n", 87 | "状态 = 30, 位置 = (2, 6) | 动作 = 1, 奖励 = -1\n", 88 | "状态 = 31, 位置 = (2, 7) | 动作 = 1, 奖励 = -1\n", 89 | "状态 = 32, 位置 = (2, 8) | 动作 = 1, 奖励 = -1\n", 90 | "状态 = 33, 位置 = (2, 9) | 动作 = 1, 奖励 = -1\n", 91 | "状态 = 34, 位置 = (2, 10) | 动作 = 1, 奖励 = -1\n", 92 | "状态 = 35, 位置 = (2, 11) | 动作 = 2, 奖励 = -1\n", 93 | "总奖励 = -13\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "# 最优策略\n", 99 | "\n", 100 | "actions = np.ones(env.shape, dtype=int)\n", 101 | 
"actions[-1, :] = 0\n", 102 | "actions[:, -1] = 2\n", 103 | "optimal_policy = np.eye(4)[actions.reshape(-1)]\n", 104 | "print('最优策略 = \\n{}\\n'.format(actions))\n", 105 | "\n", 106 | "total_reward = play_once(env, optimal_policy)\n", 107 | "print('总奖励 = {}'.format(total_reward))" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# 用Bellman方程求解状态价值和动作价值\n", 117 | "\n", 118 | "def evaluate_bellman(env, policy, gamma=1.0):\n", 119 | " a, b = np.eye(env.nS), np.zeros((env.nS))\n", 120 | " for state in range(env.nS - 1):\n", 121 | " for action in range(env.nA):\n", 122 | " pi = policy[state][action]\n", 123 | " for p, next_state, reward, done in env.P[state][action]:\n", 124 | " a[state, next_state] -= pi * gamma * p\n", 125 | " b[state] += pi * reward * p\n", 126 | " v = np.linalg.solve(a, b)\n", 127 | " \n", 128 | " q = np.zeros((env.nS, env.nA))\n", 129 | " for state in range(env.nS - 1):\n", 130 | " for action in range(env.nA):\n", 131 | " for p, next_state, reward, done in env.P[state][action]:\n", 132 | " q[state][action] += (reward + gamma * v[next_state]) * p\n", 133 | " return v, q" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "状态价值 = [-84537767.17599674 -84537734.14924777 -84537128.96237774\n", 146 | " -84536314.46853566 -84534411.5386176 -84523673.65553102\n", 147 | " -84484789.05526869 -84373265.75787517 -84329922.07866065\n", 148 | " -84275296.29400891 -84157473.9489998 -84126181.5476279\n", 149 | " -84537790.11340794 -84537698.61188367 -84537527.58746669\n", 150 | " -84537068.71889049 -84535609.97717507 -84519784.83687702\n", 151 | " -84501674.3798621 -84427458.1592079 -84303436.46428464\n", 152 | " -84229578.6346756 -84101343.11962558 -84084223.0419421\n", 153 | " -84538076.4686468 -84538041.2098556 
-84537974.47486386\n", 154 | " -84537748.23394884 -84536900.56879608 -84526997.16611306\n", 155 | " -84524811.57365067 -84514151.94362962 -84382801.25759234\n", 156 | " -84220081.03580694 -83779780.71472944 -82915912.33177578\n", 157 | " -84538305.52849488 -84538312.12583822 -84538153.59525718\n", 158 | " -84538145.98539561 -84538403.63886732 -84535498.19949386\n", 159 | " -84533771.18294747 -84535051.68012775 -84496447.7055068\n", 160 | " -84499725.97483775 -63199813.39233176 0. ]\n", 161 | "动作价值 = [[-8.45377682e+07 -8.45377351e+07 -8.45377911e+07 -8.45377682e+07]\n", 162 | " [-8.45377351e+07 -8.45371300e+07 -8.45376996e+07 -8.45377682e+07]\n", 163 | " [-8.45371300e+07 -8.45363155e+07 -8.45375286e+07 -8.45377351e+07]\n", 164 | " [-8.45363155e+07 -8.45344125e+07 -8.45370697e+07 -8.45371300e+07]\n", 165 | " [-8.45344125e+07 -8.45236747e+07 -8.45356110e+07 -8.45363155e+07]\n", 166 | " [-8.45236747e+07 -8.44847901e+07 -8.45197858e+07 -8.45344125e+07]\n", 167 | " [-8.44847901e+07 -8.43732668e+07 -8.45016754e+07 -8.45236747e+07]\n", 168 | " [-8.43732668e+07 -8.43299231e+07 -8.44274592e+07 -8.44847901e+07]\n", 169 | " [-8.43299231e+07 -8.42752973e+07 -8.43034375e+07 -8.43732668e+07]\n", 170 | " [-8.42752973e+07 -8.41574749e+07 -8.42295796e+07 -8.43299231e+07]\n", 171 | " [-8.41574749e+07 -8.41261825e+07 -8.41013441e+07 -8.42752973e+07]\n", 172 | " [-8.41261825e+07 -8.41261825e+07 -8.40842240e+07 -8.41574749e+07]\n", 173 | " [-8.45377682e+07 -8.45376996e+07 -8.45380775e+07 -8.45377911e+07]\n", 174 | " [-8.45377351e+07 -8.45375286e+07 -8.45380422e+07 -8.45377911e+07]\n", 175 | " [-8.45371300e+07 -8.45370697e+07 -8.45379755e+07 -8.45376996e+07]\n", 176 | " [-8.45363155e+07 -8.45356110e+07 -8.45377492e+07 -8.45375286e+07]\n", 177 | " [-8.45344125e+07 -8.45197858e+07 -8.45369016e+07 -8.45370697e+07]\n", 178 | " [-8.45236747e+07 -8.45016754e+07 -8.45269982e+07 -8.45356110e+07]\n", 179 | " [-8.44847901e+07 -8.44274592e+07 -8.45248126e+07 -8.45197858e+07]\n", 180 | " 
[-8.43732668e+07 -8.43034375e+07 -8.45141529e+07 -8.45016754e+07]\n", 181 | " [-8.43299231e+07 -8.42295796e+07 -8.43828023e+07 -8.44274592e+07]\n", 182 | " [-8.42752973e+07 -8.41013441e+07 -8.42200820e+07 -8.43034375e+07]\n", 183 | " [-8.41574749e+07 -8.40842240e+07 -8.37797817e+07 -8.42295796e+07]\n", 184 | " [-8.41261825e+07 -8.40842240e+07 -8.29159133e+07 -8.41013441e+07]\n", 185 | " [-8.45377911e+07 -8.45380422e+07 -8.45383065e+07 -8.45380775e+07]\n", 186 | " [-8.45376996e+07 -8.45379755e+07 -8.45384055e+07 -8.45380775e+07]\n", 187 | " [-8.45375286e+07 -8.45377492e+07 -8.45384055e+07 -8.45380422e+07]\n", 188 | " [-8.45370697e+07 -8.45369016e+07 -8.45384055e+07 -8.45379755e+07]\n", 189 | " [-8.45356110e+07 -8.45269982e+07 -8.45384055e+07 -8.45377492e+07]\n", 190 | " [-8.45197858e+07 -8.45248126e+07 -8.45384055e+07 -8.45369016e+07]\n", 191 | " [-8.45016754e+07 -8.45141529e+07 -8.45384055e+07 -8.45269982e+07]\n", 192 | " [-8.44274592e+07 -8.43828023e+07 -8.45384055e+07 -8.45248126e+07]\n", 193 | " [-8.43034375e+07 -8.42200820e+07 -8.45384055e+07 -8.45141529e+07]\n", 194 | " [-8.42295796e+07 -8.37797817e+07 -8.45384055e+07 -8.43828023e+07]\n", 195 | " [-8.41013441e+07 -8.29159133e+07 -8.45384055e+07 -8.42200820e+07]\n", 196 | " [-8.40842240e+07 -8.29159133e+07 -1.00000000e+00 -8.37797817e+07]\n", 197 | " [-8.45380775e+07 -8.45384055e+07 -8.45383065e+07 -8.45383065e+07]\n", 198 | " [-8.45380422e+07 -8.45384055e+07 -8.45384055e+07 -8.45383065e+07]\n", 199 | " [-8.45379755e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 200 | " [-8.45377492e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 201 | " [-8.45369016e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 202 | " [-8.45269982e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 203 | " [-8.45248126e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 204 | " [-8.45141529e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 205 | " [-8.43828023e+07 -8.45384055e+07 -8.45384055e+07 
-8.45384055e+07]\n", 206 | " [-8.42200820e+07 -8.45384055e+07 -8.45384055e+07 -8.45384055e+07]\n", 207 | " [-8.37797817e+07 -1.00000000e+00 -8.45384055e+07 -8.45384055e+07]\n", 208 | " [ 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]]\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "# 评估随机策略\n", 214 | "\n", 215 | "policy = np.random.uniform(size=(env.nS, env.nA))\n", 216 | "policy = policy / np.sum(policy, axis=1)[:, np.newaxis]\n", 217 | "state_values, action_values = evaluate_bellman(env, policy)\n", 218 | "print('状态价值 = {}'.format(state_values))\n", 219 | "print('动作价值 = {}'.format(action_values))" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "最优状态价值 = [-14. -13. -12. -11. -10. -9. -8. -7. -6. -5. -4. -3. -13. -12.\n", 232 | " -11. -10. -9. -8. -7. -6. -5. -4. -3. -2. -12. -11. -10. -9.\n", 233 | " -8. -7. -6. -5. -4. -3. -2. -1. -13. -12. -11. -10. -9. -8.\n", 234 | " -7. -6. -5. -4. -3. 0.]\n", 235 | "最优动作价值 = [[ -15. -14. -14. -15.]\n", 236 | " [ -14. -13. -13. -15.]\n", 237 | " [ -13. -12. -12. -14.]\n", 238 | " [ -12. -11. -11. -13.]\n", 239 | " [ -11. -10. -10. -12.]\n", 240 | " [ -10. -9. -9. -11.]\n", 241 | " [ -9. -8. -8. -10.]\n", 242 | " [ -8. -7. -7. -9.]\n", 243 | " [ -7. -6. -6. -8.]\n", 244 | " [ -6. -5. -5. -7.]\n", 245 | " [ -5. -4. -4. -6.]\n", 246 | " [ -4. -4. -3. -5.]\n", 247 | " [ -15. -13. -13. -14.]\n", 248 | " [ -14. -12. -12. -14.]\n", 249 | " [ -13. -11. -11. -13.]\n", 250 | " [ -12. -10. -10. -12.]\n", 251 | " [ -11. -9. -9. -11.]\n", 252 | " [ -10. -8. -8. -10.]\n", 253 | " [ -9. -7. -7. -9.]\n", 254 | " [ -8. -6. -6. -8.]\n", 255 | " [ -7. -5. -5. -7.]\n", 256 | " [ -6. -4. -4. -6.]\n", 257 | " [ -5. -3. -3. -5.]\n", 258 | " [ -4. -3. -2. -4.]\n", 259 | " [ -14. -12. -14. -13.]\n", 260 | " [ -13. -11. -113. -13.]\n", 261 | " [ -12. -10. -113. 
-12.]\n", 262 | " [ -11. -9. -113. -11.]\n", 263 | " [ -10. -8. -113. -10.]\n", 264 | " [ -9. -7. -113. -9.]\n", 265 | " [ -8. -6. -113. -8.]\n", 266 | " [ -7. -5. -113. -7.]\n", 267 | " [ -6. -4. -113. -6.]\n", 268 | " [ -5. -3. -113. -5.]\n", 269 | " [ -4. -2. -113. -4.]\n", 270 | " [ -3. -2. -1. -3.]\n", 271 | " [ -13. -113. -14. -14.]\n", 272 | " [ -12. -113. -113. -14.]\n", 273 | " [ -11. -113. -113. -113.]\n", 274 | " [ -10. -113. -113. -113.]\n", 275 | " [ -9. -113. -113. -113.]\n", 276 | " [ -8. -113. -113. -113.]\n", 277 | " [ -7. -113. -113. -113.]\n", 278 | " [ -6. -113. -113. -113.]\n", 279 | " [ -5. -113. -113. -113.]\n", 280 | " [ -4. -113. -113. -113.]\n", 281 | " [ -3. -1. -113. -113.]\n", 282 | " [ 0. 0. 0. 0.]]\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "# 评估最优策略\n", 288 | "\n", 289 | "optimal_state_values, optimal_action_values = evaluate_bellman(env, optimal_policy)\n", 290 | "print('最优状态价值 = {}'.format(optimal_state_values))\n", 291 | "print('最优动作价值 = {}'.format(optimal_action_values))" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 8, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "name": "stdout", 301 | "output_type": "stream", 302 | "text": [ 303 | "最优状态价值 = [-1.40000000e+01 -1.30000000e+01 -1.20000000e+01 -1.10000000e+01\n", 304 | " -1.00000000e+01 -9.00000000e+00 -8.00000000e+00 -7.00000000e+00\n", 305 | " -6.00000000e+00 -5.00000000e+00 -4.00000000e+00 -3.00000000e+00\n", 306 | " -1.30000000e+01 -1.20000000e+01 -1.10000000e+01 -1.00000000e+01\n", 307 | " -9.00000000e+00 -8.00000000e+00 -7.00000000e+00 -6.00000000e+00\n", 308 | " -5.00000000e+00 -4.00000000e+00 -3.00000000e+00 -2.00000000e+00\n", 309 | " -1.20000000e+01 -1.10000000e+01 -1.00000000e+01 -9.00000000e+00\n", 310 | " -8.00000000e+00 -7.00000000e+00 -6.00000000e+00 -5.00000000e+00\n", 311 | " -4.00000000e+00 -3.00000000e+00 -2.00000000e+00 -1.00000000e+00\n", 312 | " -1.30000000e+01 -1.20000000e+01 -1.10000000e+01 
-1.00000000e+01\n", 313 | " -9.00000000e+00 -8.00000000e+00 -7.00000000e+00 -6.00000000e+00\n", 314 | " -5.00000000e+00 -4.00000000e+00 -9.99999999e-01 1.82270928e-11]\n", 315 | "最优动作价值 = [[ -14.99999999 -13.99999999 -13.99999999 -14.99999999]\n", 316 | " [ -13.99999999 -13. -13. -14.99999999]\n", 317 | " [ -13. -12. -12. -13.99999999]\n", 318 | " [ -12. -11. -11. -13. ]\n", 319 | " [ -11. -10. -10. -12. ]\n", 320 | " [ -10. -9. -9. -11. ]\n", 321 | " [ -9. -8. -8. -10. ]\n", 322 | " [ -8. -7. -7. -9. ]\n", 323 | " [ -7. -6. -6. -8. ]\n", 324 | " [ -6. -5. -5. -7. ]\n", 325 | " [ -5. -4. -4. -6. ]\n", 326 | " [ -4. -4. -3. -5. ]\n", 327 | " [ -14.99999999 -13. -13. -13.99999999]\n", 328 | " [ -13.99999999 -12. -12. -13.99999999]\n", 329 | " [ -13. -11. -11. -13. ]\n", 330 | " [ -12. -10. -10. -12. ]\n", 331 | " [ -11. -9. -9. -11. ]\n", 332 | " [ -10. -8. -8. -10. ]\n", 333 | " [ -9. -7. -7. -9. ]\n", 334 | " [ -8. -6. -6. -8. ]\n", 335 | " [ -7. -5. -5. -7. ]\n", 336 | " [ -6. -4. -4. -6. ]\n", 337 | " [ -5. -3. -3. -5. ]\n", 338 | " [ -4. -3. -2. -4. ]\n", 339 | " [ -13.99999999 -12. -14. -13. ]\n", 340 | " [ -13. -11. -113. -13. ]\n", 341 | " [ -12. -10. -113. -12. ]\n", 342 | " [ -11. -9. -113. -11. ]\n", 343 | " [ -10. -8. -113. -10. ]\n", 344 | " [ -9. -7. -113. -9. ]\n", 345 | " [ -8. -6. -113. -8. ]\n", 346 | " [ -7. -5. -113. -7. ]\n", 347 | " [ -6. -4. -113. -6. ]\n", 348 | " [ -5. -3. -113. -5. ]\n", 349 | " [ -4. -2. -113. -4. ]\n", 350 | " [ -3. -2. -1. -3. ]\n", 351 | " [ -13. -113. -14. -14. ]\n", 352 | " [ -12. -113. -113. -14. ]\n", 353 | " [ -11. -113. -113. -113. ]\n", 354 | " [ -10. -113. -113. -113. ]\n", 355 | " [ -9. -113. -113. -113. ]\n", 356 | " [ -8. -113. -113. -113. ]\n", 357 | " [ -7. -113. -113. -113. ]\n", 358 | " [ -6. -113. -113. -113. ]\n", 359 | " [ -5. -113. -113. -113. ]\n", 360 | " [ -4. -113. -113. -113. ]\n", 361 | " [ -3. -1. -113. -113. ]\n", 362 | " [ 0. 0. 0. 0. 
]]\n" 363 | ] 364 | } 365 | ], 366 | "source": [ 367 | "# 用线性规划求解Bellman最优方程\n", 368 | "\n", 369 | "def optimal_bellman(env, gamma=1.0):\n", 370 | " p = np.zeros((env.nS, env.nA, env.nS))\n", 371 | " r = np.zeros((env.nS, env.nA))\n", 372 | " for state in range(env.nS - 1):\n", 373 | " for action in range(env.nA):\n", 374 | " for prob, next_state, reward, done in env.P[state][action]:\n", 375 | " p[state, action, next_state] += prob\n", 376 | " r[state, action] += reward * prob\n", 377 | " \n", 378 | " c = np.ones((env.nS))\n", 379 | " a_ub = gamma * p.reshape(-1, env.nS) - np.repeat(np.eye(env.nS), env.nA, axis=0)\n", 380 | " b_ub = -r.reshape(-1)\n", 381 | " \n", 382 | " bounds = [(None, None),] * env.nS\n", 383 | " res = scipy.optimize.linprog(c, a_ub, b_ub, bounds=bounds, method='interior-point')\n", 384 | " v = res.x\n", 385 | " q = r + gamma * np.dot(p, v)\n", 386 | " return v, q\n", 387 | "\n", 388 | "optimal_state_values, optimal_action_values = optimal_bellman(env)\n", 389 | "print('最优状态价值 = {}'.format(optimal_state_values))\n", 390 | "print('最优动作价值 = {}'.format(optimal_action_values))" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 9, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "name": "stdout", 400 | "output_type": "stream", 401 | "text": [ 402 | "最优策略 = \n", 403 | "[[2 1 1 1 1 1 1 1 1 1 1 2]\n", 404 | " [1 1 1 1 1 1 1 1 1 1 1 2]\n", 405 | " [1 1 1 1 1 1 1 1 1 1 1 2]\n", 406 | " [0 0 0 0 0 0 0 0 0 0 1 0]]\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "# 用最优动作价值确定最优确定性策略\n", 412 | "\n", 413 | "optimal_actions = optimal_action_values.argmax(axis=1)\n", 414 | "print('最优策略 = \\n{}'.format(optimal_actions.reshape(env.shape)))" 415 | ] 416 | } 417 | ], 418 | "metadata": { 419 | "kernelspec": { 420 | "display_name": "Python 3", 421 | "language": "python", 422 | "name": "python3" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 3 428 | }, 429 | "file_extension": 
".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython3", 434 | "version": "3.7.6" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 4 439 | } 440 | -------------------------------------------------------------------------------- /Chapter3-有模型数值迭代/3.5-案例:冰面滑行.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gym\n", 10 | "import numpy as np" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "观测空间:Discrete(16)\n", 23 | "动作空间:Discrete(4)\n" 24 | ] 25 | }, 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "{0: {0: [(0.3333333333333333, 0, 0.0, False),\n", 30 | " (0.3333333333333333, 0, 0.0, False),\n", 31 | " (0.3333333333333333, 4, 0.0, False)],\n", 32 | " 1: [(0.3333333333333333, 0, 0.0, False),\n", 33 | " (0.3333333333333333, 4, 0.0, False),\n", 34 | " (0.3333333333333333, 1, 0.0, False)],\n", 35 | " 2: [(0.3333333333333333, 4, 0.0, False),\n", 36 | " (0.3333333333333333, 1, 0.0, False),\n", 37 | " (0.3333333333333333, 0, 0.0, False)],\n", 38 | " 3: [(0.3333333333333333, 1, 0.0, False),\n", 39 | " (0.3333333333333333, 0, 0.0, False),\n", 40 | " (0.3333333333333333, 0, 0.0, False)]},\n", 41 | " 1: {0: [(0.3333333333333333, 1, 0.0, False),\n", 42 | " (0.3333333333333333, 0, 0.0, False),\n", 43 | " (0.3333333333333333, 5, 0.0, True)],\n", 44 | " 1: [(0.3333333333333333, 0, 0.0, False),\n", 45 | " (0.3333333333333333, 5, 0.0, True),\n", 46 | " (0.3333333333333333, 2, 0.0, False)],\n", 47 | " 2: [(0.3333333333333333, 5, 0.0, True),\n", 48 | " (0.3333333333333333, 2, 0.0, False),\n", 49 | " (0.3333333333333333, 1, 0.0, False)],\n", 50 | " 3: 
[(0.3333333333333333, 2, 0.0, False),\n", 51 | " (0.3333333333333333, 1, 0.0, False),\n", 52 | " (0.3333333333333333, 0, 0.0, False)]},\n", 53 | " 2: {0: [(0.3333333333333333, 2, 0.0, False),\n", 54 | " (0.3333333333333333, 1, 0.0, False),\n", 55 | " (0.3333333333333333, 6, 0.0, False)],\n", 56 | " 1: [(0.3333333333333333, 1, 0.0, False),\n", 57 | " (0.3333333333333333, 6, 0.0, False),\n", 58 | " (0.3333333333333333, 3, 0.0, False)],\n", 59 | " 2: [(0.3333333333333333, 6, 0.0, False),\n", 60 | " (0.3333333333333333, 3, 0.0, False),\n", 61 | " (0.3333333333333333, 2, 0.0, False)],\n", 62 | " 3: [(0.3333333333333333, 3, 0.0, False),\n", 63 | " (0.3333333333333333, 2, 0.0, False),\n", 64 | " (0.3333333333333333, 1, 0.0, False)]},\n", 65 | " 3: {0: [(0.3333333333333333, 3, 0.0, False),\n", 66 | " (0.3333333333333333, 2, 0.0, False),\n", 67 | " (0.3333333333333333, 7, 0.0, True)],\n", 68 | " 1: [(0.3333333333333333, 2, 0.0, False),\n", 69 | " (0.3333333333333333, 7, 0.0, True),\n", 70 | " (0.3333333333333333, 3, 0.0, False)],\n", 71 | " 2: [(0.3333333333333333, 7, 0.0, True),\n", 72 | " (0.3333333333333333, 3, 0.0, False),\n", 73 | " (0.3333333333333333, 3, 0.0, False)],\n", 74 | " 3: [(0.3333333333333333, 3, 0.0, False),\n", 75 | " (0.3333333333333333, 3, 0.0, False),\n", 76 | " (0.3333333333333333, 2, 0.0, False)]},\n", 77 | " 4: {0: [(0.3333333333333333, 0, 0.0, False),\n", 78 | " (0.3333333333333333, 4, 0.0, False),\n", 79 | " (0.3333333333333333, 8, 0.0, False)],\n", 80 | " 1: [(0.3333333333333333, 4, 0.0, False),\n", 81 | " (0.3333333333333333, 8, 0.0, False),\n", 82 | " (0.3333333333333333, 5, 0.0, True)],\n", 83 | " 2: [(0.3333333333333333, 8, 0.0, False),\n", 84 | " (0.3333333333333333, 5, 0.0, True),\n", 85 | " (0.3333333333333333, 0, 0.0, False)],\n", 86 | " 3: [(0.3333333333333333, 5, 0.0, True),\n", 87 | " (0.3333333333333333, 0, 0.0, False),\n", 88 | " (0.3333333333333333, 4, 0.0, False)]},\n", 89 | " 5: {0: [(1.0, 5, 0, True)],\n", 90 | " 1: [(1.0, 5, 0, 
True)],\n", 91 | " 2: [(1.0, 5, 0, True)],\n", 92 | " 3: [(1.0, 5, 0, True)]},\n", 93 | " 6: {0: [(0.3333333333333333, 2, 0.0, False),\n", 94 | " (0.3333333333333333, 5, 0.0, True),\n", 95 | " (0.3333333333333333, 10, 0.0, False)],\n", 96 | " 1: [(0.3333333333333333, 5, 0.0, True),\n", 97 | " (0.3333333333333333, 10, 0.0, False),\n", 98 | " (0.3333333333333333, 7, 0.0, True)],\n", 99 | " 2: [(0.3333333333333333, 10, 0.0, False),\n", 100 | " (0.3333333333333333, 7, 0.0, True),\n", 101 | " (0.3333333333333333, 2, 0.0, False)],\n", 102 | " 3: [(0.3333333333333333, 7, 0.0, True),\n", 103 | " (0.3333333333333333, 2, 0.0, False),\n", 104 | " (0.3333333333333333, 5, 0.0, True)]},\n", 105 | " 7: {0: [(1.0, 7, 0, True)],\n", 106 | " 1: [(1.0, 7, 0, True)],\n", 107 | " 2: [(1.0, 7, 0, True)],\n", 108 | " 3: [(1.0, 7, 0, True)]},\n", 109 | " 8: {0: [(0.3333333333333333, 4, 0.0, False),\n", 110 | " (0.3333333333333333, 8, 0.0, False),\n", 111 | " (0.3333333333333333, 12, 0.0, True)],\n", 112 | " 1: [(0.3333333333333333, 8, 0.0, False),\n", 113 | " (0.3333333333333333, 12, 0.0, True),\n", 114 | " (0.3333333333333333, 9, 0.0, False)],\n", 115 | " 2: [(0.3333333333333333, 12, 0.0, True),\n", 116 | " (0.3333333333333333, 9, 0.0, False),\n", 117 | " (0.3333333333333333, 4, 0.0, False)],\n", 118 | " 3: [(0.3333333333333333, 9, 0.0, False),\n", 119 | " (0.3333333333333333, 4, 0.0, False),\n", 120 | " (0.3333333333333333, 8, 0.0, False)]},\n", 121 | " 9: {0: [(0.3333333333333333, 5, 0.0, True),\n", 122 | " (0.3333333333333333, 8, 0.0, False),\n", 123 | " (0.3333333333333333, 13, 0.0, False)],\n", 124 | " 1: [(0.3333333333333333, 8, 0.0, False),\n", 125 | " (0.3333333333333333, 13, 0.0, False),\n", 126 | " (0.3333333333333333, 10, 0.0, False)],\n", 127 | " 2: [(0.3333333333333333, 13, 0.0, False),\n", 128 | " (0.3333333333333333, 10, 0.0, False),\n", 129 | " (0.3333333333333333, 5, 0.0, True)],\n", 130 | " 3: [(0.3333333333333333, 10, 0.0, False),\n", 131 | " (0.3333333333333333, 5, 
0.0, True),\n", 132 | " (0.3333333333333333, 8, 0.0, False)]},\n", 133 | " 10: {0: [(0.3333333333333333, 6, 0.0, False),\n", 134 | " (0.3333333333333333, 9, 0.0, False),\n", 135 | " (0.3333333333333333, 14, 0.0, False)],\n", 136 | " 1: [(0.3333333333333333, 9, 0.0, False),\n", 137 | " (0.3333333333333333, 14, 0.0, False),\n", 138 | " (0.3333333333333333, 11, 0.0, True)],\n", 139 | " 2: [(0.3333333333333333, 14, 0.0, False),\n", 140 | " (0.3333333333333333, 11, 0.0, True),\n", 141 | " (0.3333333333333333, 6, 0.0, False)],\n", 142 | " 3: [(0.3333333333333333, 11, 0.0, True),\n", 143 | " (0.3333333333333333, 6, 0.0, False),\n", 144 | " (0.3333333333333333, 9, 0.0, False)]},\n", 145 | " 11: {0: [(1.0, 11, 0, True)],\n", 146 | " 1: [(1.0, 11, 0, True)],\n", 147 | " 2: [(1.0, 11, 0, True)],\n", 148 | " 3: [(1.0, 11, 0, True)]},\n", 149 | " 12: {0: [(1.0, 12, 0, True)],\n", 150 | " 1: [(1.0, 12, 0, True)],\n", 151 | " 2: [(1.0, 12, 0, True)],\n", 152 | " 3: [(1.0, 12, 0, True)]},\n", 153 | " 13: {0: [(0.3333333333333333, 9, 0.0, False),\n", 154 | " (0.3333333333333333, 12, 0.0, True),\n", 155 | " (0.3333333333333333, 13, 0.0, False)],\n", 156 | " 1: [(0.3333333333333333, 12, 0.0, True),\n", 157 | " (0.3333333333333333, 13, 0.0, False),\n", 158 | " (0.3333333333333333, 14, 0.0, False)],\n", 159 | " 2: [(0.3333333333333333, 13, 0.0, False),\n", 160 | " (0.3333333333333333, 14, 0.0, False),\n", 161 | " (0.3333333333333333, 9, 0.0, False)],\n", 162 | " 3: [(0.3333333333333333, 14, 0.0, False),\n", 163 | " (0.3333333333333333, 9, 0.0, False),\n", 164 | " (0.3333333333333333, 12, 0.0, True)]},\n", 165 | " 14: {0: [(0.3333333333333333, 10, 0.0, False),\n", 166 | " (0.3333333333333333, 13, 0.0, False),\n", 167 | " (0.3333333333333333, 14, 0.0, False)],\n", 168 | " 1: [(0.3333333333333333, 13, 0.0, False),\n", 169 | " (0.3333333333333333, 14, 0.0, False),\n", 170 | " (0.3333333333333333, 15, 1.0, True)],\n", 171 | " 2: [(0.3333333333333333, 14, 0.0, False),\n", 172 | " 
(0.3333333333333333, 15, 1.0, True),\n", 173 | " (0.3333333333333333, 10, 0.0, False)],\n", 174 | " 3: [(0.3333333333333333, 15, 1.0, True),\n", 175 | " (0.3333333333333333, 10, 0.0, False),\n", 176 | " (0.3333333333333333, 13, 0.0, False)]},\n", 177 | " 15: {0: [(1.0, 15, 0, True)],\n", 178 | " 1: [(1.0, 15, 0, True)],\n", 179 | " 2: [(1.0, 15, 0, True)],\n", 180 | " 3: [(1.0, 15, 0, True)]}}" 181 | ] 182 | }, 183 | "execution_count": 2, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "env = gym.make('FrozenLake-v0')\n", 190 | "env = env.unwrapped\n", 191 | "\n", 192 | "print('观测空间:{}'.format(env.observation_space))\n", 193 | "print('动作空间:{}'.format(env.action_space))\n", 194 | "env.P" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 3, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# 用策略执行一个回合\n", 204 | "\n", 205 | "def play_policy(env, policy, render=False):\n", 206 | " total_reward = 0\n", 207 | " observation = env.reset()\n", 208 | " while True:\n", 209 | " if render:\n", 210 | " env.render()\n", 211 | " \n", 212 | " action = np.random.choice(env.action_space.n, p=policy[observation])\n", 213 | " observation, reward, done, _ = env.step(action)\n", 214 | " total_reward += reward\n", 215 | " if done:\n", 216 | " break\n", 217 | " \n", 218 | " return total_reward" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 4, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "随机策略 平均奖励 = 0.0\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "# 求随机策略的期望奖励\n", 236 | "\n", 237 | "random_policy = np.ones((env.nS, env.nA)) / env.nA\n", 238 | "episode_rewards = [play_policy(env, random_policy) for _ in range(100)]\n", 239 | "print('随机策略 平均奖励 = {}'.format(np.mean(episode_rewards)))" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 5, 245 | 
"metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "# 策略评估的实现\n", 249 | "\n", 250 | "# 根据状态价值函数计算动作价值函数\n", 251 | "def v2q(env, v, s=None, gamma=1.0):\n", 252 | " # 针对单个状态求解\n", 253 | " if s is not None:\n", 254 | " q = np.zeros(env.nA)\n", 255 | " for a in range(env.nA):\n", 256 | " for prob, next_state, reward, done in env.P[s][a]:\n", 257 | " q[a] += prob * (reward + gamma * v[next_state] * (1.0 - done))\n", 258 | " else: # 针对所有状态求解\n", 259 | " q = np.zeros((env.nS, env.nA))\n", 260 | " for s in range(env.nS):\n", 261 | " q[s] = v2q(env, v, s, gamma)\n", 262 | " return q\n", 263 | "\n", 264 | "def evaluate_policy(env, policy, gamma=1.0, tolerant=1e-6):\n", 265 | " v = np.zeros(env.nS)\n", 266 | " while True:\n", 267 | " delta = 0\n", 268 | " for s in range(env.nS):\n", 269 | " vs = sum(policy[s] * v2q(env, v, s, gamma))\n", 270 | " delta = max(delta, abs(v[s] - vs))\n", 271 | " v[s] = vs\n", 272 | " if delta < tolerant:\n", 273 | " break\n", 274 | " return v" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 6, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "name": "stdout", 284 | "output_type": "stream", 285 | "text": [ 286 | "状态价值函数:\n", 287 | "[[0.0139372 0.01162942 0.02095187 0.01047569]\n", 288 | " [0.01624741 0. 0.04075119 0. ]\n", 289 | " [0.03480561 0.08816967 0.14205297 0. ]\n", 290 | " [0. 0.17582021 0.43929104 0. ]]\n", 291 | "动作状态价值:\n", 292 | "[[0.01470727 0.01393801 0.01393801 0.01316794]\n", 293 | " [0.00852221 0.01162969 0.01086043 0.01550616]\n", 294 | " [0.02444416 0.0209521 0.02405958 0.01435233]\n", 295 | " [0.01047585 0.01047585 0.00698379 0.01396775]\n", 296 | " [0.02166341 0.01701767 0.0162476 0.01006154]\n", 297 | " [0. 0. 0. 0. ]\n", 298 | " [0.05433495 0.04735099 0.05433495 0.00698396]\n", 299 | " [0. 0. 0. 0. 
]\n", 300 | " [0.01701767 0.04099176 0.03480569 0.04640756]\n", 301 | " [0.0702086 0.11755959 0.10595772 0.05895286]\n", 302 | " [0.18940397 0.17582024 0.16001408 0.04297362]\n", 303 | " [0. 0. 0. 0. ]\n", 304 | " [0. 0. 0. 0. ]\n", 305 | " [0.08799662 0.20503708 0.23442697 0.17582024]\n", 306 | " [0.25238807 0.53837042 0.52711467 0.43929106]\n", 307 | " [0. 0. 0. 0. ]]\n" 308 | ] 309 | } 310 | ], 311 | "source": [ 312 | "# 对随机策略进行策略评估\n", 313 | "\n", 314 | "print('状态价值函数:')\n", 315 | "v_random = evaluate_policy(env, random_policy)\n", 316 | "print(v_random.reshape(4, 4))\n", 317 | "\n", 318 | "print('动作状态价值:')\n", 319 | "q_random = v2q(env, v_random)\n", 320 | "print(q_random)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 7, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# 策略改进的实现\n", 330 | "\n", 331 | "def improve_policy(env, v, policy, gamma=1.0):\n", 332 | " optimal = True\n", 333 | " for s in range(env.nS):\n", 334 | " q = v2q(env, v, s, gamma)\n", 335 | " a = np.argmax(q)\n", 336 | " if policy[s][a] != 1.0:\n", 337 | " optimal = False\n", 338 | " policy[s] = 0.0\n", 339 | " policy[s][a] = 1.0\n", 340 | " return optimal" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 8, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "name": "stdout", 350 | "output_type": "stream", 351 | "text": [ 352 | "有更新,更新后的策略为:\n", 353 | "[[1. 0. 0. 0.]\n", 354 | " [0. 0. 0. 1.]\n", 355 | " [1. 0. 0. 0.]\n", 356 | " [0. 0. 0. 1.]\n", 357 | " [1. 0. 0. 0.]\n", 358 | " [1. 0. 0. 0.]\n", 359 | " [1. 0. 0. 0.]\n", 360 | " [1. 0. 0. 0.]\n", 361 | " [0. 0. 0. 1.]\n", 362 | " [0. 1. 0. 0.]\n", 363 | " [1. 0. 0. 0.]\n", 364 | " [1. 0. 0. 0.]\n", 365 | " [1. 0. 0. 0.]\n", 366 | " [0. 0. 1. 0.]\n", 367 | " [0. 1. 0. 0.]\n", 368 | " [1. 0. 0. 
0.]]\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "# 对随机策略进行策略改进\n", 374 | "\n", 375 | "policy = random_policy.copy()\n", 376 | "optimal = improve_policy(env, v_random, policy)\n", 377 | "if optimal:\n", 378 | " print('无更新,最优策略为:')\n", 379 | "else:\n", 380 | " print('有更新,更新后的策略为:')\n", 381 | "print(policy)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 9, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "# 策略迭代的实现\n", 391 | "\n", 392 | "def iterate_policy(env, gamma=1.0, tolerant=1e-6):\n", 393 | " policy = np.ones((env.nS, env.nA)) / env.nA\n", 394 | " while True:\n", 395 | " v = evaluate_policy(env, policy, gamma, tolerant)\n", 396 | " if improve_policy(env, v, policy):\n", 397 | " break\n", 398 | " return policy, v" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 10, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "状态价值函数 =\n", 411 | "[[0.82351246 0.82350689 0.82350303 0.82350106]\n", 412 | " [0.82351416 0. 0.5294002 0. ]\n", 413 | " [0.82351683 0.82352026 0.76469786 0. ]\n", 414 | " [0. 0.88234658 0.94117323 0. 
]]\n", 415 | "最优策略 =\n", 416 | "[[0 3 3 3]\n", 417 | " [0 0 0 0]\n", 418 | " [3 1 0 0]\n", 419 | " [0 2 1 0]]\n" 420 | ] 421 | } 422 | ], 423 | "source": [ 424 | "# 利用策略迭代求解最优策略\n", 425 | "\n", 426 | "policy_pi, v_pi = iterate_policy(env)\n", 427 | "print('状态价值函数 =')\n", 428 | "print(v_pi.reshape(4, 4))\n", 429 | "print('最优策略 =')\n", 430 | "print(np.argmax(policy_pi, axis=1).reshape(4, 4))" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 11, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "# 价值迭代的实现\n", 440 | "\n", 441 | "def iterate_value(env, gamma=1.0, tolerant=1e-6):\n", 442 | " v = np.zeros(env.nS)\n", 443 | " while True:\n", 444 | " delta = 0\n", 445 | " for s in range(env.nS):\n", 446 | " vmax = max(v2q(env, v, s, gamma))\n", 447 | " delta = max(delta, abs(v[s] - vmax))\n", 448 | " v[s] = vmax\n", 449 | " if delta < tolerant:\n", 450 | " break\n", 451 | " \n", 452 | " policy = np.zeros((env.nS, env.nA))\n", 453 | " for s in range(env.nS):\n", 454 | " a = np.argmax(v2q(env, v, s, gamma))\n", 455 | " policy[s][a] = 1.0\n", 456 | " return policy, v" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 12, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "name": "stdout", 466 | "output_type": "stream", 467 | "text": [ 468 | "状态价值函数 =\n", 469 | "[[0.82351232 0.82350671 0.82350281 0.82350083]\n", 470 | " [0.82351404 0. 0.52940011 0. ]\n", 471 | " [0.82351673 0.82352018 0.76469779 0. ]\n", 472 | " [0. 0.88234653 0.94117321 0. 
]]\n", 473 | "最优策略 =\n", 474 | "[[0 3 3 3]\n", 475 | " [0 0 0 0]\n", 476 | " [3 1 0 0]\n", 477 | " [0 2 1 0]]\n", 478 | "价值迭代 平均奖励:0.0\n" 479 | ] 480 | } 481 | ], 482 | "source": [ 483 | "# 利用价值迭代算法求解最优策略\n", 484 | "\n", 485 | "policy_vi, v_vi = iterate_value(env)\n", 486 | "print('状态价值函数 =')\n", 487 | "print(v_vi.reshape((4, 4)))\n", 488 | "print('最优策略 =')\n", 489 | "print(np.argmax(policy_vi, axis=1).reshape(4, 4))\n", 490 | "episode_rewards = [play_policy(env, policy_vi) for _ in range(100)]\n", 491 | "print('价值迭代 平均奖励:{}'.format(np.mean(episode_rewards)))" 492 | ] 493 | } 494 | ], 495 | "metadata": { 496 | "kernelspec": { 497 | "display_name": "Python 3", 498 | "language": "python", 499 | "name": "python3" 500 | }, 501 | "language_info": { 502 | "codemirror_mode": { 503 | "name": "ipython", 504 | "version": 3 505 | }, 506 | "file_extension": ".py", 507 | "mimetype": "text/x-python", 508 | "name": "python", 509 | "nbconvert_exporter": "python", 510 | "pygments_lexer": "ipython3", 511 | "version": "3.7.6" 512 | } 513 | }, 514 | "nbformat": 4, 515 | "nbformat_minor": 4 516 | } 517 | -------------------------------------------------------------------------------- /Chapter4-回合更新价值迭代/4.3-案例:21点游戏.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import gym\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "from tqdm.notebook import tqdm\n", 14 | "\n", 15 | "np.random.seed(0)" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "观测空间: Tuple(Discrete(32), Discrete(11), Discrete(2))\n", 28 | "动作空间: Discrete(2)\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "env = gym.make('Blackjack-v0')\n", 34 | "env.seed(0)\n", 35 | 
"\n", 36 | "print('观测空间: {}'.format(env.observation_space))\n", 37 | "print('动作空间: {}'.format(env.action_space))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "观测 = (18, 1, False)\n", 50 | "玩家 = [10, 8], 庄家 = [1, 7]\n", 51 | "动作 = 0\n", 52 | "观测 = (18, 1, False), 奖励 = 0.0, 结束指示 = True\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "# 用随机策略玩一个回合\n", 58 | "\n", 59 | "observation = env.reset()\n", 60 | "print('观测 = {}'.format(observation))\n", 61 | "\n", 62 | "while True:\n", 63 | " print('玩家 = {}, 庄家 = {}'.format(env.player, env.dealer))\n", 64 | " action = np.random.choice(env.action_space.n)\n", 65 | " print('动作 = {}'.format(action))\n", 66 | " observation, reward, done, _ = env.step(action)\n", 67 | " print('观测 = {}, 奖励 = {}, 结束指示 = {}'.format(observation, reward, done))\n", 68 | " \n", 69 | " if done:\n", 70 | " break" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# 从观测到状态\n", 80 | "\n", 81 | "def ob2state(observation):\n", 82 | " return (observation[0], observation[1], int(observation[2]))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# 同策回合更新策略评估\n", 92 | "\n", 93 | "def evaluate_action_monte_carlo(env, policy, episode_num=500000):\n", 94 | " q = np.zeros_like(policy)\n", 95 | " c = np.zeros_like(policy)\n", 96 | " \n", 97 | " for _ in tqdm(range(episode_num)):\n", 98 | " state_actions = []\n", 99 | " observation = env.reset()\n", 100 | " while True:\n", 101 | " state = ob2state(observation)\n", 102 | " action = np.random.choice(env.action_space.n, p=policy[state])\n", 103 | " state_actions.append((state, action))\n", 104 | " observation, reward, done, _ = env.step(action)\n", 105 | " \n", 106 | " if done:\n", 107 | " break\n", 108 
| " \n", 109 | " g = reward\n", 110 | " for state, action in state_actions:\n", 111 | " c[state][action] += 1\n", 112 | " q[state][action] += (g - q[state][action]) / c[state][action]\n", 113 | " return q" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 6, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "application/vnd.jupyter.widget-view+json": { 124 | "model_id": "55843890a9a04fe6b676c26cf69065f1", 125 | "version_major": 2, 126 | "version_minor": 0 127 | }, 128 | "text/plain": [ 129 | "HBox(children=(FloatProgress(value=0.0, max=500000.0), HTML(value='')))" 130 | ] 131 | }, 132 | "metadata": {}, 133 | "output_type": "display_data" 134 | }, 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "policy = np.zeros((22, 11, 2, 2))\n", 145 | "policy[20:, :, :, 0] = 1 # >=20时不再要牌\n", 146 | "policy[:20, :, :, 1] = 1 # <20时再要牌\n", 147 | "q = evaluate_action_monte_carlo(env, policy)\n", 148 | "v = (q * policy).sum(axis=-1)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "# 绘制最后一维的指标为0或1的3维数组\n", 158 | "def plot(data):\n", 159 | " fig, axes = plt.subplots(1, 2, figsize=(9, 4))\n", 160 | " titles = ['without ace', 'with ace']\n", 161 | " have_aces = [0, 1]\n", 162 | " extent = [12, 22, 1, 11]\n", 163 | " \n", 164 | " for title, have_ace, axis in zip(titles, have_aces, axes):\n", 165 | " dat = data[extent[0]:extent[1], extent[2]:extent[3], have_ace].T\n", 166 | " axis.imshow(dat, extent=extent, origin='lower')\n", 167 | " axis.set_xlabel('player sum')\n", 168 | " axis.set_ylabel('dealer showing')\n", 169 | " axis.set_title(title)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 8, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAeFklEQVR4nO3deZhkB1nv8e9vemYySzIZyIIhCQxggoAXQxyQGNYAEZBFvS54yb0QkLjwCOKCgIjiwxUBRVGfC+beSCLkgixBQ0ANiwmBC8EQkhCICMRAliH7HjLTPf3eP+qMdnqb6plz+nTVfD/P009Xnap+z1s9dX7z9qlTdVJVSJIkLbdVfTcgSZL2TQ4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4h+6AkdyV56CK3X5Xk6cvZk6TxZN5oMQ4h+6Cq2r+qrgRIcnqSNy3HepO8OMlnl2NdklaGvvJGo8EhRJIk9cIhZEwkOTnJR2dc/2aSD8y4fnWSY5rLleT7k5wCvBB4dbPL9KMzSh6T5LIktyf52yTrZtR6WVP/liRnJ3lgs3xLU3v1jPuel+QXkjwCeBdwXLOu2xZ5HFckuTPJlUl+cdbtz09ySZI7knwryTOb5QcmOS3JtiTXJnlTkom9+JVKWsBy5s2s9T4syaeT3JzkpiRnJtk84/Yjk5yV5MbmPn8547aXNNlya5J/SvLgFn8l2kMOIePjfOCJSVYlOQxYAxwP0Lweuz9w2cwfqKpTgTOBtza7TJ874+afBZ4JPAR4NPDiptYJwJub2w8Dvg28f3fNVdUVwC8Bn2/WtXmBu94APAfYBJwM/GmSY5t1Pw74G+C3gM3Ak4Crmp87A5gCvh94DHAi8Au760vSHlmWvJlHGOTPA4FHAEcCv9+sdwI4h0EmbQEOp8mmJD8BvA74KeAQ4ALgfXv42NWi1bu/i0ZBVV2Z5E7gGOBo4J8Y/HXxA8BxwAVVNb2Ekn9eVdcBNH+xHNMsfyHw11V1cXPba4Fbk2xp6XF8bMbV85OcCzwRuBh4abPuTzS3X9v08ADgWcDmqvoecHeSPwVOAf6qjb4k/adlzJvZ6/0m8M3m6o1J3g78XnP9cQyGk9+qqqlm2a5j0H4ReHPzxxBJ/hB4XZIHV9W3l9CnWuYQMl7OB57CYG/A+cBtwJMZhML5S6z13RmX72GwcdN8v3jXDVV1V5KbGfzVce0edT1DkmcxCJWjGeyp2wB8pbn5SODj8/zYgxn8JbYtya5lq4Cr97YfSQtajry5jySHAn/O4A+TAxhs57c2Nx8JfHvGADLTg4F3JPmTmeUY5JZDSI98OWa87AqFJzaXz2cQCk9m4VBY6mmUr2OwQQOQZCNwEIMB5O5m8YYZ9/++YdeVZD/gw8AfAw9oXrL5OIOwgMFQ8bB5fvRqYDtwcFVtbr42VdWjhn5UkpZqOfJmtjc3NR5dVZuAk7hvPjxo5jFpM1wN/OKMfNhcVeur6v/tZT/aSw4h4+V84KnA+qq6hsHrns9kMCR8eYGfuR5Y8D388/i/wMlJjmmGhj8ELqyqq6rqRgbDyElJJpK8hPsODdcDRyRZu0DttcB+wI3AVLNX5MQZt5/WrPtpzWvRhyf5garaBpwL/EmSTc1tD0vy5CU8LklLsxx5M9sBwF3AbUkOZ3B82C5fBLYBf5RkY5J1SY5vbnsX8Nokj4L/OJD9Z/aiD7XEIWSMVNW/MdhAL2iu3wFcCXyuqnYu8GOnAY9McluSvxtiHZ8CfpfBHottDIaMF8y4y8sYBMPNwKOAmX9pfBr4KvDdJDfNU/tO4BXABxjsYv1vwNkzbv8izcGqwO0MQnDXXpn/wWCI+Vrzsx9icOCspA4sR97M443AsQy2/48BZ83oZyfwXAYvD30HuAb4uea2jwBvAd6f5A7gcgbHkalnqdrbvWOSJElL554QSZLUC4cQSZLUC4cQSZLUC4cQSZLUi5H4sLK1qzf
U+jUHtl+4q4Nyu6jb1fHDS/pQw6XU7aZsV/9mnRygfVQ3m9dD1926+zst0dVXT3HzLdPZ/T1H19o1G2vduoXOFrDyZOcIvWmgqyzd2VE+dRVQ0+3XnXpoNzly1PqbW6+5JzkyEkPI+jUHctxDT26/8OR8H6y397J9sv2i091sjHXvvZ3UZedC79DbO9XRv1lNdVD3rw5uvybw3qM+sPs7LdGJz57zjumxs27dZh77mF9pvW46+v9s4vaOts0OpKssvft7ndSli+2dbvL0lj8/pPWaAOf8lzNar7knOeLLMZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRedDSFJ/jrJDUkun7Hs/kk+keQbzff7dbV+SaPPHJHGW5d7Qk4Hnjlr2WuAT1XVUcCnmuuStJDTMUeksdXZEFJVnwFumbX4+cAZzeUzgJ/oav2SRp85Io231cu8vgdU1TaAqtqW5NCF7pjkFOAUgHVrD2R6w9rWm8lURw8/ab/kjsnWawJk/fpO6tZkR/1OTHRSl+9V6yVf+aBPtl4T4OCJja3XXD3n//kVbY9yZL/1m9l+UPs5su6mHa3XBNh54LrWa67asbP1mp3a0P7vAIDp6U7KZs2a1mu+7qiPt14TVk6OrNgDU6vq1KraWlVb16ze0Hc7kkbQzBxZvV/7oStp7yz3EHJ9ksMAmu83LPP6JY0+c0QaE8s9hJwNvKi5/CLg75d5/ZJGnzkijYku36L7PuDzwMOTXJPkpcAfAc9I8g3gGc11SZqXOSKNt84OTK2qn1/gpqd1tU5J48Uckcbbij0wVZIkjTeHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1IvVfTcwtOn2S2ZyZ/tFAZLWS9aabv6pMj3ZTd0OfgcANV2d1O3C/Sfu6qTuzppovWYxOr/XPVUT4d7N7f/uYG0HNWHNne3n08Sd97ZeE2Dnpv06qTtxRydlyY5uco/V7T+/JtLBf37Azmq/7p7kiHtCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSL3oZQpK8KslXk1ye5H1J1vXRh6TRZY5Io2/Zh5AkhwOvALZW1Q8CE8ALlrsPSaPLHJHGQ18vx6wG1idZDWwAruupD0mjyxyRRtzq5V5hVV2b5I+B7wDfA86tqnN3+4OjdPTKRPvNZmpn6zUBWJVOytbO6U7qsrqjp+zUVOsl7601rdcEmKaj58II2ZMcmdhRbPr29tZ72bluovWaAJmu1mtObermFavVd+3opG5nOshogJpuv+4E7T8PAKY7qrtUfbwccz/g+cBDgAcCG5OcNM/9TklyUZKLJqfuWe42Ja1ge5IjO3bcvdxtStqNPvYvPB3496q6saomgbOAH519p6o6taq2VtXWNas3LHuTkla0JefI2rUbl71JSYvrYwj5DvD4JBuSBHgacEUPfUgaXeaINAaWfQipqguBDwEXA19pejh1ufuQNLrMEWk8LPuBqQBV9XvA7/WxbknjwRyRRt8ovedEkiSNEYcQSZLUi92+HJPkp+ZZfDvwlaq6of2WJI0bc0TSfIY5JuSlwHHAPzfXnwJ8ATg6yR9U1Xs66k3S+DBHJM0xzBAyDTyiqq4HSPIA4J3AjwCfAQwPSbtjjkiaY5hjQrbsCo7GDcDRVXULMNlNW5LGjDkiaY5h9oRckOQc4IPN9f8KfCbJRuC2zjqTNE7MEUlzDDOEvJxBYBwPBPg
b4MNVVcBTO+xN0vgwRyTNsdshpAmJDzVfkrRk5oik+ez2mJAkP5XkG0luT3JHkjuT3LEczUkaD+aIpPkM83LMW4HnVpUnh5K0p8wRSXMM8+6Y6w0OSXvJHJE0xzB7Qi5K8rfA3wHbdy2sqrM660rSuDFHJM0xzBCyCbgHOHHGsgIMD0nDMkckzTHMu2NOXo5GJI0vc0TSfBYcQpK8uqremuQvGPzFch9V9YpOO5M08swRSYtZbE/IroPILlqORhaTgmzf2X7hmpOJ7Zicar/m9HT7NYG6d/vu77SSTO7opu7O9p9f103er/WaAPesva71mtNz54O2rJgcqcDU+onW6665p4PtHciO9rf5dJV5HeVTusqnW2/vpGzWrm295rd2HNp6TYAnrPtG6zX3JEcWHEKq6qPNxQuq6so9bUrSvssckbSYYQ5MPT3J4cC/MDjb5QVV9ZVu25I0ZswRSXMMc2Dqk5KsBR4LPAX4WJL9q+r+XTcnaTyYI5Lms9shJMkTgCc2X5uBc4ALOu5L0hgxRyTNZ5iXY85ncFDZm4GPV1VHRwZKGmPmiKQ5hhlCDmJw+u0nAa9IMg18vqp+t9POJI0Tc0TSHMMcE3JbkiuBI4EjgB8F1nTdmKTxYY5Ims8wx4R8C/g68FngXcDJ7kqVtBTmiKT5DPNyzFFV1c0n0UjaV5gjkuZYNcR9HpjkI0luSHJ9kg8nOaLzziSNE3NE0hzDDCHvBs4GHggcDny0WSZJwzJHJM0xzBBySFW9u6qmmq/TgUM67kvSeDFHJM0xzBByU5KTkkw0XycBN3fdmKSxYo5ImmOYIeQlwM8C3wW2AT/dLJOkYZkjkuYY5nNCvgM8bxl6kTSmzBFJ8xnmc0IOAV4GbJl5/6ryrxhJQzFHJM1nmM8J+XsGJ5r6JLCz23YkjSlzRNIcwwwhG6rqtzvvRNI4M0ckzTHMgannJHl2mytNsjnJh5L8a5IrkhzXZn1JK445ImmOBfeEJLkTKCDA65JsByab61VVm/Zive8A/rGqfjrJWmDDXtSStEKZI5IWs+AQUlUHdLHCJJsYnM77xc16dgCeyEoaQ+aIpMUM8+6Y44FLquru5gOGjgX+rHnL3Z54KHAj8O4kPwR8CXhlVd09a72nAKcArFuzabgXjpZqsjooCtnZwXm6uqgJMN3N76Az6eKJALWz/WMlb965f+s1Aaar/X+zrp8FKyFH9lu/ebD/pWXTE908J9dMTrZec9Ud32u9JgCrJzopm8mpTurWQffrpu4tt7Ve866d61qvCbCz861+OMNsPe8E7mk29FcD3wbesxfrXM0ggN5ZVY8B7gZeM/tOVXVqVW2tqq1rV2/ci9VJWgF6z5E1a80RaaUZZgiZqqoCng+8o6reAezNLtZrgGuq6sLm+ocYhImk8WWOSJpjmCHkziSvBU4CPpZkAlizpyusqu8CVyd5eLPoacDX9rSepJFgjkiaY5gh5OeA7cBLmw3/cOBte7neXwXOTHIZcAzwh3tZT9LKZo5ImmOYc8d8F3j7jOvfAf5mb1ZaVZcAW/emhqTRYY5Imk83h3VLkiTthkOIJEnqxaJDSJKJJO9drmYkjR9zRNJCFh1CqmoncEjzkciStGTmiKSFDHMW3auAzyU5m8EHAgFQVW9f8Cck6b6uwhyRNMswQ8h1zdcq9u7DhSTtu8wRSXMM8xbdNwIk2Tj7vAySNAxzRNJ8dvvumCTHJfkacEVz/YeS/K/OO5M0NswRSfMZ5i26fwb8GHAzQFVdyuAU2pI0LHNE0hxDfU5IVV09a1H75z2XNNbMEUmzDXNg6tVJfhSo5i12r6DZpSpJQzJHJM0xzJ6QXwJezuCEU9cwOFHUy7tsStLYMUckzTHMu2NuAl64DL1IGlPmiKT5LDiEJPkLoBa6vape0UlHksaGOSJpMYvtCblo2bqQNK7MEUkLWnAIqaozlrMRSePHHJG0mN0eE5LkEOC3gUcC63Ytr6oTOuxrrlVDvZt
4aSY6qAnUmmHedLQ0mero3Yyr0k3dqalOytaOHZ3UzcREJ3U1sCJypCDT7ZddNdlBUaA62DZrfTfnENy5oZu6E2vbz1KAVXfc00nduv/m1muuylWt1wSYrgVfJd1je1JxmP+Fz2TwVrqHAG9kcCKqf9mDdUnad5kjkuYYZgg5qKpOAyar6vyqegnw+I77kjRezBFJcwyzr2uy+b4tyY8zOBPmEd21JGkMmSOS5hhmCHlTkgOB3wD+AtgEvKrTriSNG3NE0hzDfFjZOc3F24GndtuOpHFkjkiaz26PCUlydJJPJbm8uf7oJK/vvjVJ48IckTSfYQ5M/d/Aa2le062qy4AXdNmUpLFjjkiaY5ghZENVfXHWsm4+BELSuDJHJM0xzBByU5KH0XwOSZKfBrZ12pWkcWOOSJpjmHfHvBw4FfiBJNcC/w6c1GlXksaNOSJpjmHeHXMl8PQkG4FVVXVn921JGifmiKT5LDiEJPn1BZYDUFVv76gnSWPCHJG0mMX2hBzQfH848Fjg7Ob6c4HPdNmUpLFhjkha0IJDSFW9ESDJucCxu3afJvl94IPL0p2kkWaOSFrMMO+OeRAw8/zpO4AtnXQjaVyZI5LmGObdMe8BvpjkIwzeXveTwBmddiVp3JgjkuYY5t0x/zPJPwBPbBadXFVf7rYtSePEHJE0n2H2hFBVFwMXt7niJBPARcC1VfWcNmtLWnnMEUmzDXNMSFdeCVzR4/oljT5zRBphvQwhSY4Afhz4P32sX9LoM0ek0dfXnpA/A14NTPe0fkmjzxyRRtxQx4S0KclzgBuq6ktJnrLI/U4BTgFYt2YTTHeQM1Xt1+yybgeyqps5tFZNdFK3M80neI6CnYzO86sre5Qjaw9k7W3b2+9lqpsZ6J4jNrRec/1132u9JsDE3e3/XgFW3X53J3W72t5zRze/3y6slBzpY0/I8cDzklwFvB84Icl7Z9+pqk6tqq1VtXXt6o3L3aOklW3JObJmjTkirTTLPoRU1Wur6oiq2gK8APh0VXk2TUlDM0ek8dDnu2MkSdI+bNmPCZmpqs4DzuuzB0mjzRyRRpd7QiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi9W993A0KY7qLmzi6KQqZ3tF52cbL8mUFWd1GVqqpOymZjopG510O9kddNrF8/ajp4FK0qtClMb17Red/Xd3Wyb+3/t5tZrTh62qfWaABO33dNJXTra3vnevZ2UrQ7+T7lp8oDWa8LKyRH3hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF4s+xCS5Mgk/5zkiiRfTfLK5e5B0mgzR6TxsLqHdU4Bv1FVFyc5APhSkk9U1dd66EXSaDJHpDGw7HtCqmpbVV3cXL4TuAI4fLn7kDS6zBFpPPSxJ+Q/JNkCPAa4cJ7bTgFOAVg3cQCrbr+r/QYmJ9uvCUzffU8ndTvR0e+gqrqpu317J3W78I4LTuyk7vbj17Re8+apz7Vec7kMnSOrN7HfNbe338DNt7ZfE+DQg1ovuebL32q9JsDOO+7opO6qjRs7qTt9992d1CVpveSHP3lc6zUB1p3YfvbfOHXekn+mtwNTk+wPfBj4taqa8wyuqlOramtVbV07sX75G5S04i0pR1ZvWP4GJS2qlyEkyRoGwXFmVZ3VRw+SRps5Io2+Pt4dE+A04Iqqevtyr1/S6DNHpPHQx56Q44H/DpyQ5JLm69k99CFpdJkj0hhY9gNTq+qzQPtH70jaZ5gj0njwE1MlSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVI
vHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvUlV997BbSe4Evt53H0M6GLip7yaWYJT6HaVeYbT6fXhVHdB3E10yRzo1Sv2OUq8wWv0uOUdWd9VJy75eVVv7bmIYSS4alV5htPodpV5htPpNclHfPSwDc6Qjo9TvKPUKo9XvnuSIL8dIkqReOIRIkqRejMoQcmrfDSzBKPUKo9XvKPUKo9XvKPW6p0bpMY5SrzBa/Y5SrzBa/S6515E4MFWSJI2fUdkTIkmSxoxDiCRJ6sWKG0KS/HWSG5JcPmPZ25L8a5LLknwkyeY+e9xlvl5n3PabSSrJwX30Np+F+k3yq0m+nuSrSd7aV38zLfA8OCbJF5JckuSiJI/rs8ddkhyZ5J+TXNH8Dl/ZLL9/kk8k+Ubz/X599wqL9rsit7M9YY50Y5QyBMyRLrWWI1W1or6AJwHHApfPWHYisLq5/BbgLX33uVCvzfIjgX8Cvg0c3Hefu/ndPhX4JLBfc/3QvvtcpNdzgWc1l58NnNd3n00vhwHHNpcPAP4NeCTwVuA1zfLXrKDn7UL9rsjtrMXnz4p8fKOUI6OUIYv0a4502++StrMVtyekqj4D3DJr2blVNdVc/QJwxLI3No/5em38KfBqYEUd9btAv78M/FFVbW/uc8OyNzaPBXotYFNz+UDgumVtagFVta2qLm4u3wlcARwOPB84o7nbGcBP9NPhfS3U70rdzvaEOdKNUcoQMEe61FaOrLghZAgvAf6h7yYWkuR5wLVVdWnfvQzpaOCJSS5Mcn6Sx/bd0CJ+DXhbkquBPwZe23M/cyTZAjwGuBB4QFVtg8EGCxzaX2fzm9XvTCt6O2vBin58I5Yjo5QhYI60bm9yZKSGkCS/A0wBZ/bdy3ySbAB+B3hD370swWrgfsDjgd8CPpAk/ba0oF8GXlVVRwKvAk7ruZ/7SLI/8GHg16rqjr772Z2F+l3p29neWumPbwRzZJQyBMyRVu1tjozMEJLkRcBzgBdW82LTCvQw4CHApUmuYrAb6uIk39drV4u7BjirBr4ITDM4YdJK9CLgrObyB4EVcUAZQJI1DDbEM6tqV4/XJzmsuf0wYMXspl6g31HZzvbYiDy+UcuRUcoQMEda00aOjMQQkuSZwG8Dz6uqe/ruZyFV9ZWqOrSqtlTVFgYb57FV9d2eW1vM3wEnACQ5GljLyj1j43XAk5vLJwDf6LGX/9D81XcacEVVvX3GTWczCDya73+/3L3NZ6F+R2U721Oj8vhGMEdGKUPAHGlFaznS9xG2s7+A9wHbgEkGG99LgW8CVwOXNF/v6rvPhXqddftVrJCj2hf53a4F3gtcDlwMnNB3n4v0+gTgS8ClDF57/OG++2x6fQKDg90um/EcfTZwEPApBiH3KeD+ffe6m35X5HbW4vNnRT6+UcqRUcqQRfo1R7rtd0nbmR/bLkmSejESL8dIkqTx4xAiSZJ64RAiSZJ64RAiSZJ64RAiSZJ64RAikpyXZGvffUgaXeaI9oRDiDqXZKLvHiSNNnNkPDmE7COSbEnyr0nOSHJZkg8156iYfb93JrkoyVeTvLFZ9rQkH5lxn2ckOau5fGKSzye5OMkHm/MIkOSqJG9I8lngZ2at42eSXJ7k0iSfaZa9OMlfzrjPOUme0ly+K8lbknwpySeTPK75q+vK5kRfkpaBOaK2OYTsWx4OnFpVjwbuAH5lnvv8TlVtBR4NPDnJo4FPA49Ickhzn5OBdyc5GHg98PSqOha4CPj1GbXuraonVNX7Z63jDcCPVdUPAcNs/BuB86rqh4E7gTcBzwB+EviDIX5eUnvMEbXGIWTfcnVVfa65/F4GH7s7288muRj4MvAo4JE1+Fjd9wAnJdkMHMfg9MyPBx4JfC7JJQzOa/DgGbX+doE+PgecnuRlwDC7WHcA/9hc/gpwflVNNpe3DPHzktpjjqg1q/tuQMt
q9mf03+d6kocAvwk8tqpuTXI6sK65+d3AR4F7gQ9W1VRzAqNPVNXPL7C+u+dtouqXkvwI8OPAJUmOYXDK55lD8boZlyfrP88vMA1sb+pMJ/E5LC0vc0StcU/IvuVBSY5rLv888NlZt29isMHfnuQBwLN23VBV1zE4++TrgdObxV8Ajk/y/QBJNjRn0VxUkodV1YVV9QYGZ9s8ksFJuo5JsirJkayg02tLug9zRK1x+tu3XAG8KMlfMTgj4ztn3lhVlyb5MvBV4EoGuztnOhM4pKq+1tz/xiQvBt6XZL/mPq8H/m03fbwtyVFAGJwV8tJm+b8z2DW662ycklYec0St8Sy6+4gkW4BzquoH96LGXwJfrqrT2upL0ugwR9Q294RoKEm+xGAX62/03Yuk0WSOaDb3hEiSpF54YKokSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSerF/wdeBW++clechgAAAABJRU5ErkJggg==\n", 180 | "text/plain": [ 181 | "
" 182 | ] 183 | }, 184 | "metadata": { 185 | "needs_background": "light" 186 | }, 187 | "output_type": "display_data" 188 | } 189 | ], 190 | "source": [ 191 | "plot(v)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# 带起始探索的同策回合更新\n", 201 | "\n", 202 | "def monte_carlo_with_exploring_start(env, episode_num=500000):\n", 203 | " policy = np.zeros((22, 11, 2, 2))\n", 204 | " policy[:, :, :, 1] = 1.\n", 205 | " q = np.zeros_like(policy)\n", 206 | " c = np.zeros_like(policy)\n", 207 | " \n", 208 | " for _ in tqdm(range(episode_num)):\n", 209 | " # 随机选择起始状态和起始动作\n", 210 | " state = (np.random.randint(12, 22),\n", 211 | " np.random.randint(1, 11),\n", 212 | " np.random.randint(2))\n", 213 | " action = np.random.randint(2)\n", 214 | " \n", 215 | " env.reset()\n", 216 | " if state[2]: # 有A\n", 217 | " env.player = [1, state[0] - 11]\n", 218 | " else: # 没有A\n", 219 | " if state[0] == 21:\n", 220 | " env.player = [10, 9, 2]\n", 221 | " else:\n", 222 | " env.player = [10, state[0] - 10]\n", 223 | " env.dealer[0] = state[1]\n", 224 | " \n", 225 | " state_actions = []\n", 226 | " while True:\n", 227 | " state_actions.append((state, action))\n", 228 | " observation, reward, done, _ = env.step(action)\n", 229 | " \n", 230 | " if done:\n", 231 | " break\n", 232 | " \n", 233 | " state = ob2state(observation)\n", 234 | " action = np.random.choice(env.action_space.n, p=policy[state])\n", 235 | " \n", 236 | " g = reward\n", 237 | " for state, action in state_actions:\n", 238 | " c[state][action] += 1.\n", 239 | " q[state][action] += (g - q[state][action]) / c[state][action]\n", 240 | " a = q[state].argmax()\n", 241 | " policy[state] = 0.\n", 242 | " policy[state][a] = 1.\n", 243 | " \n", 244 | " return policy, q" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 10, 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | 
"application/vnd.jupyter.widget-view+json": { 255 | "model_id": "0c3eb0a7a75c4a398889b3fd014b1b2c", 256 | "version_major": 2, 257 | "version_minor": 0 258 | }, 259 | "text/plain": [ 260 | "HBox(children=(FloatProgress(value=0.0, max=500000.0), HTML(value='')))" 261 | ] 262 | }, 263 | "metadata": {}, 264 | "output_type": "display_data" 265 | }, 266 | { 267 | "name": "stdout", 268 | "output_type": "stream", 269 | "text": [ 270 | "\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "policy, q = monte_carlo_with_exploring_start(env)\n", 276 | "v = q.max(axis=-1)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 11, 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAZ9klEQVR4nO3debSkdX3n8feHbhBBEIWWsLS2QXHLwZa0RkRccMFd46jRkRlFI5rxBDXGBTUm5jjuwRg9o2GCgsq4ISqiCSjKoqOQttlttyDK0qzKIo6s3/mjnjaXu3XdvrfqV1X9fp1zz616nufW8719uj7nc596qp5UFZIkScO2VesBJEnSlskSIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRsgZL8JskfzrP+4iRPHOZMkiaTeaP5WEK2QFV1t6q6CCDJ0UneOYz9Jnlpku8MY1+SRkOrvNF4sIRIkqQmLCETIskhSb465f7Pknx+yv1LkqzubleS+yU5FHgx8MbukOlXpzzk6iTnJbk+yeeSbDvlsV7RPf6vkpyQZPdu+arusZdP2fbUJH+e5EHAx4D9un1dN8/vsT7JjUkuSvLKaeufneScJDck+Y8kT+mW3z3JUUk2JLksyTuTLFvEP6mkOQwzb6btd68k30pybZJrkhybZKcp61cmOT7J1d02H5my7mVdtvw6yUlJ7rOE/yTaTJaQyXEacECSrZLsBmwN7A/QvR57N+C8qT9QVUcCxwLv6w6ZPnPK6hcATwHuC+wDvLR7rAOBd3frdwN+AXx2U8NV1XrgVcD3un3tNMemVwHPAHYEDgE+mGTfbt+PAD4JvAHYCXgMcHH3c8cAtwH3Ax4GPBn4803NJWmzDCVvZhF6+bM78CBgJfB33X6XASfSy6RVwB502ZTkOcBbgOcCK4AzgM9s5u+uJbR805toHFTVRUluBFYDewMn0fvr4oHAfsAZVXXHAh7yn6rqcoDuL5bV3fIXAx+vqnXdusOBXydZtUS/x9em3D0tycnAAcA64OXdvr/Rrb+sm2FX4KnATlX1/4CbknwQOBT456WYS9J/GmLeTN/vz4CfdXevTnIE8Lfd/UfQKydvqKrbumUbz0F7JfDu7o8hkrwLeEuS+1TVLxYwp5aYJWSynAY8jt7RgNOA64DH0guF0xb4WFdMuf1bek9uuu/rNq6oqt8kuZbeXx2XbdbUUyR5Kr1Q2ZvekbrtgPO71SuBr8/yY/eh95fYhiQ
bl20FXLLYeSTNaRh5cydJ7gX8E70/THag9zz/dbd6JfCLKQVkqvsAH0ryD1Mfjl5uWUIa8uWYybIxFA7obp9GLxQey9yhsNDLKF9O7wkNQJLtgZ3pFZCbusXbTdn+D/rdV5K7AF8EPgDs2r1k83V6YQG9UrHXLD96CXAzsEtV7dR97VhVD+n7t5K0UMPIm+ne3T3GPlW1I3Awd86He089J22KS4BXTsmHnarqrlX1fxc5jxbJEjJZTgMeD9y1qi6l97rnU+iVhLPn+JkrgTnfwz+L/wMckmR1VxreBZxZVRdX1dX0ysjBSZYleRl3Lg1XAnsm2WaOx94GuAtwNXBbd1TkyVPWH9Xt+wnda9F7JHlgVW0ATgb+IcmO3bq9kjx2Ab+XpIUZRt5MtwPwG+C6JHvQOz9so7OADcB7kmyfZNsk+3frPgYcnuQh8PsT2Z+/iDm0RCwhE6SqfkLvCXpGd/8G4CLgu1V1+xw/dhTw4CTXJflyH/s4BfgbekcsNtArGS+csskr6AXDtcBDgKl/aXwLuBC4Isk1szz2jcBhwOfpHWL9r8AJU9afRXeyKnA9vRDceFTmv9MrMT/sfvY4eifOShqAYeTNLN4B7Evv+f814Pgp89wOPJPey0O/BC4F/qxb9yXgvcBnk9wAXEDvPDI1lqrFHh2TJElaOI+ESJKkJiwhkiSpCUuIJElqwhIiSZKaGIsPK9vlnstq1cqtW4+hBfjJedtteiONjN9xE7fUzdn0luPLHNGgbem5tzk5MhYlZNXKrTnrpJWtx9ACHLT7rJ+6rBF1Zp3SeoSBM0c0aFt67m1OjvhyjCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqYmAlJMnHk1yV5IIpy+6Z5BtJftp9v8eg9i9p/Jkj0mQb5JGQo4GnTFv2ZuCUqro/cEp3X5LmcjTmiDSxBlZCqup04FfTFj8bOKa7fQzwnEHtX9L4M0ekyTbsc0J2raoNAN33e821YZJDk6xNsvbqa28f2oCSRp45Ik2IkT0xtaqOrKo1VbVmxc7LWo8jaQyZI9JoG3YJuTLJbgDd96uGvH9J488ckSbEsEvICcBLutsvAb4y5P1LGn/miDQhBvkW3c8A3wMekOTSJC8H3gM8KclPgSd19yVpVuaINNmWD+qBq+pFc6x6wqD2KWmymCPSZBvZE1MlSdJks4RIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWpieesBJEkapoN2X916BHU8EiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmmhSQpK8LsmFSS5I8pkk27aYQ9L4Mkek8Tf0EpJkD+AwYE1V/RGwDHjhsOeQNL7MEWkytHo5Zjlw1yTLge2AyxvNIWl8mSPSmBt6Camqy4APAL8ENgDXV9XJw55D0vgyR6TJ0OLlmHsAzwbuC+wObJ/k4Fm2OzTJ2iRrr7729mGPKWmEmSPSZGjxcswTgZ9X1dVVdStwPPCo6RtV1ZFVtaaq1qzYednQh5Q00swRaQK0KCG/BB6ZZLskAZ4ArG8wh6TxZY5IE6DFOSFnAscB64DzuxmOHPYcksaXOSJNhuUtdlpVfwv8bYt9S5oM5og0/vzEVEmS1IQlRJIkNbHJl2OSPHeWxdcD51fVVUs/kqRJY45Imk0/54S8HNgP+HZ3/3HA94G9k/x9VX1qQLNJmhzmiKQZ+ikhdwAPqqorAZLsCnwU+BPgdMDwkLQp5oikGfo
5J2TVxuDoXAXsXVW/Am4dzFiSJow5ImmGfo6EnJHkROAL3f3/ApyeZHvguoFNJmmSmCOSZuinhLyaXmDsDwT4JPDFqirg8QOcTdLkMEckzbDJEtKFxHHdlyQtmDkiaTabPCckyXOT/DTJ9UluSHJjkhuGMZykyWCOSJpNPy/HvA94ZlV5cShJm8sckTRDP++OudLgkLRI5oikGfo5ErI2yeeALwM3b1xYVccPbCpJk8YckTRDPyVkR+C3wJOnLCvA8JDUL3NE0gz9vDvmkGEMImlymSOSZjNnCUnyxqp6X5IP0/uL5U6q6rCBTiZp7JkjkuYz35GQjSeRrR3GIPP5yXnbcdDuq1uPIWnhRiZHJI2eOUtIVX21u3lGVV00pHkkTRBzRNJ8+jkx9egkewD/Tu9ql2dU1fmDHUvShDFHJM3Qz4mpj0myDfBw4HHA15LcraruOejhJE0Gc0TSbDZZQpI8Gjig+9oJOBE4Y8BzSZog5oik2fTzcsxp9E4qezfw9aq6ZbAjSZpA5oikGfopITvTu/z2Y4DDktwBfK+q/magk0maJOaIpBn6OSfkuiQXASuBPYFHAVsPejBJk8MckTSbfs4J+Q/gx8B3gI8Bh3goVdJCmCOSZtPPyzH3r6o7Bj6JpElmjkiaYas+ttk9yZeSXJXkyiRfTLLnwCeTNEnMEUkz9FNCPgGcAOwO7AF8tVsmSf0yRyTN0E8JWVFVn6iq27qvo4EVA55L0mQxRyTN0E8JuSbJwUmWdV8HA9cOejBJE8UckTRDPyXkZcALgCuADcDzumWS1C9zRNIM/XxOyC+BZw1hFkkTyhyRNJt+PidkBfAKYNXU7avKv2Ik9cUckTSbfj4n5Cv0LjT1TeD2wY4jaUKZI5Jm6KeEbFdVbxr4JJImmTkiaYZ+Tkw9McnTlnKnSXZKclySHyVZn2S/pXx8SSPHHJE0w5xHQpLcCBQQ4C1JbgZu7e5XVe24iP1+CPi3qnpekm2A7RbxWJJGlDkiaT5zlpCq2mEQO0yyI73Leb+0288tgBeykiaQOSJpPpt8OSbJ/km2724fnOSIJPdexD7/ELga+ESSs5P8y8bHn7bfQ5OsTbL2Vm5exO4ktTYKOXL1tZ4PK42afs4J+Sjw2yQPBd4I/AL41CL2uRzYF/hoVT0MuAl48/SNqurIqlpTVWu25i6L2J2kEdA8R1bsvGwRu5M0CP2UkNuqqoBnAx+qqg8BiznEeilwaVWd2d0/jl6YSJpc5oikGfopITcmORw4GPhakmXA1pu7w6q6ArgkyQO6RU8Afri5jydpLJgjkmbop4T8GXAz8PLuib8H8P5F7vcvgWOTnAesBt61yMeTNNrMEUkz9HPtmCuAI6bc/yXwycXstKrOAdYs5jEkjQ9zRNJs+jkSIkmStOQsIZIkqYl5S0iSZUk+PaxhJE0ec0TSXOYtIVV1O7Ci+0hkSVowc0TSXPq5iu7FwHeTnEDvA4EAqKoj5vwJSbqzizFHJE3TTwm5vPvaisV9uJCkLZc5ImmGft6i+w6AJNtX1U2b2l6SpjNHJM2mnwvY7Zfkh8D67v5Dk/yvgU8maWKYI5Jm089bdP8ROAi4FqCqzqV3CW1J6pc5ImmGvj4npKoumbbIa2JLWhBzRNJ0/ZyYekmSRwHVvcXuMLpDqpLUJ3NE0gz9HAl5FfBqehecupTehaJePcihJE0cc0TSDP28O+Ya4MVDmEXShDJHJM1mzhKS5MNAzbW+qg4byESSJoY5Imk+8x0JWTu0KSRNKnNE0pzmLCFVdcwwB5E0ecwRSfPZ5DkhSVYAbwIeDGy7cXlVHTjAue5k731+y0knnbPkj3vQ7quX/DElzTQKOSJp9PTz7phj6b2V7r7AO+hdiOrfBziTpMljjkiaoZ8SsnNVHQXcWlWnVdXLgEcOeC5Jk8UckTRDPx9Wdmv3fUOSp9O7EuaegxtJ0gQyRyTN0E8JeWeSuwOvBz4M7Ai8bqBTSZo05oikGfr
5sLITu5vXA48f7DiSJpE5Imk2mzwnJMneSU5JckF3f58kbxv8aJImhTkiaTb9nJj6v4HD6V7TrarzgBcOcihJE8cckTRDPyVku6o6a9qy2wYxjKSJZY5ImqGfEnJNkr3orv+Q5HnAhoFOJWnSmCOSZujn3TGvBo4EHpjkMuDnwMEDnUrSpDFHJM3Qz7tjLgKemGR7YKuqunHwY0maJOaIpNnMWUKS/NUcywGoqiMGNJOkCWGOSJrPfEdCdui+PwB4OHBCd/+ZwOmDHErSxDBHJM1pzhJSVe8ASHIysO/Gw6dJ/g74wlCmkzTWzBFJ8+nn3TH3Bm6Zcv8WYNVAppE0qcwRSTP08+6YTwFnJfkSvbfX/SlwzECnkjRpzBFJM/Tz7pj/meRfgQO6RYdU1dmDHUvSJDFHJM2mnyMhVNU6YN1S7jjJMmAtcFlVPWMpH1vS6DFHJE3Xzzkhg/IaYH3D/Usaf+aINMaalJAkewJPB/6lxf4ljT9zRBp/rY6E/CPwRuCORvuXNP7MEWnMDb2EJHkGcFVV/WAT2x2aZG2StVdfe/uQppM0DswRaTK0OBKyP/CsJBcDnwUOTPLp6RtV1ZFVtaaq1qzYedmwZ5Q02swRaQIMvYRU1eFVtWdVrQJeCHyrqryapqS+mSPSZGj57hhJkrQF6+tzQgalqk4FTm05g6TxZo5I48sjIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhEiSpCYsIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWrCEiJJkpoYeglJsjLJt5OsT3JhktcMewZJ480ckSbD8gb7vA14fVWtS7ID8IMk36iqHzaYRdJ4MkekCTD0IyFVtaGq1nW3bwTWA3sMew5J48sckSZDiyMhv5dkFfAw4MxZ1h0KHApw7z0GM+ZJl58zkMcVHLT76tYjaAvROkc0fgaV/ebewjU7MTXJ3YAvAq+tqhumr6+qI6tqTVWtWbHzsuEPKGnkmSPSeGtSQpJsTS84jq2q41vMIGm8mSPS+Gvx7pgARwHrq+qIYe9f0vgzR6TJ0OJIyP7AfwMOTHJO9/W0BnNIGl/miDQBhn6mVlV9B8iw9ytpcpgj0mTwE1MlSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU2kqlrPsElJbgR+3HqOPu0CXNN6iAUYp3nHaVYYr3kfUFU7tB5ikMyRgRqnecdpVhiveRecI8sHNckS+3FVrWk9RD+SrB2XWWG85h2nWWG85k2ytvUMQ2CODMg4zTtOs8J4zbs5OeLLMZIkqQlLiCRJamJcSsiRrQdYgHGaFcZr3nGaFcZr3nGadXON0+84TrPCeM07TrPCeM274FnH4sRUSZI0ecblSIgkSZowlhBJktTEyJWQJB9PclWSC6Yse3+SHyU5L8mXkuzUcsaNZpt1yrq/TlJJdmkx22zmmjfJXyb5cZILk7yv1XxTzfH/YHWS7yc5J8naJI9oOeNGSVYm+XaS9d2/4Wu65fdM8o0kP+2+36P1rDDvvCP5PNsc5shgjFOGgDkySEuWI1U1Ul/AY4B9gQumLHsysLy7/V7gva3nnGvWbvlK4CTgF8AurefcxL/t44FvAnfp7t+r9ZzzzHoy8NTu9tOAU1vP2c2yG7Bvd3sH4CfAg4H3AW/ulr95hP7fzjXvSD7PlvD/z0j+fuOUI+OUIfPMa44Mdt4FPc9G7khIVZ0O/GraspOr6rbu7veBPYc
+2Cxmm7XzQeCNwEid9TvHvH8BvKeqbu62uWrog81ijlkL2LG7fXfg8qEONYeq2lBV67rbNwLrgT2AZwPHdJsdAzynzYR3Nte8o/o82xzmyGCMU4aAOTJIS5UjI1dC+vAy4F9bDzGXJM8CLquqc1vP0qe9gQOSnJnktCQPbz3QPF4LvD/JJcAHgMMbzzNDklXAw4AzgV2ragP0nrDAvdpNNrtp80410s+zJTDSv9+Y5cg4ZQiYI0tuMTkyViUkyVuB24BjW88ymyTbAW8F3t56lgVYDtwDeCTwBuDzSdJ2pDn9BfC6qloJvA44qvE8d5LkbsAXgddW1Q2t59mUueYd9efZYo367zeGOTJOGQLmyJJabI6MTQlJ8hLgGcCLq3uxaQTtBdwXODfJxfQOQ61L8gdNp5rfpcDx1XMWcAe9CyaNopcAx3e3vwCMxAllAEm2pvdEPLaqNs54ZZLduvW7ASNzmHqOecflebbZxuT3G7ccGacMAXNkySxFjoxFCUnyFOBNwLOq6ret55lLVZ1fVfeqqlVVtYrek3Pfqrqi8Wjz+TJwIECSvYFtGN0rNl4OPLa7fSDw04az/F73V99RwPqqOmLKqhPoBR7d968Me7bZzDXvuDzPNte4/H5jmCPjlCFgjiyJJcuR1mfYTv8CPgNsAG6l9+R7OfAz4BLgnO7rY63nnGvWaesvZkTOap/n33Yb4NPABcA64MDWc84z66OBHwDn0nvt8Y9bz9nN+mh6J7udN+X/6NOAnYFT6IXcKcA9W8+6iXlH8nm2hP9/RvL3G6ccGacMmWdec2Sw8y7oeebHtkuSpCbG4uUYSZI0eSwhkiSpCUuIJElqwhIiSZKasIRIkqQmLCEiyalJ1rSeQ9L4Mke0OSwhGrgky1rPIGm8mSOTyRKyhUiyKsmPkhyT5Lwkx3XXqJi+3UeTrE1yYZJ3dMuekORLU7Z5UpLju9tPTvK9JOuSfKG7jgBJLk7y9iTfAZ4/bR/PT3JBknOTnN4te2mSj0zZ5sQkj+tu/ybJe5P8IMk3kzyi+6vrou5CX5KGwBzRUrOEbFkeABxZVfsANwD/Y5Zt3lpVa4B9gMcm2Qf4FvCgJCu6bQ4BPpFkF+BtwBOral9gLfBXUx7rd1X16Kr67LR9vB04qKoeCvTz5N8eOLWq/hi4EXgn8CTgT4G/7+PnJS0dc0RLxhKyZbmkqr7b3f40vY/dne4FSdYBZwMPAR5cvY/V/RRwcJKdgP3oXZ75kcCDge8mOYfedQ3uM+WxPjfHHN8Fjk7yCqCfQ6y3AP/W3T4fOK2qbu1ur+rj5yUtHXNES2Z56wE0VNM/o/9O95PcF/hr4OFV9eskRwPbdqs/AXwV+B3whaq6rbuA0Teq6kVz7O+mWYeoelWSPwGeDpyTZDW9Sz5PLcXbTrl9a/3n9QXuAG7uHueOJP4flobLHNGS8UjIluXeSfbrbr8I+M609TvSe8Jfn2RX4KkbV1TV5fSuPvk24Ohu8feB/ZPcDyDJdt1VNOeVZK+qOrOq3k7vapsr6V2ka3WSrZKsZIQury3pTswRLRnb35ZlPfCSJP9M74qMH526sqrOTXI2cCFwEb3DnVMdC6yoqh9221+d5KXAZ5LcpdvmbcBPNjHH+5PcHwi9q0Ke2y3/Ob1Doxuvxilp9JgjWjJeRXcLkWQVcGJV/dEiHuMjwNlVddRSzSVpfJgjWmoeCVFfkvyA3iHW17eeRdJ4Mkc0nUdCJElSE56YKkmSmrCESJKkJiwhkiSpCUuIJElqwhIiSZKa+P+eRTjt1n3WYwAAAABJRU5ErkJggg==\n", 287 | "text/plain": [ 288 | "
" 289 | ] 290 | }, 291 | "metadata": { 292 | "needs_background": "light" 293 | }, 294 | "output_type": "display_data" 295 | }, 296 | { 297 | "data": { 298 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAefklEQVR4nO3de5hkBXnn8e+vu+fCDIwDchGBiFFBMYtIRiLxjkrUeMnFJGZlV9FIzPpEYy5GE9fEPG6MmpiY5FkNWRSirCYqGkQTMRoRXcUgooJoNAQFGe634TbM5d0/6kzS9G2qe87p01V8P8/TT1edqn7P2zV1fvP2qVN1UlVIkiQtt4m+G5AkSfdNDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiH3QUluT/LDC9x+RZKnLWdPksaTeaOFOITcB1XV3lV1OUCS05O8aTnWm+TFST6/HOuStDL0lTcaDQ4hkiSpFw4hYyLJyUk+Nu36d5P83bTrVyY5prlcSR6a5BTghcBrml2mH5tW8pgkX09ya5K/TbJ2Wq2XNfVvSnJ2kgc2yw9vak9Nu+9nk/xSkkcA7wKOb9Z1ywK/x2VJtiS5PMkvz7j9eUkuTnJbkn9L8oxm+f2SnJZkc5IfJHlTksk9eEglzWM582bGeh+S5DNJbkxyQ5Izk2ycdvthSc5Kcn1zn7+cdttLmmy5OcknkzyoxYdES+QQMj7OA56QZCLJwcAq4HEAzeuxewNfn/4DVXUqcCbw1maX6XOm3fzzwDOABwNHAy9uap0AvLm5/WDge8AHdtdcVV0GvBz4YrOujfPc9Trg2cAG4GTgT5Mc26z7OOBvgN8CNgJPBK5ofu4MYDvwUODRwInAL+2uL0lLsix5M4cwyJ8HAo8ADgN+v1nvJHAOg0w6HDiEJpuS/BTwO8DPAAcA5wPvX+LvrhZN7f4uGgVVdXmSLcAxwBHAJxn8dfFw4Hjg/KrauYiSf15VVwM0f7Ec0yx/IfDuqrqoue11wM1JDm/p9/j4tKvnJTkXeAJwEfDSZt2fam7/QdPDQcAzgY1VdRdwR5I/BU4B/qqNviT9p2XMm5nr/S7w3ebq9UneDvxec/04BsPJb1XV9mbZrmPQfhl4c/PHEEn+EPidJA+qqu8tok+1zCFkvJwHPJnB3oDzgFuAJzEIhfMWWeuaaZfvZLBx03y/aNcNVXV7khsZ/NXxgyV1PU2SZzIIlSMY7KlbB3yjufkw4BNz/NiDGPwltjnJrmUTwJV72o+keS1H3txLkgOBP2fwh8k+DLbzm5ubDwO+N20Ame5BwDuS/Mn0cgxyyyGkR74cM152hcITmsvnMQiFJzF/KCz2NMpXM9igAUiyHrg/gwHkjmbxumn3f8Cw60qyBvgw8MfAQc1LNp9gEBYwGCoeMsePXglsBfavqo3N14aqeuTQv5WkxVqOvJnpzU2No6tqA3AS986HH5p+TNo0VwK/PC0fNlbVXlX1//awH+0hh5Dxch7wFGCvqrqKweuez2AwJHx1np+5Fpj3Pfxz+L/AyUmOaYaGPwQuqKorqup6BsPISUkmk7yEew8N1wKHJlk9T+3VwBrgemB7s1fkxGm3n9as+6nNa9GHJHl4VW0GzgX+JMmG5raHJHnSIn4vSYuzHHkz0z7A7cAtSQ5hcHzYLl8GNgN/lGR9krVJHtfc9i7gdUkeCf9xIPvP7UEfaolDyBipqn9lsIGe31y/Dbgc+EJV7Zjnx04DjkpyS5KPDrGOTwP/k8Eei80MhowXTLvLyxgEw43AI4Hpf2l8BrgUuCbJDXPU3gK8E
vg7BrtY/ytw9rTbv0xzsCpwK4MQ3LVX5r8zGGK+2fzshxgcOCupA8uRN3N4I3Asg+3/48BZ0/rZATyHwctD3weuAn6hue0jwFuADyS5DbiEwXFk6lmq9nTvmCRJ0uK5J0SSJPXCIUSSJPXCIUSSJPXCIUSSJPViJD6sbPWq9bV2zXyf8r102T7fAdx7qItjfbs6gHjnYj7UcBFWdfPU2rFXR3XXtF9zcp9t7RcFHrrXTa3XvPLK7dx4087s/p6ja9Wa9bVm3b6t153Y3s22me0dbJvp6J+4qyyd6Ojv5IluHocde7V/uqod+3bz2B6xftYbFPfYlVdu56ZF5shIDCFr12zkx45+eet1J2/Y0npNgOzoIDy2zfUhgHuu7rizk7o8YP9Oyt521H6d1L3lYe2Hx8YnX7P7Oy3BRx/53tZrnvis9gNppVmzbl+OecqrWq+79vp7Wq8JMHXLXa3XrKlu/lOfuOHWTurW+r26qbuug786gFsfvqH1mlt+4bbWawJ8ctNft17zJ5aQI74cI0mSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSetHZEJLk3UmuS3LJtGX7JflUku803/ftav2SRp85Io23LveEnA48Y8ay1wKfrqqHAZ9urkvSfE7HHJHGVmdDSFV9DrhpxuLnAWc0l88Afqqr9UsafeaINN6mlnl9B1XVZoCq2pzkwPnumOQU4BSANWs2smNt+63WgRtarwkwefvW1mtm6/bWawJkVTdPgR3rVndSd9u6bubmu//LXa3XfN1DP9F6TYD9J9e3XnNq1v/zK9qScmTV3vty5/6TrTdTE2tarwkwuWFV6zWzs1qvCTCx/7pO6nbV7z0busm9ax/bfs0/+ZGz2y8KHNhBjqxaQo6s2ANTq+rUqtpUVZtWr27/wZI0/qbnyNRac0RaaZZ7CLk2ycEAzffrlnn9kkafOSKNieUeQs4GXtRcfhHw98u8fkmjzxyRxkSXb9F9P/BF4MgkVyV5KfBHwNOTfAd4enNdkuZkjkjjrbMDU6vqF+e56aldrVPSeDFHpPG2Yg9MlSRJ480hRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9WKq7waGUYHte022XndyIq3XBKgO6k7ddnfrNQGo6qbuzm7K3nh0N3V/9qivtl5zn4lu/s12VPsPbtHR82AlCezsIPFuf2D72QQwubX9vxHXX7Oj9ZoA2/bpJksntnXzvLzlod3813f8cd9svea6bG29JsC2av+5sJQccU+IJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqRS9DSJJXJ7k0ySVJ3p9kbR99SBpd5og0+pZ9CElyCPBKYFNV/QgwCbxgufuQNLrMEWk89PVyzBSwV5IpYB1wdU99SBpd5og04qaWe4VV9YMkfwx8H7gLOLeqzl3wZybD9nXtz0s1mdZrAkx1ULZWTbZfFNi5dlUnda//0b07qbvhyJs6qbtu4p5O6nZhJ9V3C71bSo7sXAV3PaD9jXNia+slAZjqIJ9uP7ibHJns6DG4537dZPTW427vpO6GVXd3UrcLE3Tz2C5WHy/H7As8D3gw8EBgfZKT5rjfKUkuTHLhtq3dPGEkjaal5MiOO+5Y7jYl7UYfL8c8Dfj3qrq+qrYBZwE/PvNOVXVqVW2qqk2r1nTzV7WkkbXoHJlcv37Zm5S0sD6GkO8Dj02yLkmApwKX9dCHpNFljkhjY
NmHkKq6APgQcBHwjaaHU5e7D0mjyxyRxsOyH5gKUFW/B/xeH+uWNB7MEWn0+YmpkiSpFw4hkiSpF7t9OSbJz8yx+FbgG1V1XfstSRo35oikuQxzTMhLgeOBf26uPxn4EnBEkj+oqvd21Juk8WGOSJplmCFkJ/CIqroWIMlBwDuBHwM+BxgeknbHHJE0yzDHhBy+Kzga1wFHVNVNwLZu2pI0ZswRSbMMsyfk/CTnAB9srv8s8Lkk64FbOutM0jgxRyTNMswQ8goGgfE4IMDfAB+uqgKe0mFvksaHOSJplt0OIU1IfKj5kqRFM0ckzWW3x4Qk+Zkk30lya5LbkmxJcttyNCdpPJgjkuYyzMsxbwWeU1WeHErSUpkjkmYZ5t0x1xockvaQOSJplmH2hFyY5G+BjwJbdy2sqrM660rSuDFHJM0yzBCyAbgTOHHasgIMD0nDMkckzTLMu2NOXo5GJI0vc0TSXOYdQpK8pqremuQvGPzFci9V9cpOO5M08swRSQtZaE/IroPILlyORhaycxLu3jjMMbSLk1mR2JbJ1itOHLK69ZoAO7opy10HpZO622/fq5O6l245uPWa+03d0XpNgEet/lbrNXfOng/asmJypDPdPNWp9mOEezZ00+zqLd08f7Z3s7mz7fpuCl+492Gt1zxi3TWt1wQ4ds2lrdfcsYQcmXcIqaqPNRfPr6rLl9qUpPsuc0TSQoY5MPX0JIcA/8LgbJfnV9U3um1L0pgxRyTNMsyBqU9Mshp4DPBk4ONJ9q6q/bpuTtJ4MEckzWW3Q0iSxwNPaL42AucA53fcl6QxYo5ImsswL8ecx+CgsjcDn6iqe7ptSdIYMkckzTLMEHJ/BqfffiLwyiQ7gS9W1f/stDNJ48QckTTLMMeE3JLkcuAw4FDgx4FVXTcmaXyYI5LmMswxIf8GfBv4PPAu4GR3pUpaDHNE0lyGeTnmYVW1s/NOJI0zc0TSLMN8DOkDk3wkyXVJrk3y4SSHdt6ZpHFijkiaZZgh5D3A2cADgUOAjzXLJGlY5oikWYYZQg6oqvdU1fbm63TggI77kjRezBFJswwzhNyQ5KQkk83XScCNXTcmaayYI5JmGWYIeQnw88A1wGbg+c0ySRqWOSJplmE+J+T7wHOXoRdJY8ockTSXYT4n5ADgZcDh0+9fVf4VI2ko5oikuQzzOSF/z+BEU/8E7Oi2HUljyhyRNMswQ8i6qvrtzjuRNM7MEUmzDHNg6jlJntXmSpNsTPKhJN9KclmS49usL2nFMUckzTLvnpAkW4ACAvxOkq3AtuZ6VdWGPVjvO4B/rKrnJ1kNrNuDWpJWKHNE0kLmHUKqap8uVphkA4PTeb+4Wc89gCeyksaQOSJpIcO8O+ZxwMVVdUfzAUPHAn/WvOVuKX4YuB54T5JHAV8BXlVVd8xY7ynAKQCr1+/L5LYlrm0BE9uq/aId2b42ndRNR6cUW31LN3XvvmV1J3WvP2Dv9ovu235JgJ3V/vO26y1hJeTI1P327eQXneggmwCm7urg33mimxypYV7YX4KpO7upu/bayU7q3nbg2tZr3rx9fes1AbqI/qU8Y4d56rwTuLPZ0F8DfA947xLWtcsUgwB6Z1U9GrgDeO3MO1XVqVW1qao2Ta3t5h9B0rLpPUcm15kj0kozzBCyvaoKeB7wjqp6B7Anu1ivAq6qqgua6x9iECaSxpc5ImmWYYaQLUleB5wEfDzJJLBqqSusqmuAK5Mc2Sx6KvDNpdaTNBLMEUmzDDOE/AKwFXhps+EfArxtD9f7q8CZSb4OHAP84R7Wk7SymSOSZhnm3DHXAG+fdv37wN/syUqr6mJg057UkDQ6zBFJc+nomGZJkqSFOYRIkqReLDiEJJlM8r7lakbS+DFHJM1nwSGkqnYABzQfiSxJi2aOSJrPMGfRvQL4QpKzGXwgEABV9fZ5f0KS7u0KzBFJMwwzhFzdfE2wZx8uJOm+yxyRNMswb9F9I0CS9TPPyyBJwzBHJ
M1lt++OSXJ8km8ClzXXH5Xkf3femaSxYY5Imsswb9H9M+AngBsBquprDE6hLUnDMkckzTLU54RU1ZUzFu3ooBdJY8wckTTTMAemXpnkx4Fq3mL3SppdqpI0JHNE0izD7Al5OfAKBiecuorBiaJe0WVTksaOOSJplmHeHXMD8MJl6EXSmDJHJM1l3iEkyV8ANd/tVfXKTjqSNDbMEUkLWWhPyIXL1oWkcWWOSJrXvENIVZ2xnI1IGj/miKSF7PaYkCQHAL8NHAWs3bW8qk7osK97K5jYNu8e3SWb2tp+TYAdq9J6za567cq29e0/BgB0VPa2u9e0XvPuGubNZ4u3Y/5XN1aslZAjKZi8u/26U3e1XxNgx5r2n+yrb+3mubOzo1MTpqun+s6O6nZgRw31SRoja5jf7kwGb6V7MPBGBiei+pcOe5I0fswRSbMMM4Tcv6pOA7ZV1XlV9RLgsR33JWm8mCOSZhlmf/G25vvmJD/J4EyYh3bXkqQxZI5ImmWYIeRNSe4H/AbwF8AG4NWddiVp3JgjkmYZ5sPKzmku3go8pdt2JI0jc0TSXHZ7TEiSI5J8OsklzfWjk7y++9YkjQtzRNJchjkw9a+B19G8pltVXwde0GVTksaOOSJplmGGkHVV9eUZy7Z30YyksWWOSJplmCHkhiQPoTn/Q5LnA5s77UrSuDFHJM0yzLtjXgGcCjw8yQ+AfwdO6rQrSePGHJE0yzDvjrkceFqS9cBEVW3pvi1J48QckTSXeYeQJL8+z3IAqurtHfUkaUyYI5IWstCekH2a70cCjwHObq4/B/hcl01JGhvmiKR5zTuEVNUbAZKcCxy7a/dpkt8HPrgs3UkaaeaIpIUM8+6YHwLumXb9HuDwTrqRNK7MEUmzDPPumPcCX07yEQZvr/tp4IxOu5I0bswRSbMM8+6Y/5XkH4AnNItOrqqvdtuWpHFijkiayzB7Qqiqi4CL2lxxkkngQuAHVfXsNmtLWnnMEUkzDXNMSFdeBVzW4/oljT5zRBphvQwhSQ4FfhL4P32sX9LoM0ek0dfXnpA/A14D7Oxp/ZJGnzkijbihjglpU5JnA9dV1VeSPHmB+50CnAKweq+NTN1drfdSk62XBGBqa/uZuHMqrdcEmOzgcQXYuqGbfrOjm7oTHZS9c8ea9osKWFqOrNpnXya2ddBMN5sQEyN0juGJe3Z/n6XI9m4e3Lp/Nzmy9Y7VrdecTDcz9o7q4LFdQs0+9oQ8DnhukiuADwAnJHnfzDtV1alVtamqNq1as/dy9yhpZVt0jkyuW7/cPUrajWUfQqrqdVV1aFUdDrwA+ExVeTZNSUMzR6Tx0Oe7YyRJ0n3Ysh8TMl1VfRb4bJ89SBpt5og0utwTIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSeuEQIkmSejHVdwNDS/slJ7ZV+0U7qjt5dze9Zmc3dafumuyk7sTdHTwRgC4ehlXZ0X5R7ZkOnj47V7dfE2Dtje0/Kbva3ndOdbNdrrqjm37v2tZNv5lov9+r7tq39ZoAq/Zr/zFIFl/TPSGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXyz6EJDksyT8nuSzJpUletdw9SBpt5og0HqZ6WOd24Deq6qIk+wBfSfKpqvpmD71IGk3miDQGln1PSFVtrqqLmstbgMuAQ5a7D0mjyxyRxkMfe0L+Q5LDgUcDF8xx2ynAKQBrp/Zhw1evab+BbdvbrwnUXXd1UPPu1msC7Lzzzk7q7rdqdSd1Nx53VCd1b3zk/Vuv+e4jn9J6TYDJE3e2XvOG7V9oveZyG
T5HNnDox65tf/13bW29JsDOG29qvWbWrGm9JsCOm2/upO7E2rWd1N249/pO6t553A+3XvNLjzq69ZoAb3p++/+nbN72yUX/TG8HpibZG/gw8GtVddvM26vq1KraVFWbVk+sW/4GJa14i8qRSXNEWml6GUKSrGIQHGdW1Vl99CBptJkj0ujr490xAU4DLquqty/3+iWNPnNEGg997Al5HPDfgBOSXNx8PauHPiSNLnNEGgPLfmBqVX0eyHKvV9L4MEek8eAnpkqSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF6kqvruYbeSbAG+3XcfQ9ofuKHvJhZhlPodpV5htPo9sqr26buJLpkjnRqlfkepVxitfhedI1NdddKyb1fVpr6bGEaSC0elVxitfkepVxitfpNc2HcPy8Ac6cgo9TtKvcJo9buUHPHlGEmS1AuHEEmS1ItRGUJO7buBRRilXmG0+h2lXmG0+h2lXpdqlH7HUeoVRqvfUeoVRqvfRfc6EgemSpKk8TMqe0IkSdKYcQiRJEm9WHFDSJJ3J7kuySXTlr0tybeSfD3JR5Js7LPHXebqddptv5mkkuzfR29zma/fJL+a5NtJLk3y1r76m26e58ExSb6U5OIkFyY5rs8ed0lyWJJ/TnJZ8xi+qlm+X5JPJflO833fvnuFBftdkdvZUpgj3RilDAFzpEut5UhVragv4InAscAl05adCEw1l98CvKXvPufrtVl+GPBJ4HvA/n33uZvH9inAPwFrmusH9t3nAr2eCzyzufws4LN999n0cjBwbHN5H+BfgaOAtwKvbZa/dgU9b+frd0VuZy0+f1bk7zdKOTJKGbJAv+ZIt/0uajtbcXtCqupzwE0zlp1bVdubq18CDl32xuYwV6+NPwVeA6yoo37n6fdXgD+qqq3Nfa5b9sbmME+vBWxoLt8PuHpZm5pHVW2uqouay1uAy4BDgOcBZzR3OwP4qX46vLf5+l2p29lSmCPdGKUMAXOkS23lyIobQobwEuAf+m5iPkmeC/ygqr7Wdy9DOgJ4QpILkpyX5DF9N7SAXwPeluRK4I+B1/XczyxJDgceDVwAHFRVm2GwwQIH9tfZ3Gb0O92K3s5asKJ/vxHLkVHKEDBHWrcnOTJSQ0iS3wW2A2f23ctckqwDfhd4Q9+9LMIUsC/wWOC3gL9Lkn5bmtevAK+uqsOAVwOn9dzPvSTZG/gw8GtVdVvf/ezOfP2u9O1sT630328Ec2SUMgTMkVbtaY6MzBCS5EXAs4EXVvNi0wr0EODBwNeSXMFgN9RFSR7Qa1cLuwo4qwa+DOxkcMKklehFwFnN5Q8CK+KAMoAkqxhsiGdW1a4er01ycHP7wcCK2U09T7+jsp0t2Yj8fqOWI6OUIWCOtKaNHBmJISTJM4DfBp5bVXf23c98quobVXVgVR1eVYcz2DiPraprem5tIR8FTgBIcgSwmpV7xsargSc1l08AvtNjL/+h+avvNOCyqnr7tJvOZhB4NN//frl7m8t8/Y7KdrZUo/L7jWCOjFKGgDnSitZypO8jbGd+Ae8HNgPbGGx8LwW+C1wJXNx8vavvPufrdcbtV7BCjmpf4LFdDbwPuAS4CDih7z4X6PXxwFeArzF47fFH++6z6fXxDA52+/q05+izgPsDn2YQcp8G9uu71930uyK3sxafPyvy9xulHBmlDFmgX3Ok234XtZ35se2SJKkXI/FyjCRJGj8OIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOISLJZ5Ns6rsPSaPLHNFSOISoc0km++5B0mgzR8aTQ8h9RJLDk3wryRlJvp7kQ805Kmbe751JLkxyaZI3NsuemuQj0+7z9CRnNZdPTPLFJBcl+WBzHgGSXJHkDUk+D/zcjHX8XJJLknwtyeeaZS9O8pfT7
nNOkic3l29P8pYkX0nyT0mOa/7qurw50ZekZWCOqG0OIfctRwKnVtXRwG3A/5jjPr9bVZuAo4EnJTka+AzwiCQHNPc5GXhPkv2B1wNPq6pjgQuBX59W6+6qenxVfWDGOt4A/ERVPQoYZuNfD3y2qn4U2AK8CXg68NPAHwzx85LaY46oNQ4h9y1XVtUXmsvvY/CxuzP9fJKLgK8CjwSOqsHH6r4XOCnJRuB4BqdnfixwFPCFJBczOK/Bg6bV+tt5+vgCcHqSlwHD7GK9B/jH5vI3gPOqaltz+fAhfl5Se8wRtWaq7wa0rGZ+Rv+9rid5MPCbwGOq6uYkpwNrm5vfA3wMuBv4YFVtb05g9Kmq+sV51nfHnE1UvTzJjwE/CVyc5BgGp3yePhSvnXZ5W/3n+QV2AlubOjuT+ByWlpc5ota4J+S+5YeSHN9c/kXg8zNu38Bgg781yUHAM3fdUFVXMzj75OuB05vFXwIel+ShAEnWNWfRXFCSh1TVBVX1BgZn2zyMwUm6jkkykeQwVtDptSXdizmi1jj93bdcBrwoyV8xOCPjO6ffWFVfS/JV4FLgcga7O6c7Ezigqr7Z3P/6JC8G3p9kTXOf1wP/ups+3pbkYUAYnBXya83yf2ewa3TX2TglrTzmiFrjWXTvI5IcDpxTVT+yBzX+EvhqVZ3WVl+SRoc5ora5J0RDSfIVBrtYf6PvXiSNJnNEM7knRJIk9cIDUyVJUi8cQiRJUi8cQiRJUi8cQiRJUi8cQiRJUi/+P9tDSUKHRO0LAAAAAElFTkSuQmCC\n", 299 | "text/plain": [ 300 | "
" 301 | ] 302 | }, 303 | "metadata": { 304 | "needs_background": "light" 305 | }, 306 | "output_type": "display_data" 307 | } 308 | ], 309 | "source": [ 310 | "plot(policy.argmax(-1))\n", 311 | "plot(v)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 12, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# 基于柔性策略的同策回合更新\n", 321 | "\n", 322 | "def monte_carlo_with_soft(env, episode_num=500000, epsilon=0.1):\n", 323 | " policy = np.ones((22, 11, 2, 2)) * 0.5\n", 324 | " q = np.zeros_like(policy)\n", 325 | " c = np.zeros_like(policy)\n", 326 | " \n", 327 | " for _ in tqdm(range(episode_num)):\n", 328 | " state_actions = []\n", 329 | " observation = env.reset()\n", 330 | " \n", 331 | " while True:\n", 332 | " state = ob2state(observation)\n", 333 | " action = np.random.choice(env.action_space.n, p=policy[state])\n", 334 | " state_actions.append([state, action])\n", 335 | " \n", 336 | " observation, reward, done, _ = env.step(action)\n", 337 | " if done:\n", 338 | " break\n", 339 | " \n", 340 | " g = reward\n", 341 | " for state, action in state_actions:\n", 342 | " c[state][action] += 1\n", 343 | " q[state][action] += (g - q[state][action]) / c[state][action]\n", 344 | " a = q[state].argmax()\n", 345 | " policy[state] = epsilon / 2.0\n", 346 | " policy[state][a] += 1.0 - epsilon\n", 347 | " return policy, q" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 13, 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "data": { 357 | "application/vnd.jupyter.widget-view+json": { 358 | "model_id": "2d2dd23f407549b6954d33d9344585ee", 359 | "version_major": 2, 360 | "version_minor": 0 361 | }, 362 | "text/plain": [ 363 | "HBox(children=(FloatProgress(value=0.0, max=500000.0), HTML(value='')))" 364 | ] 365 | }, 366 | "metadata": {}, 367 | "output_type": "display_data" 368 | }, 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "\n" 374 | ] 375 | }, 376 | { 377 | "data": 
{ 378 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAaPklEQVR4nO3debhcdZ3n8feHBMQgiEKkSQjGRnHrByMdbRFxwQV3bUdtHZlRtEV7fBq1bRfU1rYfx71RW5/RZgYFlXFDVES7QVECOgodYtiMWyMKJKwKRGxZv/NHndiXu6WS3Krfrcr79Tz3SdU5dc/53pD68LmnTtVJVSFJkjRsO7QeQJIkbZ8sIZIkqQlLiCRJasISIkmSmrCESJKkJiwhkiSpCUvIdijJb5P88SzrL03yhGHOJGk8mTeajSVkO1RVd6uqSwCSHJ/kncPYb5KXJPnuMPYlaX5olTcaDZYQSZLUhCVkTCQ5IsnXJtz/eZIvTLh/WZIV3e1Kct8kRwIvAt7QHTL92oRNrkhyQZIbknw+yc4TtvXybvu/TnJKkiXd8uXdthdOeOyZSf4yyQOBjwMHdfu6fpafY12SjUkuSfKKSeuflWRtkhuT/HuSJ3fL757kuCQbklyR5J1JFmzDX6mkGQwzbybtd78k305yXZJrk5yYZPcJ65clOTnJNd1jPjph3Uu7bPlNktOS3HsO/0q0lSwh42MVcEiSHZLsDewIHAzQvR57N+CCid9QVccCJwLv6w6ZPmPC6ucDTwbuAxwAvKTb1qHAu7v1ewO/BD63ueGqah3wSuD73b52n+GhVwNPB3YDjgA+mOTAbt8PBz4FvB7YHXg0cGn3fScAtwH3BR4KPAn4y83NJWmrDCVvphF6+bMEeCCwDPj7br8LgFPpZdJyYCldNiV5NvBm4DnAYuBs4LNb+bNrDi3c/EM0CqrqkiQbgRXA/sBp9H67eABwEHB2Vd2xBZv8p6paD9D9xrKiW/4i4BNVtaZbdzTwmyTL5+jn+PqEu6uSnA4cAqwBXtbt+5vd+iu6GfYCngLsXlX/AdyU5IPAkcA/z8Vckv7TEPNm8n5/Dvy8u3tNkmOAt3f3H06vnLy+qm7rlm06B+0VwLu7X4ZI8i7gzUnuXVW/3II5NccsIeNlFfBYekcDVgHXA4+hFwqrtnBbV064/Tt6T266P9dsWlFVv01yHb3fOq7YqqknSPIUeqGyP70jdYuAC7vVy4BvTPNt96b3m9iGJJuW7QBctq3zSJrRMPLmTpLcC/gner+Y7Ervef6bbvUy4JcTCshE9wY+nOQfJ26OXm5ZQhry5ZjxsikUDulur6IXCo9h5lDY0ssor6f3hAYgyS7AHvQKyE3d4kUTHv9H/e4ryV2ALwEfAPbqXrL5Br2wgF6p2G+ab70MuBnYs6p27752q6oH9/1TSdpSw8ibyd7dbeOAqtoNOJw758O+E89Jm+Ay4BUT8mH3qrprVf2/bZxH28gSMl5WAY8D7lpVl9N73fPJ9ErCD2f4nquAGd/DP43/CxyRZEVXGt4FnFNVl1bVNfTKyOFJFiR5KXcuDVcB+yTZaYZt7wTcBbgGuK07KvKkCeuP6/b9+O616KVJHlBVG4DTgX9Mslu3br8kj9mCn0vSlhlG3ky2K/Bb4PokS+mdH7bJucAG4D1Jdkmyc5KDu3UfB45O8mD4w4nsz9uGOTRHLCFjpKp+Su8JenZ3/0bgEuB7VXX7DN92HPCgJNcn+Uof+zgD+Dt6Ryw20CsZL5jwkJfTC4brgAcDE3/T+DZwMXBlkmun2fZG4CjgC/QOsf5X4JQJ68+lO1kVuIFeCG46KvPf6ZWYH3XfexK9E2clDcAw8mYa7wAOpPf8/zpw8oR5bgeeQe/loV8BlwN/0a37MvBe4HNJbgQuoncemRpL1bYeHZMkSdpyHgmRJElNWEIkSVITlhBJktSEJUSSJDUxEh
9Wtuc9F9TyZTu2HmMs/fSCRZt/kMbe77mJW+rmbP6Ro8sc0aBt73m6NTkyEiVk+bIdOfe0Za3HGEuHLZn205G1nTmnzmg9wsCZIxq07T1PtyZHfDlGkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDUxsBKS5BNJrk5y0YRl90zyzSQ/6/68x6D2L2n0mSPSeBvkkZDjgSdPWvYm4Iyquh9wRndfkmZyPOaINLYGVkKq6izg15MWPws4obt9AvDsQe1f0ugzR6TxtnDI+9urqjYAVNWGJPea6YFJjgSOBNh36bDH3H6ctn7tQLZ72JIVA9muhDkijY15e2JqVR1bVSurauXiPRa0HkfSCDJHpPlt2CXkqiR7A3R/Xj3k/UsafeaINCaGXUJOAV7c3X4x8NUh71/S6DNHpDExyLfofhb4PnD/JJcneRnwHuCJSX4GPLG7L0nTMkek8TawM7Wq6oUzrHr8oPYpabyYI9J4m7cnpkqSpPFmCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1MTC1gOorcOWrGg9giQNlbk3f3gkRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ10aSEJHltkouTXJTks0l2bjGHpNFljkijb+glJMlS4ChgZVX9CbAAeMGw55A0uswRaTy0ejlmIXDXJAuBRcD6RnNIGl3miDTiFg57h1V1RZIPAL8C/gM4vapOH/Yc6jlt/dqBbPewJSsGsl0JzBFpXLR4OeYewLOA+wBLgF2SHD7N445MsjrJ6muuu33YY0qax8wRaTy0eDnmCcAvquqaqroVOBl45OQHVdWxVbWyqlYu3mPB0IeUNK+ZI9IYaFFCfgU8IsmiJAEeD6xrMIek0WWOSGNg6CWkqs4BTgLWABd2Mxw77DkkjS5zRBoPQz8xFaCq3g68vcW+JY0Hc0QafX5iqiRJasISIkmSmtjsyzFJnjPN4huAC6vq6rkfSdK4MUckTaefc0JeBhwEfKe7/1jgB8D+Sf6hqj49oNkkjQ9zRNIU/ZSQO4AHVtVVAEn2Aj4G/BlwFmB4SNocc0TSFP2cE7J8U3B0rgb2r6pfA7cOZixJY8YckTRFP0dCzk5yKvDF7v5/Ac5Ksgtw/cAmkzROzBFJU/RTQl5FLzAOBgJ8CvhSVRXwuAHOJml8mCOSpthsCelC4qTuS5K2mDkiaTqbPSckyXOS/CzJDUluTLIxyY3DGE7SeDBHJE2nn5dj3gc8o6q8OJSkrWWOSJqin3fHXGVwSNpG5oikKfo5ErI6yeeBrwA3b1pYVScPbCpJ48YckTRFPyVkN+B3wJMmLCvA8JDUL3NE0hT9vDvmiGEMIml8mSOSpjNjCUnyhqp6X5KP0PuN5U6q6qiBTiZp5JkjkmYz25GQTSeRrR7GILP56QWLOGzJitZjSNpy8yZHJM0/M5aQqvpad/PsqrpkSPNIGiPmiKTZ9HNi6vFJlgL/Ru9ql2dX1YWDHUvSmDFHJE3Rz4mpj06yE/Aw4LHA15PcraruOejhJI0Hc0TSdDZbQpI8Cjik+9odOBU4e8BzSRoj5oik6fTzcswqeieVvRv4RlXdMtiRJI0hc0TSFP2UkD3oXX770cBRSe4Avl9VfzfQySSNE3NE0hT9nBNyfZJLgGXAPsAjgR0HPZik8WGOSJpOP+eE/DvwE+C7wMeBIzyUKmlLmCOSptPPyzH3q6o7Bj6JpHFmjkiaYoc+HrMkyZeTXJ3kqi
RfSrLPwCeTNE7MEUlT9FNCPgmcAiwBlgJf65ZJUr/MEUlT9FNCFlfVJ6vqtu7reGDxgOeSNF7MEUlT9FNCrk1yeJIF3dfhwHWDHkzSWDFHJE3RTwl5KfB84EpgA/Dcbpkk9csckTRFP58T8ivgmUOYRdKYMkckTaefzwlZDLwcWD7x8VXlbzGS+mKOSJpOP58T8lV6F5r6FnD7YMeRNKbMEUlT9FNCFlXVGwc+iaRxZo5ImqKfE1NPTfLUudxpkt2TnJTkx0nWJTloLrcvad4xRyRNMeORkCQbgQICvDnJzcCt3f2qqt22Yb8fBv61qp6bZCdg0TZsS9I8ZY5Ims2MJaSqdh3EDpPsRu9y3i/p9nML4IWspDFkjkiaTT/vjjkYWFtVN3UfMHQg8KHuLXdb44+Ba4BPJnkIcB7w6qq6adJ+jwSOBNjZX3CkkTYfcmTfpf2cAqftwWnr1w5ku4ctWTGQ7Y6zfs4J+Rjwu+6J/gbgl8Cnt2GfC+kF0Meq6qHATcCbJj+oqo6tqpVVtXJH7rINu5M0DzTPkcV7LNiG3UkahH5KyG1VVcCzgA9X1YeBbTnEejlweVWd090/iV6YSBpf5oikKfopIRuTHA0cDnw9yQJgx63dYVVdCVyW5P7doscDP9ra7UkaCeaIpCn6KSF/AdwMvKx74i8F3r+N+/1r4MQkFwArgHdt4/YkzW/miKQp+rl2zJXAMRPu/wr41LbstKrWAiu3ZRuSRoc5Imk6/RwJkSRJmnOWEEmS1MSsJSTJgiSfGdYwksaPOSJpJrOWkKq6HVjcfSSyJG0xc0TSTPr5CMFLge8lOYXeBwIBUFXHzPgdknRnl2KOSJqknxKyvvvagW37cCFJ2y9zRNIU/bxF9x0ASXaZfF0GSeqHOSJpOpt9d0ySg5L8CFjX3X9Ikv818MkkjQ1zRNJ0+nmL7oeAw4DrAKrqfHqX0Jakfpkjkqbo63NCquqySYtuH8AsksaYOSJpsn5OTL0sySOB6t5idxTdIVVJ6pM5ImmKfo6EvBJ4Fb0LTl1O70JRrxrkUJLGjjkiaYp+3h1zLfCiIcwiaUyZI5KmM2MJSfIRoGZaX1VHDWQiSWPDHJE0m9mOhKwe2hSSxpU5ImlGM5aQqjphmINIGj/miKTZbPackCSLgTcCDwJ23rS8qg4d4FySxsg458hhS1YMZLunrV87kO1qcP/NtOX6eXfMifTeSncf4B30LkT1bwOcSdL4MUckTdFPCdmjqo4Dbq2qVVX1UuARA55L0ngxRyRN0c+Hld3a/bkhydPoXQlzn8GNJGkMmSOSpuinhLwzyd2B1wEfAXYDXjvQqSSNG3NE0hT9fFjZqd3NG4DHDXYcSePIHJE0nc2eE5Jk/yRnJLmou39AkrcOfjRJ48IckTSdfk5M/d/A0XSv6VbVBcALBjmUpLFjjkiaop8Ssqiqzp207LZBDCNpbJkjkqbop4Rcm2Q/uus/JHkusGGgU0kaN+aIpCn6eXfMq4BjgQckuQL4BXD4QKeSNG7MEUlT9PPumEuAJyTZBdihqjYOfixJ48QckTSdGUtIkr+ZYTkAVXXMgGaSNCbMEUmzme1IyK7dn/cHHgac0t1/BnDWIIeSNDbMEUkzmrGEVNU7AJKcDhy46fBpkr8HvjiU6SSNNHNE0mz6eXfMvsAtE+7fAiwfyDSSxpU5ImmKft4d82ng3CRfpvf2uj8HThjoVJLGjTkiaYp+3h3zP5P8C3BIt+iIqvrhYMeSNE7MEUnT6edICFW1BlgzlztOsgBYDVxRVU+fy21Lmn/MEUmT9XNOyKC8GljXcP+SRp85Io2wJiUkyT7A04D/02L/kkafOSKNvlZHQj4EvAG4o9H+JY0+c0QacX2dEzKXkjwduLqqzkvy2FkedyRwJMC+Sxdy2uq1cz7LYUtWzPk2JQ3e1ubIIJy2fu6zSYM1qP9m/j9ly7U4EnIw8MwklwKfAw5N8pnJD6qqY6tqZVWtXLzHgmHPKG
l+M0ekMTD0ElJVR1fVPlW1HHgB8O2q8mqakvpmjkjjoeW7YyRJ0nZs6OeETFRVZwJntpxB0mgzR6TR5ZEQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTQy9hCRZluQ7SdYluTjJq4c9g6TRZo5I42Fhg33eBryuqtYk2RU4L8k3q+pHDWaRNJrMEWkMDP1ISFVtqKo13e2NwDpg6bDnkDS6zBFpPLQ4EvIHSZYDDwXOmWbdkcCRAPsuHcyYp61fO5DtanAOW7Ki9QiaZ1rniEaPOTJ/NDsxNcndgC8Br6mqGyevr6pjq2plVa1cvMeC4Q8oad4zR6TR1qSEJNmRXnCcWFUnt5hB0mgzR6TR1+LdMQGOA9ZV1THD3r+k0WeOSOOhxZGQg4H/BhyaZG339dQGc0gaXeaINAaGfqZWVX0XyLD3K2l8mCPSePATUyVJUhOWEEmS1IQlRJIkNWEJkSRJTVhCJElSE5YQSZLUhCVEkiQ1YQmRJElNWEIkSVITlhBJktSEJUSSJDVhCZEkSU1YQiRJUhOWEEmS1IQlRJIkNWEJkSRJTaSqWs+wWUk2Aj9pPUef9gSubT3EFhileUdpVhitee9fVbu2HmKQzJGBGqV5R2lWGK15tzhHFg5qkjn2k6pa2XqIfiRZPSqzwmjNO0qzwmjNm2R16xmGwBwZkFGad5RmhdGad2tyxJdjJElSE5YQSZLUxKiUkGNbD7AFRmlWGK15R2lWGK15R2nWrTVKP+MozQqjNe8ozQqjNe8WzzoSJ6ZKkqTxMypHQiRJ0pixhEiSpCbmXQlJ8okkVye5aMKy9yf5cZILknw5ye4tZ9xkulknrPvbJJVkzxazTWemeZP8dZKfJLk4yftazTfRDP8OViT5QZK1SVYneXjLGTdJsizJd5Ks6/4OX90tv2eSbyb5WffnPVrPCrPOOy+fZ1vDHBmMUcoQMEcGac5ypKrm1RfwaOBA4KIJy54ELOxuvxd4b+s5Z5q1W74MOA34JbBn6zk383f7OOBbwF26+/dqPecss54OPKW7/VTgzNZzdrPsDRzY3d4V+CnwIOB9wJu65W+aR/9uZ5p3Xj7P5vDfz7z8+UYpR0YpQ2aZ1xwZ7Lxb9Dybd0dCquos4NeTlp1eVbd1d38A7DP0waYx3aydDwJvAObVWb8zzPtXwHuq6ubuMVcPfbBpzDBrAbt1t+8OrB/qUDOoqg1Vtaa7vRFYBywFngWc0D3sBODZbSa8s5nmna/Ps61hjgzGKGUImCODNFc5Mu9KSB9eCvxL6yFmkuSZwBVVdX7rWfq0P3BIknOSrErysNYDzeI1wPuTXAZ8ADi68TxTJFkOPBQ4B9irqjZA7wkL3KvdZNObNO9E8/p5Ngfm9c83YjkyShkC5sic25YcGakSkuQtwG3Aia1nmU6SRcBbgLe1nmULLATuATwCeD3whSRpO9KM/gp4bVUtA14LHNd4njtJcjfgS8BrqurG1vNszkzzzvfn2baa7z/fCObIKGUImCNzaltzZGRKSJIXA08HXlTdi03z0H7AfYDzk1xK7zDUmiR/1HSq2V0OnFw95wJ30Ltg0nz0YuDk7vYXgXlxQhlAkh3pPRFPrKpNM16VZO9u/d7AvDlMPcO8o/I822oj8vONWo6MUoaAOTJn5iJHRqKEJHky8EbgmVX1u9bzzKSqLqyqe1XV8qpaTu/JeWBVXdl4tNl8BTgUIMn+wE7M3ys2rgce090+FPhZw1n+oPut7zhgXVUdM2HVKfQCj+7Prw
57tunMNO+oPM+21qj8fCOYI6OUIWCOzIk5y5HWZ9hO/gI+C2wAbqX35HsZ8HPgMmBt9/Xx1nPONOuk9ZcyT85qn+XvdifgM8BFwBrg0NZzzjLro4DzgPPpvfb4p63n7GZ9FL2T3S6Y8G/0qcAewBn0Qu4M4J6tZ93MvPPyeTaH/37m5c83SjkyShkyy7zmyGDn3aLnmR/bLkmSmhiJl2MkSdL4sYRIkqQmLCGSJKkJS4gkSWrCEiJJkpqwhIgkZyZZ2XoOSaPLHNHWsIRo4JIsaD2DpNFmjownS8h2IsnyJD9OckKSC5Kc1F2jYvLjPpZkdZKLk7yjW/b4JF+e8JgnJjm5u/2kJN9PsibJF7vrCJDk0iRvS/Jd4HmT9vG8JBclOT/JWd2ylyT56ITHnJrksd3t3yZ5b5LzknwrycO737ou6S70JWkIzBHNNUvI9uX+wLFVdQBwI/A/pnnMW6pqJXAA8JgkBwDfBh6YZHH3mCOATybZE3gr8ISqOhBYDfzNhG39vqoeVVWfm7SPtwGHVdVDgH6e/LsAZ1bVnwIbgXcCTwT+HPiHPr5f0twxRzRnLCHbl8uq6nvd7c/Q+9jdyZ6fZA3wQ+DBwIOq97G6nwYOT7I7cBC9yzM/AngQ8L0ka+ld1+DeE7b1+Rnm+B5wfJKXA/0cYr0F+Nfu9oXAqqq6tbu9vI/vlzR3zBHNmYWtB9BQTf6M/jvdT3If4G+Bh1XVb5IcD+zcrf4k8DXg98AXq+q27gJG36yqF86wv5umHaLqlUn+DHgasDbJCnqXfJ5YineecPvW+s/rC9wB3Nxt544k/huWhssc0ZzxSMj2Zd8kB3W3Xwh8d9L63eg94W9IshfwlE0rqmo9vatPvhU4vlv8A+DgJPcFSLKou4rmrJLsV1XnVNXb6F1tcxm9i3StSLJDkmXMo8trS7oTc0Rzxva3fVkHvDjJP9O7IuPHJq6sqvOT/BC4GLiE3uHOiU4EFlfVj7rHX5PkJcBnk9yle8xbgZ9uZo73J7kfEHpXhTy/W/4LeodGN12NU9L8Y45ozngV3e1EkuXAqVX1J9uwjY8CP6yq4+ZqLkmjwxzRXPNIiPqS5Dx6h1hf13oWSaPJHNFkHgmRJElNeGKqJElqwhIiSZKasIRIkqQmLCGSJKkJS4gkSWri/wOtSEsCCY5dxwAAAABJRU5ErkJggg==\n", 379 | "text/plain": [ 380 | "
" 381 | ] 382 | }, 383 | "metadata": { 384 | "needs_background": "light" 385 | }, 386 | "output_type": "display_data" 387 | }, 388 | { 389 | "data": { 390 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAemUlEQVR4nO3deZhkBXnv8e9vumdhBoYBWYKADsGAS4JIRuO+4BIlLkluFnPlXreIyfWJxixuMSbm8caoiYlJnqvhXiJEuZqoaBBNxGhE9LoEERElRoPINuzbwMAwy3v/qDNJ09tU95zTp6v4fp6nn6k6Vf2et7rr/ObtU6fqpKqQJElaaiv6bkCSJN03OYRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOITcByW5I8kPz3P75UmetpQ9SRpP5o3m4xByH1RV+1bVZQBJTk/ylqVYb5IXJfnCUqxL0vLQV95oNDiESJKkXjiEjIkkL07y8SnXv5fk76ZcvzLJ8c3lSvKgJKcALwBe0+wy/fiUkscnuTjJbUn+NsmaKbVe1tS/OcnZSe7fLN/Y1J6cct/PJfnlJA8B3gM8plnXrfM8jkuTbElyWZKXT7v9eUkuSnJ7kn9P8sxm+f5JTkuyOcnVSd6SZGIvfqSS5rCUeTNtvUcn+WySm5LcmOTMJBum3H5kkrOS3NDc5y+n3PaSJltuSfKpJA9s8UeiRXIIGR/nAU9IsiLJYcBK4HEAzeux+wIXT/2GqjoVOBN4e7PL9DlTbv4F4JnAUcBxwIuaWicCb21uPwz4AfDBPTVXVZcCvwJ8qVnXhjnuej3wbGA98GLgT5Oc0Kz7UcDfAL8NbACeCFzefN8ZwA7gQcAjgGcAv7ynviQtypLkzSzCIH/uDzwEOBL4/Wa9E8A5DDJpI3A4TTYl+WngDcDPAgcD5wMfWORjV4sm93wXjYKquizJFuB44BjgUwz+ungw8Bjg/KratYCSf15V1wA0f7Ec3yx/AfDXVXVhc9vrgVuSbGzpcXxiytXzkpwLPAG4EHhps+5PN7df3fRwKPAsYENV3QXcmeRPgVOAv2qjL0n/aQnzZvp6vwd8r7l6Q5J3Ar/XXH8Ug+Hkt6tqR7Ns9zFoLwfe2vwxRJI/BN6Q5IFV9YMF9KmWOYSMl/OAJzPYG3AecCvwJAahcN4Ca1075fJWBhs3zb8X7r6hqu5IchODvzquXlTXUyR5FoNQOYbBnrq1wDebm48EPjnLtz2QwV9im5PsXrYCuHJv+5E0p6XIm3tJcgjw5wz+MNmPwXZ+S3PzkcAPpgwgUz0QeFeSP5lajkFuOYT0yJdjxsvuUHhCc/k8BqHwJOYOhYWeRvkaBhs0AEnWAfdjMIDc2SxeO+X+PzTsupKsBj4C/DFwaPOSzScZhAUMhoqjZ/nWK4FtwEFVtaH5Wl9VDxv6UUlaqKXIm+ne2tQ4rqrWAydz73x4wNRj0qa4Enj5lHzYUFX7VNX/28t+tJccQsbLecBTgH2q6ioGr3s+k8GQ8PU5vuc6YM738M/i/wIvTnJ8MzT8IfCVqrq8qm5gMIycnGQiyUu499BwHXBEklVz1F4FrAZuAHY0e0WeMeX205p1P7V5LfrwJA+uqs3AucCfJFnf3HZ0kict4HFJWpilyJvp9gPuAG5NcjiD48N2+yqwGfijJOuSrEnyuOa29wCvT/Iw+I8D2X9+L/pQSxxCxkhV/RuDDfT85vrtwGXAF6tq5xzfdhrw0CS3JvnYEOv4DPC7DPZYbGYwZDx/yl1exiAYbgIeBkz9S+OzwLeAa5PcOEvtLcArg
b9jsIv1vwJnT7n9qzQHqwK3MQjB3Xtl/juDIebbzfd+mMGBs5I6sBR5M4s3Aycw2P4/AZw1pZ+dwHMYvDx0BXAV8IvNbR8F3gZ8MMntwCUMjiNTz1K1t3vHJEmSFs49IZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcj8WFlqybX1prVc33K9+Klq2Nydy3kgwKH1NUBxF3VnezmqbVjn25OB7NrrjcN74XJ9dvbLwo8aM3Nrde84sod3HTzruz5nqNr5ap1tWafA1qvm11dbZvtl+yu127q1oqOnpLppu6Ote3/XV/r53qj0d45Zu0Nrde88qqd3LzAHBmJIWTN6g08+mEv3/MdFyj3zPbBei3U3bqt/ZrbuvkPjR3d/Ax2HXpgJ3Vvfcj6TurevrH98DjkxL3+ANlZfewhezxVz4Kd+KzrW6+53KzZ5wBOeOyvtV538s5utqEV97T/n08XNaG7fNq1poO/DoBd+3TzX99NP7Z2z3daoG0/eXvrNQE+uan9M1qcdNKMT17YI1+OkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvehsCEny10muT3LJlGUHJvl0ku82/x7Q1foljT5zRBpvXe4JOR145rRlrwM+U1U/AnymuS5Jczkdc0QaW50NIVX1eeDmaYufB5zRXD4D+Omu1i9p9Jkj0nibXOL1HVpVmwGqanOSQ+a6Y5JTgFMAVq/ZwPb9V7fezOSd3cxgK5LWa9ba9h8/QLZu66TuzrUrO6m7Y037P1uA7Q+/o/Warz3qH1uvCbD/in1arzkxWoeHLSpHVq07gLsOaj/yJtZPtF4TYJ8bt7dec9vBa1qvCbBrspvtcuLuXZ3U3blPN8/3m0/Y0XrNt/3oOa3XBDhsct/Wa67MLQv+nmWbPFV1alVtqqpNK1eu67sdSSNoao5MrjZHpOVmqYeQ65IcBtD8e/0Sr1/S6DNHpDGx1EPI2cALm8svBP5+idcvafSZI9KY6PItuh8AvgQcm+SqJC8F/gh4epLvAk9vrkvSrMwRabx1dmBqVf3SHDc9tat1Shov5og03pbtgamSJGm8OYRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReTPbdwDBqItyz/0i0CsCKlRPt19y2o/WaABN3b++k7o59V3ZS95aHVSd1T3rQpa3X3LBia+s1AXZWWq9ZdPNzXU5SMLmt/ce59aBu/pbbeuia1mtu+F4323s6+nN26yHd5MgdR7a/DQH82IMva73m4ZO3tF4TYGcHm/xicsQ9IZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRcOIZIkqRe9DCFJXp3kW0kuSfKBJGv66EPS6DJHpNG35ENIksOBVwKbqupHgQng+Uvdh6TRZY5I46Gvl2MmgX2STAJrgWt66kPS6DJHpBE3udQrrKqrk/wxcAVwF3BuVZ077zcFdk2m9V627zvRek2AyTvbr1kTK9svCmR7N3uwbzlmVSd19z/2xk7qHrJqS+s1765ufmewo6O6o2MxObJzFdz+wG62+S6suKf9mjf+WDfPyTU3VSd17z6w/dwHWPHIWzupe8Ta9uteu2ND6zUBdqy+qfWai3kW9PFyzAHA84CjgPsD65KcPMv9TklyQZILtm/r4H91SSNrMTmyc6s5Ii03fbwc8zTg+1V1Q1VtB84CHjv9TlV1alVtqqpNK1evW/ImJS1rC86RibXmiLTc9DGEXAE8OsnaJ
AGeClzaQx+SRpc5Io2BJR9CquorwIeBC4FvNj2cutR9SBpd5og0Hpb8wFSAqvo94Pf6WLek8WCOSKPPT0yVJEm9cAiRJEm92OPLMUl+dpbFtwHfrKrr229J0rgxRyTNZphjQl4KPAb45+b6k4EvA8ck+YOqel9HvUkaH+aIpBmGGUJ2AQ+pqusAkhwKvBv4CeDzgOEhaU/MEUkzDHNMyMbdwdG4Hjimqm4GtnfTlqQxY45ImmGYPSHnJzkH+FBz/b8An0+yDujmA/gljRtzRNIMwwwhr2AQGI8DAvwN8JGqKuApHfYmaXyYI5Jm2OMQ0oTEh5svSVowc0TSbPZ4TEiSn03y3SS3Jbk9yZYkty9Fc5LGgzkiaTbDvBzzduA5VeXJoSQtljkiaYZh3h1zncEhaS+ZI5JmGGZPyAVJ/hb4GLBt98KqOquzriSNG3NE0gzDDCHrga3AM6YsK8DwkDQsc0TSDMO8O+bFS9GIpPFljkiazZxDSJLXVNXbk/wFg79Y7qWqXtlpZ5JGnjkiaT7z7QnZfRDZBUvRyHx2TcC2/dN63Vox0XrNQeH269YwhxAvwoodqzupe/f9OinL9i1rO6l78e2Ht17zAatubL0mwLbV17Rec8Z00J5lkyM1CXcf1P4jnbyj/WwCmOhim++mVe4+sJvC6eiJueX6dZ3UvfHg9uv+4J6DWq8JcPfa6/Z8pwWqRSTJnENIVX28uXh+VV222KYk3XeZI5LmM8yBqacnORz4FwZnuzy/qr7ZbVuSxow5ImmGYQ5MfWKSVcAjgScDn0iyb1Ud2HVzksaDOSJpNnscQpI8HnhC87UBOAc4v+O+JI0Rc0TSbIZ5OeY8BgeVvRX4ZFXd021LksaQOSJphmGGkPsxOP32E4FXJtkFfKmqfrfTziSNE3NE0gzDHBNya5LLgCOBI4DHAiu7bkzS+DBHJM1mmGNC/h34DvAF4D3Ai92VKmkhzBFJsxnm5ZgfqapdnXciaZyZI5JmGOYz+e6f5KNJrk9yXZKPJDmi884kjRNzRNIMwwwh7wXOBu4PHA58vFkmScMyRyTNMMwQcnBVvbeqdjRfpwMHd9yXpPFijkiaYZgh5MYkJyeZaL5OBm7qujFJY8UckTTDMEPIS4BfAK4FNgM/1yyTpGGZI5JmGOZzQq4AnrsEvUgaU+aIpNkM8zkhBwMvAzZOvX9V+VeMpKGYI5JmM8znhPw9gxNN/ROws9t2JI0pc0TSDMMMIWur6rWddyJpnJkjkmYY5sDUc5Kc1OZKk2xI8uEk/5rk0iSPabO+pGXHHJE0w5x7QpJsAQoI8IYk24DtzfWqqvV7sd53Af9YVT+XZBWwdi9qSVqmzBFJ85lzCKmq/bpYYZL1DE7n/aJmPfcAnshKGkPmiKT5DPPumMcBF1XVnc0HDJ0A/FnzlrvF+GHgBuC9SR4OfA14VVXdOW29pwCnAKxadwDp4lC2jk6nVRPt11yxvf2aXZq4u5u62+7p4IcL3LZtn9Zr3rxz39ZrAmzv4LjOolqvOdVyyJHJ9Qew6rYs+jHMZXJr6yUHde9q/3eybUP7jx9g9W3dPH9qmAMGFmHylmEOh1y4b26+f+s1j1t/des1Abbuaj9HdtXCnwfD/IrfDWxtNvTXAD8A3rfgNf2nSQYB9O6qegRwJ/C66XeqqlOralNVbZrcZ91erE7SMtB/jqw1R6TlZpghZEdVFfA84F1V9S5gb3axXgVcVVVfaa5/mEGYSBpf5oikGYYZQrYkeT1wMvCJJBPAysWusKquBa5Mcmyz6KnAtxdbT9JIMEckzTDMEPKLwDbgpc2Gfzjwjr1c768BZya5GDge+MO9rCdpeTNHJM0wzLljrgXeOeX6FcDf7M1Kq+oiYNPe1JA0OswRSbPp6NhjSZKk+TmESJKkXsw7hCSZSPL+pWpG0vgxRyTNZd4hpKp2Agc3H4ksSQtmjkiayzAfG3c58MUkZzP4QCAAquqdc36HJN3b5ZgjkqYZZ
gi5pvlawd59uJCk+y5zRNIMw7xF980ASdZNPy+DJA3DHJE0mz2+OybJY5J8G7i0uf7wJP+r884kjQ1zRNJshnmL7p8BPwncBFBV32BwCm1JGpY5ImmGoT4npKqunLao/XMASxpr5oik6YY5MPXKJI8FqnmL3StpdqlK0pDMEUkzDLMn5FeAVzA44dRVDE4U9Youm5I0dswRSTMM8+6YG4EXLEEvksaUOSJpNnMOIUn+Aqi5bq+qV3bSkaSxYY5Ims98e0IuWLIuJI0rc0TSnOYcQqrqjKVsRNL4MUckzWePx4QkORh4LfBQYM3u5VV1Yod93VtBdrVfNnPuJN47K7Z1VHiETN6VTurW1olO6m7bOcwbxRZmVw31Dvj7hOWQIxPbYP/L2g+SNTdtb70mwM3Hrm695n5XdBCkwN0HdrO979inm7pdhX86qHv9PeN9loNhUvJMBm+lOwp4M4MTUf1Lhz1JGj/miKQZhhlC7ldVpwHbq+q8qnoJ8OiO+5I0XswRSTMMsw96977GzUl+isGZMI/oriVJY8gckTTDMEPIW5LsD/wm8BfAeuDVnXYladyYI5JmGObDys5pLt4GPKXbdiSNI3NE0mz2eExIkmOSfCbJJc3145K8sfvWJI0Lc0TSbIY5MPV/A6+neU23qi4Gnt9lU5LGjjkiaYZhhpC1VfXVact2dNGMpLFljkiaYZgh5MYkR9Oc/yHJzwGbO+1K0rgxRyTNMMy7Y14BnAo8OMnVwPeBkzvtStK4MUckzTDMu2MuA56WZB2woqq2dN+WpHFijkiazZxDSJLfmGM5AFX1zo56kjQmzBFJ85lvT8jus+YcCzwSOLu5/hzg8102JWlsmCOS5jTnEFJVbwZIci5wwu7dp0l+H/jQknQnaaSZI5LmM8y7Yx4A3DPl+j3Axk66kTSuzBFJMwzz7pj3AV9N8lEGb6/7GeCMTruSNG7MEUkzDPPumP+Z5B+AJzSLXlxVX++2LUnjxByRNJth9oRQVRcCF7a54iQTwAXA1VX17DZrS1p+zBFJ0w1zTEhXXgVc2uP6JY0+c0QaYb0MIUmOAH4K+D99rF/S6DNHpNHX156QPwNeA+zqaf2SRp85Io24oY4JaVOSZwPXV9XXkjx5nvudApwCsGrtAUzeVa33Uh2NYOkgEnd19Jua3Nb+zxVoTlPWvmzv5pe2bedEJ3W7sL3af4J19OvqzGJyZHL9Adx6dPvPnxUbV7deE2Dl7e3/Vu46qJvtp6t8mtjWTd01N6STuncdsbL1moesGu8zHPSxJ+RxwHOTXA58EDgxyfun36mqTq2qTVW1aeXqdUvdo6TlbcE5MrnWHJGWmyUfQqrq9VV1RFVtBJ4PfLaqPJumpKGZI9J46PPdMZIk6T5syY8JmaqqPgd8rs8eJI02c0QaXe4JkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvXAIkSRJvZjsu4GhpPlq2eTd1X7Rjkxu66bXWtHBDxbIzk7KdlZ3Rdr/+e7s4kmrRasJ2L5f+7/ntZu7+T2v2NFBzbu6yZGJe7qpm12dlOWOw7v5+3vV6u2t17xl+9rWawJ08d/fYn5d7gmRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9WPIhJMmRSf45yaVJvpXkVUvdg6TRZo5I42Gyh3XuAH6zqi5Msh/wtSSfrqpv99CLpNFkjkhjYMn3hFTV5qq6sLm8BbgUOHyp+5A0uswRaTz0sSfkPyTZCDwC+Most50CnAKwZmI/Djj/ivYbWNHNDFZbt7Zf8872awLs2ratk7prqzqpe+BTf
7yTujd9/4dar/nuYw5uvSbA9ie1v9nesOMLrddcKkPnCGs56g1fan39E+vXt14TYFcHObJi/2563XnTzZ3U7cqGAw7opO6Wy49pvea5Rz+69ZoAO5/f/v9/1+/41IK/p7cDU5PsC3wE+PWqun367VV1alVtqqpNq1bss/QNSlr2FpIjK1m99A1KmlcvQ0iSlQyC48yqOquPHiSNNnNEGn19vDsmwGnApVX1zqVev6TRZ45I46GPPSGPA/4bcGKSi5qvk3roQ9LoMkekMbDkB6ZW1ReALPV6JY0Pc0QaD35iqiRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6kWqqu8e9ijJFuA7ffcxpIOAG/tuYgFGqd9R6hVGq99jq2q/vpvokjnSqVHqd5R6hdHqd8E5MtlVJy37TlVt6ruJYSS5YFR6hdHqd5R6hdHqN8kFffewBMyRjoxSv6PUK4xWv4vJEV+OkSRJvXAIkSRJvRiVIeTUvhtYgFHqFUar31HqFUar31HqdbFG6TGOUq8wWv2OUq8wWv0uuNeRODBVkiSNn1HZEyJJksaMQ4gkSerFshtCkvx1kuuTXDJl2TuS/GuSi5N8NMmGPnvcbbZep9z2W0kqyUF99DabufpN8mtJvpPkW0ne3ld/U83xPDg+yZeTXJTkgiSP6rPH3ZIcmeSfk1za/Axf1Sw/MMmnk3y3+feAvnuFeftdltvZYpgj3RilDAFzpEut5UhVLasv4InACcAlU5Y9A5hsLr8NeFvffc7Va7P8SOBTwA+Ag/rucw8/26cA/wSsbq4f0nef8/R6LvCs5vJJwOf67rPp5TDghObyfsC/AQ8F3g68rln+umX0vJ2r32W5nbX4/FmWj2+UcmSUMmSefs2Rbvtd0Ha27PaEVNXngZunLTu3qnY0V78MHLHkjc1itl4bfwq8BlhWR/3O0e+vAn9UVdua+1y/5I3NYo5eC1jfXN4fuGZJm5pDVW2uqguby1uAS4HDgecBZzR3OwP46X46vLe5+l2u29limCPdGKUMAXOkS23lyLIbQobwEuAf+m5iLkmeC1xdVd/ou5chHQM8IclXkpyX5JF9NzSPXwfekeRK4I+B1/fczwxJNgKPAL4CHFpVm2GwwQKH9NfZ7Kb1O9Wy3s5asKwf34jlyChlCJgjrdubHBmpISTJ7wA7gDP77mU2SdYCvwO8qe9eFmASOAB4NPDbwN8lSb8tzelXgVdX1ZHAq4HTeu7nXpLsC3wE+PWqur3vfvZkrn6X+3a2t5b74xvBHBmlDAFzpFV7myMjM4QkeSHwbOAF1bzYtAwdDRwFfCPJ5Qx2Q12Y5Id67Wp+VwFn1cBXgV0MTpi0HL0QOKu5/CFgWRxQBpBkJYMN8cyq2t3jdUkOa24/DFg2u6nn6HdUtrNFG5HHN2o5MkoZAuZIa9rIkZEYQpI8E3gt8Nyq2tp3P3Opqm9W1SFVtbGqNjLYOE+oqmt7bm0+HwNOBEhyDLCK5XvGxmuAJzWXTwS+22Mv/6H5q+804NKqeueUm85mEHg0//79Uvc2m7n6HZXtbLFG5fGNYI6MUoaAOdKK1nKk7yNsp38BHwA2A9sZbHwvBb4HXAlc1Hy9p+8+5+p12u2Xs0yOap/nZ7sKeD9wCXAhcGLffc7T6+OBrwHfYPDa44/33WfT6+MZHOx28ZTn6EnA/YDPMAi5zwAH9t3rHvpdlttZi8+fZfn4RilHRilD5unXHOm23wVtZ35suyRJ6sVIvBwjSZLGj0OIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOISPK5JJv67kPS6DJHtBgOIepckom+e5A02syR8eQQch+RZGOSf01yRpKLk3y4OUfF9Pu9O8kFSb6V5M3Nsqcm+eiU+zw9yVnN5Wck+VKSC5N8qDmPAEkuT
/KmJF8Afn7aOn4+ySVJvpHk882yFyX5yyn3OSfJk5vLdyR5W5KvJfmnJI9q/uq6rDnRl6QlYI6obQ4h9y3HAqdW1XHA7cD/mOU+v1NVm4DjgCclOQ74LPCQJAc393kx8N4kBwFvBJ5WVScAFwC/MaXW3VX1+Kr64LR1vAn4yap6ODDMxr8O+FxV/TiwBXgL8HTgZ4A/GOL7JbXHHFFrHELuW66sqi82l9/P4GN3p/uFJBcCXwceBjy0Bh+r+z7g5CQbgMcwOD3zo4GHAl9MchGD8xo8cEqtv52jjy8Cpyd5GTDMLtZ7gH9sLn8TOK+qtjeXNw7x/ZLaY46oNZN9N6AlNf0z+u91PclRwG8Bj6yqW5KcDqxpbn4v8HHgbuBDVbWjOYHRp6vql+ZY352zNlH1K0l+Avgp4KIkxzM45fPUoXjNlMvb6z/PL7AL2NbU2ZXE57C0tMwRtcY9IfctD0jymObyLwFfmHb7egYb/G1JDgWetfuGqrqGwdkn3wic3iz+MvC4JA8CSLK2OYvmvJIcXVVfqao3MTjb5pEMTtJ1fJIVSY5kGZ1eW9K9mCNqjdPffculwAuT/BWDMzK+e+qNVfWNJF8HvgVcxmB351RnAgdX1beb+9+Q5EXAB5Ksbu7zRuDf9tDHO5L8CBAGZ4X8RrP8+wx2je4+G6ek5cccUWs8i+59RJKNwDlV9aN7UeMvga9X1Wlt9SVpdJgjapt7QjSUJF9jsIv1N/vuRdJoMkc0nXtCJElSLzwwVZIk9cIhRJIk9cIhRJIk9cIhRJIk9cIhRJIk9eL/A+jYVxelCAPxAAAAAElFTkSuQmCC\n", 391 | "text/plain": [ 392 | "
# Off-policy policy evaluation with weighted importance sampling

def evaluate_monte_carlo_with_importance_resample(env, policy, behavior_policy, episode_num=500000):
    """Estimate action values of ``policy`` from episodes of ``behavior_policy``.

    Episodes are generated by sampling actions from ``behavior_policy``.
    Each episode is then replayed from its last step back to its first,
    updating a weighted-importance-sampling average of the return for every
    visited (state, action) pair.

    The backward replay is essential: the weight applied to a pair must be
    the product of the target/behavior probability ratios of the steps
    that come *after* it in the episode, so ``rho`` has to be accumulated
    starting from the end.

    Args:
        env: Environment driven through ``reset()`` (returning an
            observation) and ``step(action)`` (returning a 4-tuple
            ``observation, reward, done, info``); ``env.action_space.n``
            is the number of actions to sample from.
        policy: Target policy; ``policy[state][action]`` is the probability
            of ``action`` in ``state``.
        behavior_policy: Policy used to generate the episodes, indexed the
            same way. It must give non-zero probability to every action
            the target policy can take.
        episode_num: Number of episodes to sample.

    Returns:
        Array with the same shape as ``policy`` holding the estimated
        action values.
    """
    # Force float tables so an integer-valued policy array cannot truncate
    # the running averages.
    q = np.zeros_like(policy, dtype=float)
    c = np.zeros_like(policy, dtype=float)  # cumulative importance weights

    for _ in tqdm(range(episode_num)):
        # Roll out one episode under the behavior policy.
        state_actions = []
        observation = env.reset()

        while True:
            # ob2state is defined elsewhere in the notebook; its result is
            # used directly as an index into the tables.
            state = ob2state(observation)
            action = np.random.choice(env.action_space.n, p=behavior_policy[state])
            state_actions.append((state, action))

            observation, reward, done, _ = env.step(action)
            if done:
                break

        # The reward of the final step is used as the return of the whole
        # episode for every pair visited in it.
        g = reward
        rho = 1.0  # importance sampling ratio of the steps after the current one
        for state, action in reversed(state_actions):
            c[state][action] += rho
            q[state][action] += (rho / c[state][action] * (g - q[state][action]))
            rho *= (policy[state][action] / behavior_policy[state][action])
            if rho == 0:
                # The target policy would never take this action, so every
                # earlier step of the episode has zero weight.
                break
    return q
"stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "\n" 468 | ] 469 | }, 470 | { 471 | "data": { 472 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAeYklEQVR4nO3deZhkB1nv8e+ve7ZMkkkIJJGEmMFo2HwgxAGJYQ2LgCzqdcFL7oWARL08grggKKL4cEVAUdR7wXgjiZALCgRNgksQJAQuBENIQiAiGEK2IQvZZ5JML+/9o85gZ3qZ6plz+nTVfD/P089Unap+z9vVdX7z9qlTdVJVSJIkrbSJvhuQJEn7JocQSZLUC4cQSZLUC4cQSZLUC4cQSZLUC4cQSZLUC4eQfVCSu5N8zxK3X53kGSvZk6TxZN5oKQ4h+6CqOqCqrgJIckaSN6/EepO8NMmnV2JdklaHvvJGo8EhRJIk9cIhZEwkOSXJuXOufz3J38y5fm2S45rLleR7k5wKvBh4bbPL9Nw5JY9LcnmSO5L8dZINc2q9oql/a5JzkhzRLN/c1F4z576fTPKzSR4BvBs4oVnX7Uv8HFcmuSvJVUl+bpfbX5jk0iR3JvmPJM9ulh+U5PQkW5Ncn+TNSSb34iGVtIiVzJtd1ntMkk8k+XaSW5KcleTgObcfleTsJDc39/mzObe9rMmW25L8U5KjW3xItIccQsbHBcCTkkwkeTCwFjgRoHk99gDg8rnfUFWnAWcBb2t2mT5/zs0/BTwbeCjwaOClTa2TgLc0tz8Y+Cbwgd01V1VXAj8PfLZZ18GL3PUm4HnAJuAU4I+SHN+s+/HAXwG/BhwMPBm4uvm+M4Fp4HuBxwLPAn52d31J2iMrkjcLCIP8OQJ4BHAU8DvNeieB8xhk0mbgSJpsSvKjwG8APw4cClwIvH8Pf3a1aM3u76JRUFVXJbkLOA44FvgnBn9dPBw4AbiwqmaXUfJPquoGgOYvluOa5S8G/rKqLmluez1wW5LNLf0cH51z9YIk5wNPAi4BXt6s+2PN7dc3PRwOPAc4uKruAbYl+SPgVODP2+hL0n9awbzZdb1fB77eXL05yTuA326uP57BcPJrVTXdLNt5DNrPAW9p/hgiye8Bv5Hk6Kr65jL6VMscQsbLBcBTGewNuAC4HXgKg1C4YJm1vjXn8nYGGzfNv5fsvKGq7k7ybQZ/dVy/R13PkeQ5DELlWAZ76jYCX2puPgr4+wW+7WgGf4ltTbJz2QRw7d72I2lRK5E395PkMOBPGPxhciCD7fy25uajgG/OGUDmOhp4Z5I/nFuOQW45hPTIl2PGy85QeFJz+QIGofAUFg+F5Z5G+QYGGzQASfYHHshgANnWLN445/7fNey6kqwHPgz8AXB485LN3zMICxgMFccs8K3XAvcBD6qqg5uvTVX1qKF/KknLtRJ5s6u3NDUeXVWbgJO5fz5899xj0ua4Fvi5OflwcFXtV1X/by/70V5yCBkvFwBPA/arqusYvO75bAZDwhcX+Z4bgUXfw7+A/wuckuS4Zmj4PeCiqrq6qm5mMIycnGQyycu4/9BwI/CQJOsWqb0OWA/cDEw3e0WeNef205t1P715LfrIJA+vqq3A+cAfJtnU3HZMkqcs4+eStDwrkTe7OhC4G7g9yZEMjg/b6fPAVuD3k+yfZEOSE5vb3g28Psmj4DsHsv/kXvShljiEjJGq+ncGG+iFzfU7gauAz1TVzCLfdjrwyCS3J/nbIdbxceC3GOyx2MpgyHjRnLu8gkEwfBt4FDD3L41PAF8GvpXklgVq3wW8CvgbBrtY/ytwzpzbP09zsCpwB4MQ3LlX5r8zGGK
+0nzvhxgcOCupAyuRNwt4E3A8g+3/o8DZc/qZAZ7P4OWha4DrgJ9ubvsI8FbgA0nuBK5gcByZepaqvd07JkmStHzuCZEkSb1wCJEkSb1wCJEkSb1wCJEkSb0YiQ8rW7dmY+239qD2C3d1UG4nZTvqdbarx2C06tbscj7ccTgTD+tm83ro+gVPu7NXrrl2mm/fOpvd33N0rZvsJkdqspuHLV081bva3rvKp3Tz2NZEN39/Z6b9HJk9upvH9pgNt7Ze89o9yJGRGEL2W3sQJxzzsvYLTy30wXp7L13U7eDJDVA7dnRSlx1TnZTtqt/Zbdt2f6dlWv8X37X7O+2B9x7zkdZrnvScm1qvudrst/YgTtj8ktbrzm7ar/WaAJla7F2ue1Hzno629w6GeADWdvNf1OwBC54fb69N3LG99Zr3/K9u/p86++Htnzrn6c+9ednf48sxkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpFw4hkiSpF50NIUn+MslNSa6Ys+yQJB9L8rXm3wd0tX5Jo88ckcZbl3tCzgCevcuy1wEfr6rvAz7eXJekxZyBOSKNrc6GkKr6FHDrLotfCJzZXD4T+NGu1i9p9Jkj0nhbs8LrO7yqtgJU1dYkhy12xySnAqcCbFh7ELV2sv1u1ozOITHJTDd1WddJXdZ09NSq2U7KpoN+f/Wof2y9JsBBE/u1XnNytA4P2/Mc2bi+9WZ2HLKh9ZoAs2vTes39rru79ZqdSvuPAQDT3eTI1OGbWq95ylHntl4TYNNE+8/bSZb/+1q1yVNVp1XVlqrasm7Nxr7bkTSCzBFpdVvpIeTGJA8GaP69aYXXL2n0mSPSmFjpIeQc4CXN5ZcAf7fC65c0+swRaUx0+Rbd9wOfBR6W5LokLwd+H3hmkq8Bz2yuS9KCzBFpvHV2YGpV/cwiNz29q3VKGi/miDTeVu2BqZIkabw5hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF44hEiSpF6s6buBYdREmNm4rvW6k/dMtV4ToDaub7/o9vvarwmwZrKTstnR0WM7Nd1JXdL+PL4pHf3O6OD5tQ+Y2TDJncduar3u2rtnWq8JMLWx/efk2gO7ee6suf3eTurSTTzB2m4Kbzui/cf34eu3tl4TYDJrW68ZsuzvcU+IJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqhUOIJEnqRS9DSJLXJPlykiuSvD/Jhj76kDS6zBFp9K34EJLkSOBVwJaq+n5gEnjRSvchaXSZI9J46OvlmDXAfknWABuBG3rqQ9LoMkekEbdmpVdYVdcn+QPgGuAe4PyqOn/JbwrUZFrvZfqAda3XBJjcNtV+0fXd9JrZ2U7qcvf2Tspmw/pO6s5ub7/fe6ubzWuqZlqvWVTrNbu0JzkyMTXLxq33td7LXUd385xcf3v72+a9h3XTKx3VnZjq5nm54cZ7Oqm75t72f2e3z2xsveZAB/9P7YE+Xo55APBC4KHAEcD+SU5e4H6nJrk4ycU7pratdJuSVjFzRBoPfbwc8wzgG1V1c1VNAWcDP7TrnarqtKraUlVb1q3df8WblLSqmSPSGOhjCLkGeEKSjUkCPB24soc+JI0uc0QaAys+hFTVRcCHgEuALzU9nLbSfUgaXeaINB5W/MBUgKr6beC3+1i3pPFgjkijz09MlSRJvXAIkSRJvdjtyzFJfnyBxXcAX6qqm9pvSdK4MUckLWSYY0JeDpwA/Etz/an
A54Bjk/xuVb23o94kjQ9zRNI8wwwhs8AjqupGgCSHA+8CfhD4FGB4SNodc0TSPMMcE7J5Z3A0bgKOrapbWS2f+ypptTNHJM0zzJ6QC5OcB3ywuf5fgE8l2R+4vbPOJI0Tc0TSPMMMIa9kEBgnAgH+CvhwVRXwtA57kzQ+zBFJ8+x2CGlC4kPNlyQtmzkiaSG7PSYkyY8n+VqSO5LcmeSuJHeuRHOSxoM5Imkhw7wc8zbg+VXlyaEk7SlzRNI8w7w75kaDQ9JeMkckzTPMnpCLk/w18LfAfTsXVtXZnXUladyYI5LmGWYI2QRsB541Z1kBhoekYZkjkuYZ5t0xp6xEI5LGlzkiaSGLDiFJXltVb0vypwz+YrmfqnpVp51JGnnmiKSlLLUnZOdBZBevRCO7NZnWS2bHbOs1ATLbft3MzLReE4DpjuquX9dN3W3bu6nbgWumD+mk7mPX39J6zXnTQXtWTY5Mb5zglsfs13rdialuHr3p9ZOt10xHv+iJjj54f2Kmm4a3HX5AJ3W7eByu2nFY+0WB29Z/rfWa0yz//75Fh5CqOre5eGFVXbWnTUnad5kjkpYyzIGpZyQ5EvhXBme7vLCqvtRtW5LGjDkiaZ5hDkx9cpJ1wOOApwIfTXJAVXWzr1nS2DFHJC1kt0NIkicCT2q+DgbOAy7suC9JY8QckbSQYV6OuYDBQWVvAf6+qnZ025KkMWSOSJpnmCHkgQxOv/1k4FVJZoHPVtVvddqZpHFijkiaZ5hjQm5PchVwFPAQ4IeAtV03Jml8mCOSFjLMMSH/AXwV+DTwbuAUd6VKWg5zRNJChnk55vuqqptP9ZK0rzBHJM0zMcR9jkjykSQ3JbkxyYeTPKTzziSNE3NE0jzDDCHvAc4BjgCOBM5tlknSsMwRSfMMM4QcWlXvqarp5usM4NCO+5I0XswRSfMMM4TckuTkJJPN18nAt7tuTNJYMUckzTPMEPIy4KeAbwFbgZ9olknSsMwRSfMM8zkh1wAvWIFeJI0pc0TSQob5nJBDgVcAm+fev6r8K0bSUMwRSQsZ5nNC/o7Biab+GZjpth1JY8ockTTPMEPIxqr69c47kTTOzBFJ8wxzYOp5SZ7b5kqTHJzkQ0n+LcmVSU5os76kVccckTTPontCktwFFBDgN5LcB0w116uqNu3Fet8J/GNV/USSdcDGvaglaZUyRyQtZdEhpKoO7GKFSTYxOJ33S5v17AA8kZU0hswRSUsZ5t0xJwKXVtW25gOGjgf+uHnL3Z74HuBm4D1JHgN8AXh1VW3bZb2nAqcCrN9wMNMbJvdwdYtbO12t1wSote33ykw3vWa6o2MEp6a7qTuRTspmTftnld8+u771mgAz1c1zoUurIUfWHvAA0sE2P7umm+fkunva77W6aZUa5oX9PbD2zm7Oebjm3m4eiKmN7de9t9rPJoD1GeaQ0OWZYPk//zBPnXcB25sN/bXAN4H3LntN/2kNgwB6V1U9FtgGvG7XO1XVaVW1paq2rF23/16sTtIq0HuOrNnPHJFWm2GGkOmqKuCFwDur6p3A3uxivQ64rqouaq5/iEGYSBpf5oikeYYZQu5K8nrgZOCjSSaBPd4/VFXfAq5N8rBm0dOBr+xpPUkjwRyRNM8wQ8hPA/cBL282/COBt+/len8ROCvJ5cBxwO/tZT1Jq5s5ImmeYc4d8y3gHXOuXwP81d6stKouBbbsTQ1Jo8MckbSQjo5pliRJWppDiCRJ6sWSQ0iSySTvW6lmJI0fc0TSYpYcQqpqBji0+UhkSVo2c0TSYob5yLSrgc8kOYfBBwIBUFXvWPQ7JOn+rsYckbSLYYaQG5qvCfbuw4Uk7bvMEUnzDPMW3TcBJNl/1/MySNIwzBFJC9ntu2OSnJDkK8CVzfXHJPnfnXcmaWyYI5IWMsxbdP8Y+GHg2wBVdRmDU2hL0rDMEUnzDPU5IVV17S6LOjr/u6RxZY5I2tUwB6Zem+SHgGreYvcqml2qkjQkc0T
SPMPsCfl54JUMTjh1HYMTRb2yy6YkjR1zRNI8w7w75hbgxSvQi6QxZY5IWsiiQ0iSPwVqsdur6lWddCRpbJgjkpay1J6Qi1esC0njyhyRtKhFh5CqOnMlG5E0fswRSUvZ7TEhSQ4Ffh14JLBh5/KqOqnDvu6vYGJq0T26e1G3g5pAdky3XnNi+72t1+zUmslu6k52U7dm2n+36L2za1uvCXDr7I7Wa07XbOs151oNOTIxDRtua3+bv+/g1ksCUGm/5tp7usm8Ndu7ef7MrO/gQejQ+tvbfxxum96/9ZoAd9dU6zVnFn/ldVHDvDvmLAZvpXso8CYGJ6L612WvSdK+zByRNM8wQ8gDq+p0YKqqLqiqlwFP6LgvSePFHJE0zzAfVrZzn83WJD/C4EyYD+muJUljyByRNM8wQ8ibkxwE/Arwp8Am4DWddiVp3JgjkuYZ5sPKzmsu3gE8rdt2JI0jc0TSQnZ7TEiSY5N8PMkVzfVHJ3lD961JGhfmiKSFDHNg6l8Ar6d5TbeqLgde1GVTksaOOSJpnmGGkI1V9fldlrX/QRiSxpk5ImmeYYaQW5IcQ3P+hyQ/AWzttCtJ48YckTTPMO+OeSVwGvDwJNcD3wBO7rQrSePGHJE0zzDvjrkKeEaS/YGJqrqr+7YkjRNzRNJCFh1CkvzyIssBqKp3dNSTpDFhjkhaylJ7Qg5s/n0Y8DjgnOb684FPddmUpLFhjkha1KJDSFW9CSDJ+cDxO3efJvkd4IMr0p2kkWaOSFrKMO+O+W5g7rnDdwCbO+lG0rgyRyTNM8y7Y94LfD7JRxi8ve7HgDM77UrSuDFHJM0zzLtj/meSfwCe1Cw6paq+2G1bksaJOSJpIcPsCaGqLgEuaXPFSSaBi4Hrq+p5bdaWtPqYI5J2NcwxIV15NXBlj+uXNPrMEWmE9TKEJHkI8CPA/+lj/ZJGnzkijb6+9oT8MfBaYLan9UsafeaINOKGOiakTUmeB9xUVV9I8tQl7ncqcCrA+vUHk6r2e5nuJrtqcrL9muvXtl4TgKluTmSaqfZ/XwC1Y6qTuszOtF7y3urmd3bgRPvPr8nmE0xHxZ7kyLr9Dmayg+fl+jtaLwnA5I72e63R+jWzdls3GX3vIe1vQwA1OToP8EwH/6eyBzX72BNyIvCCJFcDHwBOSvK+Xe9UVadV1Zaq2rJu3f4r3aOk1W3ZObJ2/QEr3aOk3VjxIaSqXl9VD6mqzcCLgE9UlWfTlDQ0c0QaD32+O0aSJO3DVvyYkLmq6pPAJ/vsQdJoM0ek0eWeEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1Is1fTcwlCoyXa2XTfslB3VnZ9svOtNBTSDTM53Ure3bO6mbyY7m5onJ1ktuyFTrNQHuq/afC908u1aX2TWw/UHtP3/SzSbEhg4yb2Kqm9Cb2ZBO6tZ0J2XZ75ZuCs+sb//5NVPjva9gvH86SZK0ajmESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXjiESJKkXqz4EJLkqCT/kuTKJF9O8uqV7kHSaDNHpPGwpod1TgO/UlWXJDkQ+EKSj1XVV3roRdJoMkekMbDie0KqamtVXdJcvgu4EjhypfuQNLrMEWk89LEn5DuSbAYeC1y0wG2nAqcCbJg8kHXfuKn19dd9O1qvCZDJ9me72W3bW68JwLq1nZSte+7tpC6zs93UrfbrvvXTz229JsDUie1vtrfNfL71mitl6BzJ/hz2vsvaX/8Rh7deE2D26mtbrzmxcWPrNQFm7ryzk7qTmzZ1UrdmZjqpO3HIA1qv+cFzn9h6TYANL5x
qveYtMxcu+3t6OzA1yQHAh4Ffqqp5z+CqOq2qtlTVlnUT+618g5JWvWXlSDasfIOSltTLEJJkLYPgOKuqzu6jB0mjzRyRRl8f744JcDpwZVW9Y6XXL2n0mSPSeOhjT8iJwH8DTkpyafPVzYvnksaVOSKNgRU/MLWqPg1kpdcraXyYI9J48BNTJUlSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSLxxCJElSL1JVffewW0nuAr7adx9DehBwS99NLMMo9TtKvcJo9fuwqjqw7ya6ZI50apT6HaVeYbT6XXaOrOmqk5Z9taq29N3EMJJcPCq9wmj1O0q9wmj1m+TivntYAeZIR0ap31HqFUar3z3JEV+OkSRJvXAIkSRJvRiVIeS0vhtYhlHqFUar31HqFUar31HqdU+N0s84Sr3CaPU7Sr3CaPW77F5H4sBUSZI0fkZlT4gkSRozDiGSJKkXq24ISfKXSW5KcsWcZW9P8m9JLk/ykSQH99njTgv1Oue2X01SSR7UR28LWazfJL+Y5KtJvpzkbX31N9ciz4PjknwuyaVJLk7y+D573CnJUUn+JcmVzWP46mb5IUk+luRrzb8P6LtXWLLfVbmd7QlzpBujlCFgjnSptRypqlX1BTwZOB64Ys6yZwFrmstvBd7ad5+L9dosPwr4J+CbwIP67nM3j+3TgH8G1jfXD+u7zyV6PR94TnP5ucAn++6z6eXBwPHN5QOBfwceCbwNeF2z/HWr6Hm7WL+rcjtr8fmzKn++UcqRUcqQJfo1R7rtd1nb2arbE1JVnwJu3WXZ+VU13Vz9HPCQFW9sAQv12vgj4LXAqjrqd5F+fwH4/aq6r7nPTSve2AIW6bWATc3lg4AbVrSpRVTV1qq6pLl8F3AlcCTwQuDM5m5nAj/aT4f3t1i/q3U72xPmSDdGKUPAHOlSWzmy6oaQIbwM+Ie+m1hMkhcA11fVZX33MqRjgScluSjJBUke13dDS/gl4O1JrgX+AHh9z/3Mk2Qz8FjgIuDwqtoKgw0WOKy/zha2S79zrertrAWr+ucbsRwZpQwBc6R1e5MjIzWEJPlNYBo4q+9eFpJkI/CbwBv77mUZ1gAPAJ4A/BrwN0nSb0uL+gXgNVV1FPAa4PSe+7mfJAcAHwZ+qaru7Luf3Vms39W+ne2t1f7zjWCOjFKGgDnSqr3NkZEZQpK8BHge8OJqXmxahY4BHgpcluRqBruhLknyXb12tbTrgLNr4PPALIMTJq1GLwHObi5/EFgVB5QBJFnLYEM8q6p29nhjkgc3tz8YWDW7qRfpd1S2sz02Ij/fqOXIKGUImCOtaSNHRmIISfJs4NeBF1TV9r77WUxVfamqDquqzVW1mcHGeXxVfavn1pbyt8BJAEmOBdaxes/YeAPwlObyScDXeuzlO5q/+k4Hrqyqd8y56RwGgUfz79+tdG8LWazfUdnO9tSo/HwjmCOjlCFgjrSitRzp+wjbXb+A9wNbgSkGG9/Lga8D1wKXNl/v7rvPxXrd5farWSVHtS/x2K4D3gdcAVwCnNR3n0v0+kTgC8BlDF57/IG++2x6fSKDg90un/McfS7wQODjDELu48Ahffe6m35X5XbW4vNnVf58o5Qjo5QhS/RrjnTb77K2Mz+2XZIk9WIkXo6RJEnjxyFEkiT1wiFEkiT1wiFEkiT1wiFEkiT1wiFEJPlkki199yFpdJkj2hMOIepcksm+e5A02syR8eQQso9IsjnJvyU5M8nlST7UnKNi1/u9K8nFSb6c5E3Nsqcn+cic+zwzydnN5Wcl+WySS5J8sDmPAEmuTvLGJJ8GfnKXdfxkkiuSXJbkU82ylyb5szn3OS/JU5vLdyd5a5IvJPnnJI9v/uq6qjnRl6QVYI6obQ4h+5aHAadV1aOBO4H/scB9frOqtgCPBp6S5NH
AJ4BHJDm0uc8pwHuSPAh4A/CMqjoeuBj45Tm17q2qJ1bVB3ZZxxuBH66qxwDDbPz7A5+sqh8A7gLeDDwT+DHgd4f4fkntMUfUGoeQfcu1VfWZ5vL7GHzs7q5+KsklwBeBRwGPrMHH6r4XODnJwcAJDE7P/ATgkcBnklzK4LwGR8+p9deL9PEZ4IwkrwCG2cW6A/jH5vKXgAuqaqq5vHmI75fUHnNErVnTdwNaUbt+Rv/9rid5KPCrwOOq6rYkZwAbmpvfA5wL3At8sKqmmxMYfayqfmaR9W1bsImqn0/yg8CPAJcmOY7BKZ/nDsUb5lyeqv88v8AscF9TZzaJz2FpZZkjao17QvYt353khObyzwCf3uX2TQw2+DuSHA48Z+cNVXUDg7NPvgE4o1n8OeDEJN8LkGRjcxbNJSU5pqouqqo3Mjjb5lEMTtJ1XJKJJEexik6vLel+zBG1xulv33Il8JIkf87gjIzvmntjVV2W5IvAl4GrGOzunOss4NCq+kpz/5uTvBR4f5L1zX3eAPz7bvp4e5LvA8LgrJCXNcu/wWDX6M6zcUpafcwRtcaz6O4jkmwGzquq79+LGn8GfLGqTm+rL0mjwxxR29wToqEk+QKDXay/0ncvkkaTOaJduSdEkiT1wgNTJUlSLxxCJElSLxxCJElSLxxCJElSLxxCJElSL/4/4GBwVLyHn+UAAAAASUVORK5CYII=\n", 473 | "text/plain": [ 474 | "
" 475 | ] 476 | }, 477 | "metadata": { 478 | "needs_background": "light" 479 | }, 480 | "output_type": "display_data" 481 | } 482 | ], 483 | "source": [ 484 | "policy = np.zeros((22, 11, 2, 2))\n", 485 | "policy[20:, :, :, 0] = 1 # >= 20 时收手\n", 486 | "policy[:20, :, :, 1] = 1 # < 20 时继续\n", 487 | "behavior_policy = np.ones_like(policy) * 0.5\n", 488 | "q = evaluate_monte_carlo_with_importance_resample(env, policy, behavior_policy)\n", 489 | "v = (q * policy).sum(axis=-1)\n", 490 | "plot(v)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 16, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [ 499 | "def monte_carlo_importance_resample(env, episode_num=500000):\n", 500 | " policy = np.zeros((22, 11, 2, 2))\n", 501 | " policy[:, :, :, 0] = 1.0\n", 502 | " behavior_policy = np.ones_like(policy) * 0.5\n", 503 | " q = np.zeros_like(policy)\n", 504 | " c = np.zeros_like(policy)\n", 505 | " \n", 506 | " for _ in tqdm(range(episode_num)):\n", 507 | " state_actions = []\n", 508 | " observation = env.reset()\n", 509 | " \n", 510 | " while True:\n", 511 | " state = ob2state(observation)\n", 512 | " action = np.random.choice(env.action_space.n, p=behavior_policy[state])\n", 513 | " state_actions.append([state, action])\n", 514 | " \n", 515 | " observation, reward, done, _ = env.step(action)\n", 516 | " if done:\n", 517 | " break\n", 518 | " \n", 519 | " g = reward\n", 520 | " rho = 1.0\n", 521 | " for state, action in state_actions:\n", 522 | " c[state][action] += rho\n", 523 | " q[state][action] += (rho / c[state][action] * (g - q[state][action]))\n", 524 | " a = q[state].argmax()\n", 525 | " policy[state] = 0.0\n", 526 | " policy[state][a] = 1.0\n", 527 | " if a != action:\n", 528 | " break\n", 529 | " rho /= behavior_policy[state][action]\n", 530 | "\n", 531 | " return policy, q" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 17, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | 
"application/vnd.jupyter.widget-view+json": { 542 | "model_id": "b18769817b104b9c81e177a6259ae9be", 543 | "version_major": 2, 544 | "version_minor": 0 545 | }, 546 | "text/plain": [ 547 | "HBox(children=(FloatProgress(value=0.0, max=500000.0), HTML(value='')))" 548 | ] 549 | }, 550 | "metadata": {}, 551 | "output_type": "display_data" 552 | }, 553 | { 554 | "name": "stdout", 555 | "output_type": "stream", 556 | "text": [ 557 | "\n" 558 | ] 559 | }, 560 | { 561 | "data": { 562 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAaXElEQVR4nO3debSkdX3n8feHbhZBEAUkLK1tUNxysCXtgrjigrvGUaMjM4pGNOMJaoy7MTHHcY9L9IyGGRRUxg1REU0aRVl0FNK0zWa7BVGgW0CURYzI8p0/6mlzuVtXd9+q332q369z7rlVz1O3nk/dc+tzvvepp+pJVSFJkjRu27UOIEmStk0OIZIkqQmHEEmS1IRDiCRJasIhRJIkNeEQIkmSmnAI2QYl+U2SP55n/SVJHjPOTJImk32j+TiEbIOq6vZVdTFAkuOSvHUc203ygiTfGse2JC0OrfpG/eAQIkmSmnAImRBJjkzy5SnXf5Lks1OuX5pkRXe5ktw9yVHA84DXdLtMvzzlLlckOT/JtUk+k2SnKff14u7+f5Xk5CT7dsuXd/e9dMptT0/yF0nuDXwEOKTb1jXzPI51Sa5PcnGSl0xb/7Qka5Ncl+Tfkzy+W36HJMcm2ZDk8iRvTbJkK36lkuYwzr6Ztt0DknwjydVJfpnkhCS7T1m/LMlJSa7qbvOhKete2HXLr5OsSnLXBfyVaAs5hEyOM4CHJdkuyT7A9sChAN3rsbcHzp/6A1V1DHAC8K5ul+lTpqx+NvB44G7AQcALuvs6DHh7t34f4GfApzcVrqrWAS8FvtNta/c5bnol8GRgN+BI4H1JDu62/UDg48Crgd2BhwOXdD93PHAzcHfg/sDjgL/YVC5JW2QsfTOLMOiffYF7A8uAv++2uwQ4hUEnLQf2o+umJE8H3gA8A9gLOAv41BY+di2gpZu+ifqgqi5Ocj2wAjgQWMXgv4t7AYcAZ1XVrZtxl/9UVesBuv9YVnTLnwd8tKrWdOteD/w6yfIFehxfmXL1jCSnAg8D1gAv6rb9tW795V2GvYEnALtX1X8ANyR5H3AU8M8LkUvSfxpj30zf7k+An3RXr0ryXuDvuusPZDCcvLqqbu6WbTwG7SXA27t/hkjyNuANSe5aVT/bjJxaYA4hk+UM4JEM9gacAVwDPIJBKZyxmff1iymXf8vgyU33fc3GFVX1myRXM/iv4/ItSj1FkicwKJUDGeyp2xm4oFu9DPjqLD92Vwb/iW1IsnHZdsClW5tH0pzG0Te3keTOwD8x+MdkVwbP8193q5cBP5sygEx1V+ADSf5x6t0x6C2HkIZ8OWaybCyFh3WXz2BQCo9g7lLY3NMor2fwhAYgyS7AHgwGkBu6xTtPuf0fDbutJDsCnwfeA+zdvWTzVQZlAYOh4oBZfvRS4EZgz6ravfvararuO/SjkrS5xtE30729u4+Dqmo34Ahu2w93mXpM2hSXAi+Z0g+7V9Xtqur
/bWUebSWHkMlyBvAo4HZVdRmD1z0fz2BI+N4cP3MFMOd7+Gfxf4Ejk6zohoa3AWdX1SVVdRWDYeSIJEuSvJDbDg1XAPsn2WGO+94B2BG4Cri52yvyuCnrj+22/ejutej9ktyrqjYApwL/mGS3bt0BSR6xGY9L0uYZR99MtyvwG+CaJPsxOD5so3OADcA7kuySZKckh3brPgK8Psl94Q8Hsj9rK3JogTiETJCq+hGDJ+hZ3fXrgIuBb1fVLXP82LHAfZJck+SLQ2zjNOBvGeyx2MBgyHjOlJu8mEExXA3cF5j6n8Y3gIuAXyT55Sz3fT1wNPBZBrtY/ytw8pT159AdrApcy6AEN+6V+e8Mhpjvdz97IoMDZyWNwDj6ZhZvAQ5m8Pz/CnDSlDy3AE9h8PLQz4HLgD/v1n0BeCfw6STXARcyOI5MjaVqa/eOSZIkbT73hEiSpCYcQiRJUhMOIZIkqQmHEEmS1EQvPqxsh+xYO7FL6xjSxPodN/D7ujGbvmV/7XmnJbV82fatY2gR+NH5O2/6RtpsW9IjvRhCdmIXHpRHt44hTayz67TWEUZu+bLtOWfVstYxtAgcvu+snwqvrbQlPeLLMZIkqQmHEEmS1IRDiCRJasIhRJIkNeEQIkmSmnAIkSRJTTiESJKkJhxCJElSEw4hkiSpCYcQSZLUhEOIJElqwiFEkiQ14RAiSZKacAiRJElNOIRIkqQmHEIkSVITDiGSJKmJkQ0hST6a5MokF05ZdqckX0vy4+77HUe1fUn9Z49Ik22Ue0KOAx4/bdnrgNOq6h7Aad11SZrLcdgj0sQa2RBSVWcCv5q2+GnA8d3l44Gnj2r7kvrPHpEm27iPCdm7qjYAdN/vPNcNkxyVZHWS1Tdx49gCSlr0tqhHrrr6lrEFlDScRXtgalUdU1Urq2rl9uzYOo6kHpraI3vtsaR1HEnTjHsIuSLJPgDd9yvHvH1J/WePSBNi3EPIycDzu8vPB7405u1L6j97RJoQo3yL7qeA7wD3THJZkhcB7wAem+THwGO765I0K3tEmmxLR3XHVfXcOVY9elTblDRZ7BFpsi3aA1MlSdJkcwiRJElNOIRIkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWrCIUSSJDXhECJJkppwCJEkSU04hEiSpCYcQiRJUhMOIZIkqQmHEEmS1IRDiCRJasIhRJIkNeEQIkmSmnAIkSRJTTiESJKkJhxCJElSEw4hkiSpCYcQSZLUhEOIJElqwiFEkiQ14RAiSZKacAiRJElNOIRIkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWrCIUSSJDXhECJJkppwCJEkSU00GUKSvDLJRUkuTPKpJDu1yCGpv+wRqf/GPoQk2Q84GlhZVX8CLAGeM+4ckvrLHpEmQ6uXY5YCt0uyFNgZWN8oh6T+skeknls67g1W1eVJ3gP8HPgP4NSqOnXcOST1lz2ybTh83xWtI2jEWrwcc0fgacDdgH2BXZIcMcvtjkqyOsnqm7hx3DElLWJb0iNXXX3LuGNK2oQWL8c8BvhpVV1VVTcBJwEPmX6jqjqmqlZW1crt2XHsISUtapvdI3vtsWTsISXNr8UQ8nPgwUl2ThLg0cC6Bjkk9Zc9Ik2AsQ8hVXU2cCKwBrigy3DMuHNI6i97RJoMYz8wFaCq/g74uxbbljQZ7BGp//zEVEmS1IRDiCRJamKTL8ckecYsi68FLqiqKxc+kqRJY49Ims0wx4S8CDgE+GZ3/ZHAd4EDk/xDVX1iRNkkTQ57RNIMwwwhtwL3rqorAJLsDXwYeBBwJmB5SNoUe0TSDMMcE7J8Y3F0rgQOrKpfATeNJpakCWOPSJphmD0hZyU5Bfhcd/2/AGcm2QW4ZmTJJE0Se0TSDMMMIS9jUBiHAgE+Dny+qgp41AizSZoc9oikGTY5hHQlcWL3JUmbzR6RNJtNHhOS5BlJfpzk2iTXJbk+yXXjCCdpMtgjkmYzzMsx7wKeUlW
eHErSlrJHJM0wzLtjrrA4JG0le0TSDMPsCVmd5DPAF4EbNy6sqpNGlkrSpLFHJM0wzBCyG/Bb4HFTlhVgeUgalj0iaYZh3h1z5DiCSJpc9oik2cw5hCR5TVW9K8kHGfzHchtVdfRIk0nqPXtE0nzm2xOy8SCy1eMIImki2SNb6PB9V7SOMLRV69e2jqCemnMIqaovdxfPqqqLx5RH0gSxRyTNZ5gDU49Lsh/wbwzOdnlWVV0w2liSJow9ImmGYQ5MfXiSHYAHAI8EvpLk9lV1p1GHkzQZ7BFJs9nkEJLkocDDuq/dgVOAs0acS9IEsUckzWaYl2POYHBQ2duBr1bV70cbSdIEskckzTDMELIHg9NvPxw4OsmtwHeq6m9HmkzSJLFHJM0wzDEh1yS5GFgG7A88BNh+1MEkTQ57RNJshjkm5N+BHwLfAj4CHOmuVEmbwx6RNJthXo65R1XdOvIkkiaZPSJphu2GuM2+Sb6Q5MokVyT5fJL9R55M0iSxRyTNMMwQ8jHgZGBfYD/gy90ySRqWPSJphmGGkL2q6mNVdXP3dRyw14hzSZos9oikGYYZQn6Z5IgkS7qvI4CrRx1M0kSxRyTNMMwQ8kLg2cAvgA3AM7tlkjQse0TSDMN8TsjPgaeOIYukCWWPSJrNMJ8TshfwYmD51NtXlf/FSBqKPSJpNsN8TsiXGJxo6uvALaONI2lC2SOSZhhmCNm5ql478iSSJpk9ImmGYQ5MPSXJExdyo0l2T3Jikh8kWZfkkIW8f0mLjj0iaYY594QkuR4oIMAbktwI3NRdr6rabSu2+wHgX6vqmUl2AHbeivuStEjZI5LmM+cQUlW7jmKDSXZjcDrvF3Tb+T3giaykCWSPSJrPMO+OORRYW1U3dB8wdDDw/u4td1vij4GrgI8luR9wLvDyqrph2naPAo4C2Ml/cKReWyw9cvi+K7b4Mcxl1fq1C36fo9KnrNo2DHNMyIeB33ZP9NcAPwM+sRXbXMqggD5cVfcHbgBeN/1GVXVMVa2sqpXbs+NWbE7SImCPSJphmCHk5qoq4GnAB6rqA8DW7GK9DLisqs7urp/IoEwkTS57RNIMwwwh1yd5PXAE8JUkS4Dtt3SDVfUL4NIk9+wWPRr4/pben6ResEckzTDMEPLnwI3Ai7on/n7Au7dyu38FnJDkfGAF8LatvD9Ji5s9ImmGYc4d8wvgvVOu/xz4+NZstKrWAiu35j4k9Yc9Imk2w+wJkSRJWnAOIZIkqYl5h5AkS5J8clxhJE0ee0TSXOYdQqrqFmCv7iORJWmz2SOS5jLMWXQvAb6d5GQGHwgEQFW9d86fkKTbugR7RNI0wwwh67uv7di6DxeStO2yRyTNMMxbdN8CkGSX6edlkKRh2COSZrPJd8ckOSTJ94F13fX7JflfI08maWLYI5JmM8xbdN8PHA5cDVBV5zE4hbYkDcsekTTDUJ8TUlWXTlt0ywiySJpg9oik6YY5MPXSJA8BqnuL3dF0u1QlaUj2iKQZhtkT8lLgZQxOOHUZgxNFvWyUoSRNHHtE0gzDvDvml8DzxpBF0oSyRyTNZs4hJMkHgZprfVUdPZJEkiaGPSJpPvPtCVk9thSSJpU9ImlOcw4hVXX8OINImjz2iKT5bPKYkCR7Aa8F7gPstHF5VR02wlySJshi6JEDD/otq1atHdfmttqq9f3JKm2pYd4dcwKDt9LdDXgLgxNR/dsIM0maPPaIpBmGGUL2qKpjgZuq6oyqeiHw4BHnkjRZ7BFJMwzzYWU3dd83JHkSgzNh7j+6SJImkD0iaYZhhpC3JrkD8Crgg8BuwCtHmkrSpLFHJM0wzIeVndJdvBZ41GjjSJpE9oik2WzymJAkByY5LcmF3fWDkrxp9NEkTQp7RNJshjkw9X8Dr6d7TbeqzgeeM8pQkiaOPSJphmGGkJ2r6pxpy24eRRhJE8sekTTDMEPIL5McQHf+hyTPBDaMNJWkSWOPSJphmHfHvAw4BrhXksu
BnwJHjDSVpEljj0iaYZh3x1wMPCbJLsB2VXX96GNJmiT2iKTZzDmEJPnrOZYDUFXvHVEmSRPCHpE0n/n2hOzafb8n8ADg5O76U4AzRxlK0sSwRyTNac4hpKreApDkVODgjbtPk/w98LmxpJPUa/aIpPkM8+6YuwC/n3L998DykaSRNKnsEUkzDPPumE8A5yT5AoO31/0ZcPxIU0maNPaIpBmGeXfM/0zyL8DDukVHVtX3RhtL0iSxRyTNZpg9IVTVGmDNQm44yRJgNXB5VT15Ie9b0uJjj0iabphjQkbl5cC6htuX1H/2iNRjTYaQJPsDTwL+T4vtS+o/e0Tqv1Z7Qt4PvAa4tdH2JfWfPSL13FDHhCykJE8Grqyqc5M8cp7bHQUcBbATO48pnaQ+2JIeuct+Y6+7RefwfVe0jrBZVq1fO5L77dvvYZK12BNyKPDUJJcAnwYOS/LJ6TeqqmOqamVVrdyeHcedUdLittk9stceS8adUdImjH0IqarXV9X+VbUceA7wjarybJqShmaPSJOh5btjJEnSNqzpi6RVdTpwessMkvrNHpH6yz0hkiSpCYcQSZLUhEOIJElqwiFEkiQ14RAiSZKacAiRJElNOIRIkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWrCIUSSJDXhECJJkppwCJEkSU04hEiSpCYcQiRJUhNLWweQJM10+L4rWkcY2qr1a0dyv336HWjLuCdEkiQ14RAiSZKacAiRJElNOIRIkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWrCIUSSJDXhECJJkppwCJEkSU04hEiSpCYcQiRJUhMOIZIkqQmHEEmS1IRDiCRJasIhRJIkNeEQIkmSmhj7EJJkWZJvJlmX5KIkLx93Bkn9Zo9Ik2Fpg23eDLyqqtYk2RU4N8nXqur7DbJI6id7RJoAY98TUlUbqmpNd/l6YB2w37hzSOove0SaDC32hPxBkuXA/YGzZ1l3FHAUwF32W8qq1WvHmm1rHL7vitYRpG3G5vRIn6xa35/OG5VR/Q7s6MWj2YGpSW4PfB54RVVdN319VR1TVSurauVeeywZf0BJi549IvVbkyEkyfYMiuOEqjqpRQZJ/WaPSP3X4t0xAY4F1lXVe8e9fUn9Z49Ik6HFnpBDgf8GHJZkbff1xAY5JPWXPSJNgLEfqVVV3wIy7u1Kmhz2iDQZ/MRUSZLUhEOIJElqwiFEkiQ14RAiSZKacAiRJElNOIRIkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWrCIUSSJDXhECJJkppwCJEkSU04hEiSpCYcQiRJUhMOIZIkqYlUVesMm5TkeuCHrXMMaU/gl61DbIY+5e1TVuhX3ntW1a6tQ4ySPTJSfcrbp6zQr7yb3SNLR5Vkgf2wqla2DjGMJKv7khX6lbdPWaFfeZOsbp1hDOyREelT3j5lhX7l3ZIe8eUYSZLUhEOIJElqoi9DyDGtA2yGPmWFfuXtU1boV94+Zd1SfXqMfcoK/crbp6zQr7ybnbUXB6ZKkqTJ05c9IZIkacI4hEiSpCYW3RCS5KNJrkxy4ZRl707ygyTnJ/lCkt1bZtxotqxT1v1NkkqyZ4tss5krb5K/SvLDJBcleVerfFPN8XewIsl3k6xNsjrJA1tm3CjJsiTfTLKu+x2+vFt+pyRfS/Lj7vsdW2eFefMuyufZlrBHRqNPHQL2yCgtWI9U1aL6Ah4OHAxcOGXZ44Cl3eV3Au9snXOurN3yZcAq4GfAnq1zbuJ3+yjg68CO3fU7t845T9ZTgSd0l58InN46Z5dlH+Dg7vKuwI+A+wDvAl7XLX/dIvq7nSvvonyeLeDfz6J8fH3qkT51yDx57ZHR5t2s59mi2xNSVWcCv5q27NSqurm7+l1g/7EHm8VsWTvvA14DLKqjfufI+5fAO6rqxu42V4492CzmyFrAbt3lOwDrxxpqDlW1oarWdJevB9YB+wFPA47vbnY88PQ2CW9rrry
L9Xm2JeyR0ehTh4A9MkoL1SOLbggZwguBf2kdYi5JngpcXlXntc4ypAOBhyU5O8kZSR7QOtA8XgG8O8mlwHuA1zfOM0OS5cD9gbOBvatqAwyesMCd2yWb3bS8Uy3q59kCWNSPr2c90qcOAXtkwW1Nj/RqCEnyRuBm4ITWWWaTZGfgjcCbW2fZDEuBOwIPBl4NfDZJ2kaa018Cr6yqZcArgWMb57mNJLcHPg+8oqqua51nU+bKu9ifZ1trsT++HvZInzoE7JEFtbU90pshJMnzgScDz6vuxaZF6ADgbsB5SS5hsBtqTZI/appqfpcBJ9XAOcCtDE6YtBg9Hzipu/w5YFEcUAaQZHsGT8QTqmpjxiuS7NOt3wdYNLup58jbl+fZFuvJ4+tbj/SpQ8AeWTAL0SO9GEKSPB54LfDUqvpt6zxzqaoLqurOVbW8qpYzeHIeXFW/aBxtPl8EDgNIciCwA4v3jI3rgUd0lw8Dftwwyx90//UdC6yrqvdOWXUyg8Kj+/6lcWebzVx5+/I821J9eXw97JE+dQjYIwtiwXqk9RG207+ATwEbgJsYPPleBPwEuBRY2319pHXOubJOW38Ji+So9nl+tzsAnwQuBNYAh7XOOU/WhwLnAucxeO3xT1vn7LI+lMHBbudP+Rt9IrAHcBqDkjsNuFPrrJvIuyifZwv497MoH1+feqRPHTJPXntktHk363nmx7ZLkqQmevFyjCRJmjwOIZIkqQmHEEmS1IRDiCRJasIhRJIkNeEQIpKcnmRl6xyS+sse0ZZwCNHIJVnSOoOkfrNHJpNDyDYiyfIkP0hyfJLzk5zYnaNi+u0+nGR1kouSvKVb9ugkX5hym8cmOam7/Lgk30myJsnnuvMIkOSSJG9O8i3gWdO28awkFyY5L8mZ3bIXJPnQlNuckuSR3eXfJHlnknOTfD3JA7v/ui7uTvQlaQzsES00h5Btyz2BY6rqIOA64H/Mcps3VtVK4CDgEUkOAr4B3DvJXt1tjgQ+lmRP4E3AY6rqYGA18NdT7ut3VfXQqvr0tG28GTi8qu4HDPPk3wU4var+FLgeeCvwWODPgH8Y4uclLRx7RAvGIWTbcmlVfbu7/EkGH7s73bOTrAG+B9wXuE8NPlb3E8ARSXYHDmFweuYHA/cBvp1kLYPzGtx1yn19Zo4c3waOS/JiYJhdrL8H/rW7fAFwRlXd1F1ePsTPS1o49ogWzNLWATRW0z+j/zbXk9wN+BvgAVX16yTHATt1qz8GfBn4HfC5qrq5O4HR16rquXNs74ZZQ1S9NMmDgCcBa5OsYHDK56lD8U5TLt9U/3l+gVuBG7v7uTWJf8PSeNkjWjDuCdm23CXJId3l5wLfmrZ+NwZP+GuT7A08YeOKqlrP4OyTbwKO6xZ/Fzg0yd0BkuzcnUVzXkkOqKqzq+rNDM62uYzBSbpWJNkuyTIW0em1Jd2GPaIF4/S3bVkHPD/JPzM4I+OHp66sqvOSfA+4CLiYwe7OqU4A9qqq73e3vyrJC4BPJdmxu82bgB9tIse7k9wDCIOzQp7XLf8pg12jG8/GKWnxsUe0YDyL7jYiyXLglKr6k624jw8B36uqYxcql6T+sEe00NwToqEkOZfBLtZXtc4iqZ/sEU3nnhBJktSEB6ZKkqQmHEIkSVITDiGSJKkJhxBJktSEQ4gkSWri/wMPvU6z2368vgAAAABJRU5ErkJggg==\n", 563 | "text/plain": [ 564 | "
" 565 | ] 566 | }, 567 | "metadata": { 568 | "needs_background": "light" 569 | }, 570 | "output_type": "display_data" 571 | }, 572 | { 573 | "data": { 574 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiEAAAEWCAYAAACwgEcPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAewklEQVR4nO3deZhkBXnv8e9vuntmmIFxQJAgIENUXMhjkIwL7uISNS5JbhZzJXGLJLk+ccliNDEm5vHGqImJSe7VkItClKtxwYiaxR2X6xJEQBS3EBRklE1g2Gbpfu8fdcY0vU31zDl9uorv53n6mVrf81ZNnV+/fepUnVQVkiRJK21N3w1IkqQ7JocQSZLUC4cQSZLUC4cQSZLUC4cQSZLUC4cQSZLUC4eQO6AkNyX50SWuvyzJY1eyJ0njybzRUhxC7oCq6sCquhQgyRlJXrUSy03yrCSfXollSVod+sobjQaHEEmS1AuHkDGR5NlJ3j/r/LeSvHPW+cuTnNCcriT3SHIq8AzgJc0m0/fPKnlCkouS3JDkH5Osn1XreU3965Kck+SuzeVbmtqTs277iSS/muQ+wJuAk5plXb/E47gkyfYklyb5tTnXPy3JBUluTPIfSZ7QXH6nJKcn2Zbku0lelWRiP55SSYtYybyZs9y7J/lYkmuTXJPkrCSbZ11/dJKzk1zd3OZvZ133nCZbfpDk35Ic0+JTon3kEDI+zgUenmRNkiOAKeChAM37sQcCF82+Q1WdBpwFvLbZZPqUWVf/AvAE4FjgfsCzmlonA69urj8C+Dbwjr01V1WXAL8OfLZZ1uZFbnoV8GRgE/Bs4C+TnNgs+4HAPwC/C2wGHgFc1tzvTGA3cA/g/sDjgV/dW1+S9smK5M0CwiB/7grcBzga+ONmuRPABxhk0hbgSJpsSvLTwO8DPwscBnwKePs+Pna1aHLvN9EoqKpLk2wHTgCOA/6NwV8X9wZOAj5VVTPLKPnXVXUlQPMXywnN5c8A3lxV5zfXvQz4QZItLT2OD846e26SDwEPB84Hntss+8PN9d9tejgceCKwuapuBW5O8pfAqcDftdGXpP+ygnkzd7nfAr7VnL06yeuBP2rOP5DBcPK7VbW7uWzPPmi/Bry6+WOIJH8K/H6SY6rq28voUy1zCBkv5wKPYrA14FzgeuCRDELh3GXW+t6s07cwWLlp/j1/zxVVdVOSaxn81fHdfep6liRPZBAqxzHYUrcB+HJz9dHAPy9wt2MY/CW2Lcmey9YAl+9vP5IWtRJ5cztJ7gL8NYM/TA5isJ7/oLn6aODbswaQ2Y4B3pDkL2aXY5BbDiE98u2Y8bInFB7enD6XQSg8ksVDYbmHUb6SwQoNQJKNwJ0ZDCA3NxdvmHX7Hxl2WUnWAe8B/hw4vHnL5p8ZhAUMhoq7L3DXy4EdwKFVtbn52VRVxw/9qCQt10rkzVyvbmrcr6o2Aadw+3y42+x90ma5HPi1WfmwuaoOqKr/t5/9aD85hIyXc4FHAwdU1RUM3vd8AoMh4UuL3Of7wKKf4V/A/wWeneSEZmj4U+DzVXVZVV3NYBg5JclEkudw+6Hh+8BRSdYuUnstsA64GtjdbBV5/KzrT2+W/Zjmvegjk9y7qrYBHwL+Ismm5rq7J3nkMh6XpOVZibyZ6yDgJuD6JEcy2D9sjy8A24A/S7IxyfokD22uexPwsiTHww93ZP/5/ehDLXEIGSNV9Q0GK+inmvM3ApcCn6mq6UXudjpw3yTXJ/mnIZbxUeAPGWyx2MZgyHj6rJs8j0EwXAscD8z+S+NjwFeA7yW5ZoHa24EXA
O9ksIn1vwPnzLr+CzQ7qwI3MAjBPVtlfoXBEPPV5r7vZrDjrKQOrETeLOCVwIkM1v8PAmfP6mcaeAqDt4e+A1wB/GJz3XuB1wDvSHIjcDGD/cjUs1Tt79YxSZKk5XNLiCRJ6oVDiCRJ6oVDiCRJ6oVDiCRJ6sVIfFnZ2okD6oCpO3VQOXu/yWop29kOxN08BzXZzXw7vb6juot9aHg/TB24q/2iwN3WX9d6zSuv2M0PrpvpaIVYHbrKkZro5hBFmVnOF44OyQ8iADB9QDe/+qbXtr8Krdm00Hev7b97HDDvA4r77fIrprlumTkyEkPIAVN34qS7/Ur7hdd0tCFoooO6uxf7xNt+Sje/d3YfemAndW+4x4a932gf3Lil/efh8Idd2XpNgP91XPuHvHj6k69qveZqc8DUnXjIUb/cet3pOx/Uek2ANTfe2nrN7OrmF1p1kXnQ2dB00/GHdVL3hmPb/5W69rHtDwsA59zvza3XfNKTlt+rb8dIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqReOIRIkqRedDaEJHlzkquSXDzrskOSfDjJN5t/D+5q+ZJGnzkijbcut4ScATxhzmUvBT5aVfcEPtqcl6TFnIE5Io2tzoaQqvokcN2ci58GnNmcPhP46a6WL2n0mSPSeJtc4eUdXlXbAKpqW5K7LHbDJKcCpwKsn9oEa6dab6YmO5rBZtovmfZLAlBTE53UnVnbTd2dB3bzTOy6162t13zRsR9pvSbAQWumW685QbVes0P7lCPr1m1mxzGHtN7M5PU7Wq8JsPOITa3XnLx5V+s1AXJrN3WnD1rXSd1dG7vJ/u0n3tZ6zb+/z3tarwlwxOSBrdecyg+WfZ9Vu2NqVZ1WVVurauvaiQ19tyNpBN0uR9Zu7LsdSXOs9BDy/SRHADT/XrXCy5c0+swRaUys9BByDvDM5vQzgfet8PIljT5zRBoTXX5E9+3AZ4F7JbkiyXOBPwMel+SbwOOa85K0IHNEGm+d7ZhaVb+0yFWP6WqZksaLOSKNt1W7Y6okSRpvDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXDiGSJKkXk303MIyaWMP0getar7tmx+7WawLMbGj/ac2uidZrAp2NodPru+n3xuNmOql78j2+0XrNNXTT6/qk9ZrpoOZqU2tg94b2X5drr+omR1JrW69ZHf03z2xqP58BZtZ1kyPX/lg3T8TPHH9B6zVvnunmuZ2um1qvWdSy7+OWEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1AuHEEmS1ItehpAkL07ylSQXJ3l7kvV99CFpdJkj0uhb8SEkyZHAC4CtVfVjwATw9JXuQ9LoMkek8dDX2zGTwAFJJoENwJU99SFpdJkj0oibXOkFVtV3k/w58B3gVuBDVfWhle4DYPrAdZ3UrXRQdO1EB0Vh4rbdndS99vipTure+Z5Xd1L38HU3tl5z+8wBrdcEmKL9F1gXL9ku7UuOzEyEHZvaX49m7nmn1msCZLr9mpOT3fzdOXFrNzlyw5ZuMnrqXu2v7wA7ZtrPvc0Tt7ReE+Cm2tF6zRlq2ffp4+2Yg4GnAccCdwU2JjllgdudmuS8JOft2nXzSrcpaRXblxzZfZs5Iq02fbwd81jgP6vq6qraBZwNPGTujarqtKraWlVbp6Y2rniTkla1ZefI5HpzRFpt+hhCvgM8OMmGJAEeA1zSQx+SRpc5Io2BFR9CqurzwLuB8
4EvNz2cttJ9SBpd5og0HlZ8x1SAqvoj4I/6WLak8WCOSKPPb0yVJEm9cAiRJEm92OvbMUl+doGLbwC+XFVXtd+SpHFjjkhayDD7hDwXOAn4eHP+UcDngOOS/ElVvbWj3iSND3NE0jzDDCEzwH2q6vsASQ4H3gg8CPgkYHhI2htzRNI8w+wTsmVPcDSuAo6rquuAXd20JWnMmCOS5hlmS8inknwAeFdz/r8Bn0yyEbi+s84kjRNzRNI8wwwhz2cQGA9lcJyrfwDeU1UFPLrD3iSND3NE0jx7HUKakHh38yNJy2aOSFrIXvcJSfKzSb6Z5IYkNybZnqSb4yBLGkvmiKSFDPN2zGuBp1SVB4eStK/MEUnzDPPpmO8bHJL2kzkiaZ5htoScl+QfgX8Cduy5sKrO7qwrSePGHJE0zzBDyCbgFuDxsy4rwPCQNCxzRNI8w3w65tkr0Yik8WWOSFrIokNIkpdU1WuT/A2Dv1hup6pe0GlnkkaeOSJpKUttCdmzE9l5K9HIUnZtXMP3Tjqo9boTO+dlYitqTToo2n7JLu1q/78LgJuu7abw1zYd3nrNw6e6+QTq1TPtvxh2d/f6WjU5MrMWtt9tmH3xl2f9NR2s70AN82b5Mq3ZNdF+UaCytpO6t9y1m+f2tpvWdVJ3cs106zUvvPWY1msC3Hfqq63X3JcYWfRlXlXvb05+qqou3beWJN2RmSOSljLMrH1GkiOBf2dwtMtPVdWXu21L0pgxRyTNM8yOqY9IshZ4APAo4INJDqyqQ7puTtJ4MEckLWSvQ0iShwEPb342Ax8APtVxX5LGiDkiaSHDvB1zLoOdyl4N/HNV7ey2JUljyByRNM8wQ8idGRx++xHAC5LMAJ+tqj/stDNJ48QckTTPMPuEXJ/kUuBo4CjgIcBU141JGh/miKSFDLNPyH8AXwc+DbwJeLabUiUthzkiaSHDvB1zz6qa6bwTSePMHJE0zzBfH3jXJO9NclWS7yd5T5KjOu9M0jgxRyTNM8wQ8hbgHOCuwJHA+5vLJGlY5oikeYYZQg6rqrdU1e7m5wzgsI77kjRezBFJ8wwzhFyT5JQkE83PKcC1XTcmaayYI5LmGWYIeQ7wC8D3gG3AzzWXSdKwzBFJ8wzzPSHfAZ66Ar1IGlPmiKSFDPM9IYcBzwO2zL59VflXjKShmCOSFjLM94S8j8GBpj4CTHfbjqQxZY5ImmeYIWRDVf1e551IGmfmiKR5htkx9QNJntTmQpNsTvLuJF9LckmSk9qsL2nVMUckzbPolpAk24ECAvx+kh3AruZ8VdWm/VjuG4B/raqfS7IW2LAftSStUuaIpKUsOoRU1UFdLDDJJgaH835Ws5ydgAeyksaQOSJpKcN8OuahwAVVdXPzBUMnAn/VfORuX/wocDXwliQ/DnwReGFV3TxnuacCpwJMHXgwEztqHxe3uJpI6zUBapg3uZZpze72Hz9ArenmOcjuTspSOzt4coErtm9uveZtBw+zy9XybZ+Zar3mNN28DvZYDTmybv1mDr1o1z4/hkV185LktoMnWq+59qZujiF4y53b7xVg7fWdlOW27e2vQwAfv/yerde893HbWq8J0MUrYV9+Sw2z+rwRuKVZ0V8CfBt46z4sa49JBgH0xqq6P3Az8NK5N6qq06pqa1VtnTxg434sTtIq0HuOTK01R6TVZpghZHdVFfA04A1V9QZgfzaxXgFcUVWfb86/m0GYSBpf5oikeYYZQrYneRlwCvDBJBPAPm/LqqrvAZcnuVdz0WOAr+5rPUkjwRyRNM8wQ8gvAjuA5zYr/pHA6/Zzub8JnJXkIuAE4E/3s56k1c0ckTTPMMeO+R7w+lnnvwP8w/4stKouALbuTw1Jo8MckbSQjvbrliRJWppDiCRJ6sWSQ0iSiSRvW6lmJI0fc0TSYpYcQqpqGjis+UpkSVo2c0TSYob5SsfLgM8kOYfBFwIBUFWvX/QeknR7l2GOSJpjmCHkyuZnDfv35UKS7
rjMEUnzDPMR3VcCJNk497gMkjQMc0TSQvb66ZgkJyX5KnBJc/7Hk/zvzjuTNDbMEUkLGeYjun8F/CRwLUBVXcjgENqSNCxzRNI8Q31PSFVdPuei6Q56kTTGzBFJcw2zY+rlSR4CVPMRuxfQbFKVpCGZI5LmGWZLyK8Dz2dwwKkrGBwo6vldNiVp7JgjkuYZ5tMx1wDPWIFeJI0pc0TSQhYdQpL8DVCLXV9VL+ikI0ljwxyRtJSltoSct2JdSBpX5oikRS06hFTVmSvZiKTxY45IWspe9wlJchjwe8B9gfV7Lq+qkzvs6/Y9FEzsbL9uZdGtxPtlckf7daen0nrNgW6eg90bOup351CfKl+26Zn260509NxuyO7Wa67pqNc9VkOOTK8NNx01zAcCV4fq4KW+Y1NHj7+j1X1mqpu6XfV790Ouab3mhjU7Wq8JsLPaX+drH2oO8zI/i8FH6Y4FXsngQFT/vuwlSbojM0ckzTPMEHLnqjod2FVV51bVc4AHd9yXpPFijkiaZ5htc7uaf7cl+SkGR8I8qruWJI0hc0TSPMMMIa9Kcifgt4G/ATYBL+60K0njxhyRNM8wX1b2gebkDcCju21H0jgyRyQtZK/7hCQ5LslHk1zcnL9fkpd335qkcWGOSFrIMDum/j3wMpr3dKvqIuDpXTYlaeyYI5LmGWYI2VBVX5hzWftfVCBpnJkjkuYZZgi5Jsndab7VKsnPAds67UrSuDFHJM0zzKdjng+cBtw7yXeB/wRO6bQrSePGHJE0zzCfjrkUeGySjcCaqtrefVuSxok5Imkhiw4hSX5rkcsBqKrXd9STpDFhjkhaylJbQg5q/r0X8ADgnOb8U4BPdtmUpLFhjkha1KJDSFW9EiDJh4AT92w+TfLHwLtWpDtJI80ckbSUYT4dczdg56zzO4EtnXQjaVyZI5LmGebTMW8FvpDkvQw+XvczwJmddiVp3JgjkuYZ5tMx/zPJvwAPby56dlV9qdu2JI0Tc0TSQobZEkJVnQ+c3+aCk0wA5wHfraont1lb0upjjkiaa5h9QrryQuCSHpcvafSZI9II62UISXIU8FPA/+lj+ZJGnzkijb6+toT8FfASYKan5UsafeaINOKG2iekTUmeDFxVVV9M8qglbncqcCrA2o0Hs2ZX+73MdPToZybTes3J26r1mgBrpjspy22Htf8cAKzZ2c3cPDnR/hOxfXp96zU1sC85MnXQwUyva/91ue76bmagqVvaX+d3r+9mvewi8wAmr+rmub3p2E7Kctn1h7Re82sH3bX1mgCP2nBZJ3WXq48tIQ8FnprkMuAdwMlJ3jb3RlV1WlVtraqtk+s3rnSPkla35efIAeaItNqs+BBSVS+rqqOqagvwdOBjVeXRNCUNzRyRxkOfn46RJEl3YCu+T8hsVfUJ4BN99iBptJkj0uhyS4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSeqFQ4gkSerFZN8NDKVgzXS1XnbtTe3XBJiZSvs1O/qfmp7qpu7kLd3UpZv/Mnbunmi95o2717deE+A+aze0XvOAjP/fI5mGqQ7W+XXXz7ReE2DXge3/nxx02a2t1wTYuXltJ3Vv/pFugm/Nzm6CZN3U7tZrTne0raCL6N+X33zjnzySJGlVcgiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9cAiRJEm9WPEhJMnRST6e5JIkX0nywpXuQdJoM0ek8TDZwzJ3A79dVecnOQj4YpIPV9VXe+hF0mgyR6QxsOJbQqpqW1Wd35zeDlwCHLnSfUgaXeaINB762BLyQ0m2APcHPr/AdacCpwKsz
0Y2v+/LrS+/duxovSZAzVTrNbMmrdcEqN27O6k7eVQ3vw82H39EJ3Wvvv+hrdd871EHt14TYMeDplqv+Z1d17Vec6UMnSNs4OAzPtv68ic2bWq9JsBMB/mUiYnWawKs39VNjmzYeEAndTd/80c7qXvL59rPkXc+7pDWawKsf9Cu1mtePf2JZd+ntx1TkxwIvAd4UVXdOPf6qjqtqrZW1da1Wb/yDUpa9ZaTI1OsW/kGJS2plyEkyRSD4Dirqs7uowdJo80ckUZfH5+OCXA6cElVvX6lly9p9Jkj0njoY0vIQ4FfBk5OckHz86Qe+pA0uswRaQys+I6pVfVpoJu9LCXdIZgj0njwG1MlSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvHEIkSVIvUlV997BXSbYDX++7jyEdClzTdxPLMEr9jlKvMFr93quqDuq7iS6ZI50apX5HqVcYrX6XnSOTXXXSsq9X1da+mxhGkvNGpVcYrX5HqVcYrX6TnNd3DyvAHOnIKPU7Sr3CaPW7Lzni2zGSJKkXDiGSJKkXozKEnNZ3A8swSr3CaPU7Sr3CaPU7Sr3uq1F6jKPUK4xWv6PUK4xWv8vudSR2TJUkSeNnVLaESJKkMeMQIkmSerHqhpAkb05yVZKLZ132uiRfS3JRkvcm2dxnj3ss1Ous634nSSU5tI/eFrJYv0l+M8nXk3wlyWv76m+2RV4HJyT5XJILkpyX5IF99rhHkqOTfDzJJc1z+MLm8kOSfDjJN5t/D+67V1iy31W5nu0Lc6Qbo5QhYI50qbUcqapV9QM8AjgRuHjWZY8HJpvTrwFe03efi/XaXH408G/At4FD++5zL8/to4GPAOua83fpu88lev0Q8MTm9JOAT/TdZ9PLEcCJzemDgG8A9wVeC7y0ufylq+h1u1i/q3I9a/H1syof3yjlyChlyBL9miPd9rus9WzVbQmpqk8C18257ENVtbs5+zngqBVvbAEL9dr4S+AlwKra63eRfn8D+LOq2tHc5qoVb2wBi/RawKbm9J2AK1e0qUVU1baqOr85vR24BDgSeBpwZnOzM4Gf7qfD21us39W6nu0Lc6Qbo5QhYI50qa0cWXVDyBCeA/xL300sJslTge9W1YV99zKk44CHJ/l8knOTPKDvhpbwIuB1SS4H/hx4Wc/9zJNkC3B/4PPA4VW1DQYrLHCX/jpb2Jx+Z1vV61kLVvXjG7EcGaUMAXOkdfuTIyM1hCT5A2A3cFbfvSwkyQbgD4BX9N3LMkwCBwMPBn4XeGeS9NvSon4DeHFVHQ28GDi9535uJ8mBwHuAF1XVjX33szeL9bva17P9tdof3wjmyChlCJgjrdrfHBmZISTJM4EnA8+o5s2mVejuwLHAhUkuY7AZ6vwkP9JrV0u7Aji7Br4AzDA4YNJq9Ezg7Ob0u4BVsUMZQJIpBiviWVW1p8fvJzmiuf4IYNVspl6k31FZz/bZiDy+UcuRUcoQMEda00aOjMQQkuQJwO8BT62qW/ruZzFV9eWquktVbamqLQxWzhOr6ns9t7aUfwJOBkhyHLCW1XvExiuBRzanTwa+2WMvP9T81Xc6cElVvX7WVecwCDyaf9+30r0tZLF+R2U921ej8vhGMEdGKUPAHGlFaznS9x62c3+AtwPbgF0MVr7nAt8CLgcuaH7e1Hefi/U65/rLWCV7tS/x3K4F3gZcDJwPnNx3n0v0+jDgi8CFDN57/Im++2x6fRiDnd0umvUafRJwZ+CjDELuo8Ahffe6l35X5XrW4utnVT6+UcqRUcqQJfo1R7rtd1nrmV/bLkmSejESb8dIkqTx4xAiSZJ64RAiSZJ64RAiSZJ64RAiSZJ64RAiknwiyda++5A0uswR7QuHEHUuyUTfPUgabebIeHIIuYNIsiXJ15KcmeSiJO9uj
lEx93ZvTHJekq8keWVz2WOSvHfWbR6X5Ozm9OOTfDbJ+Une1RxHgCSXJXlFkk8DPz9nGT+f5OIkFyb5ZHPZs5L87azbfCDJo5rTNyV5TZIvJvlIkgc2f3Vd2hzoS9IKMEfUNoeQO5Z7AadV1f2AG4H/scBt/qCqtgL3Ax6Z5H7Ax4D7JDmsuc2zgbckORR4OfDYqjoROA/4rVm1bquqh1XVO+Ys4xXAT1bVjwPDrPwbgU9U1U8A24FXAY8Dfgb4kyHuL6k95oha4xByx3J5VX2mOf02Bl+7O9cvJDkf+BJwPHDfGnyt7luBU5JsBk5icHjmBwP3BT6T5AIGxzU4Zlatf1ykj88AZyR5HjDMJtadwL82p78MnFtVu5rTW4a4v6T2mCNqzWTfDWhFzf2O/tudT3Is8DvAA6rqB0nOANY3V78FeD9wG/CuqtrdHMDow1X1S4ss7+YFm6j69SQPAn4KuCDJCQwO+Tx7KF4/6/Su+q/jC8wAO5o6M0l8DUsryxxRa9wScsdytyQnNad/Cfj0nOs3MVjhb0hyOPDEPVdU1ZUMjj75cuCM5uLPAQ9Ncg+AJBuao2guKcndq+rzVfUKBkfbPJrBQbpOSLImydGsosNrS7odc0Stcfq7Y7kEeGaSv2NwRMY3zr6yqi5M8iXgK8ClDDZ3znYWcFhVfbW5/dVJngW8Pcm65jYvB76xlz5el+SeQBgcFfLC5vL/ZLBpdM/ROCWtPuaIWuNRdO8gkmwBPlBVP7YfNf4W+FJVnd5WX5JGhzmitrklRENJ8kUGm1h/u+9eJI0mc0RzuSVEkiT1wh1TJUlSLxxCJElSLxxCJElSLxxCJElSLxxCJElSL/4/kC9IaOeA0gEAAAAASUVORK5CYII=\n", 575 | "text/plain": [ 576 | "
" 577 | ] 578 | }, 579 | "metadata": { 580 | "needs_background": "light" 581 | }, 582 | "output_type": "display_data" 583 | } 584 | ], 585 | "source": [ 586 | "policy, q = monte_carlo_importance_resample(env)\n", 587 | "v = q.max(axis=-1)\n", 588 | "plot(policy.argmax(-1))\n", 589 | "plot(v)" 590 | ] 591 | } 592 | ], 593 | "metadata": { 594 | "kernelspec": { 595 | "display_name": "Python 3", 596 | "language": "python", 597 | "name": "python3" 598 | }, 599 | "language_info": { 600 | "codemirror_mode": { 601 | "name": "ipython", 602 | "version": 3 603 | }, 604 | "file_extension": ".py", 605 | "mimetype": "text/x-python", 606 | "name": "python", 607 | "nbconvert_exporter": "python", 608 | "pygments_lexer": "ipython3", 609 | "version": "3.7.6" 610 | } 611 | }, 612 | "nbformat": 4, 613 | "nbformat_minor": 4 614 | } 615 | -------------------------------------------------------------------------------- /Chapter7-回合更新策略梯度方法/7.5-案例:车杆平衡.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 33, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "" 12 | ] 13 | }, 14 | "execution_count": 33, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "import gym\n", 21 | "import torch\n", 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "import torch.nn as nn\n", 25 | "import torch.optim as optim\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "import torch.nn.functional as F\n", 28 | "\n", 29 | "from tqdm.notebook import tqdm\n", 30 | "\n", 31 | "np.random.seed(0)\n", 32 | "torch.manual_seed(0)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 4, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/plain": [ 43 | "[0]" 44 | ] 45 | }, 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "output_type": "execute_result" 49 | } 50 | ], 51 | 
"source": [ 52 | "env = gym.make('CartPole-v0')\n", 53 | "env.seed(0)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 7, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "class DQN(nn.Module):\n", 63 | " \n", 64 | " def __init__(self, input_size, hidden_sizes, output_size):\n", 65 | " super(DQN, self).__init__()\n", 66 | " \n", 67 | " neurons = [input_size] + hidden_sizes\n", 68 | " layers = []\n", 69 | " for i in range(len(neurons) - 1):\n", 70 | " layers.append(nn.Linear(neurons[i], neurons[i + 1]))\n", 71 | " layers.append(nn.ReLU(inplace=True))\n", 72 | " layers.append(nn.Linear(neurons[-1], output_size))\n", 73 | " layers.append(nn.Softmax())\n", 74 | " self.net = nn.Sequential(*layers)\n", 75 | " return\n", 76 | "\n", 77 | " def forward(self, x):\n", 78 | " return self.net(x)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 50, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# 同策策略梯度算法智能体类\n", 88 | "\n", 89 | "class VPGAgent(object):\n", 90 | " \n", 91 | " def __init__(self, env, policy_kwargs, baseline_kwargs=None, gamma=0.99):\n", 92 | " observation_dim = env.observation_space.shape[0]\n", 93 | " self.action_n = env.action_space.n\n", 94 | " self.gamma = gamma\n", 95 | " \n", 96 | " self.trajectory = []\n", 97 | "\n", 98 | " self.policy_net = DQN(observation_dim, policy_kwargs['hidden_sizes'], self.action_n)\n", 99 | " self.policy_opt = optim.Adam(self.policy_net.parameters(), lr=policy_kwargs['learning_rate'])\n", 100 | " \n", 101 | " if baseline_kwargs:\n", 102 | " self.baseline_net = DQN(observation_dim, baseline_kwargs['hidden_sizes'], 1)\n", 103 | " self.baseline_opt = optim.Adam(self.baseline_net.parameters(), lr=baseline_kwargs['learning_rate'])\n", 104 | " return\n", 105 | "\n", 106 | " @staticmethod\n", 107 | " def __tensor2numpy(tensor):\n", 108 | " return tensor.cpu().detach().numpy()\n", 109 | " \n", 110 | " def decide(self, observation):\n", 111 | " 
self.policy_net.eval()\n", 112 | " probs = self.policy_net(torch.tensor(observation[np.newaxis]).float())\n", 113 | " probs = self.__tensor2numpy(probs)[0]\n", 114 | " action = np.random.choice(self.action_n, p=probs)\n", 115 | " return action\n", 116 | " \n", 117 | " def learn(self, observation, action, reward, done):\n", 118 | " self.trajectory.append((observation, action, reward))\n", 119 | " \n", 120 | " if done:\n", 121 | " df = pd.DataFrame(data=self.trajectory, columns=['observation', 'action', 'reward'])\n", 122 | " df['discount'] = self.gamma ** df.index.to_series()\n", 123 | " df['discounted_reward'] = df['discount'] * df['reward']\n", 124 | " df['discounted_return'] = df['discounted_reward'][::-1].cumsum()\n", 125 | " df['psi'] = df['discounted_return']\n", 126 | " \n", 127 | " x = torch.tensor(np.stack(df['observation'])).float()\n", 128 | " if hasattr(self, 'baseline_net'):\n", 129 | " self.baseline_net.eval()\n", 130 | " df['baseline'] = self.__tensor2numpy(self.baseline_net(x))\n", 131 | " df['psi'] -= df['baseline'] * df['discount']\n", 132 | " df['return'] = df['discount_return'] / df['discount']\n", 133 | " y = torch.tensor(df['return'].values[:, np.newaxis]).float()\n", 134 | " self.baseline_net.train()\n", 135 | " y_hat = self.baseline_net(x)\n", 136 | " loss = F.binary_cross_entropy_with_logits(y_hat, y)\n", 137 | " self.baseline_opt.zero_grad()\n", 138 | " loss.backward()\n", 139 | " self.baseline_opt.step()\n", 140 | " \n", 141 | " y = torch.tensor(np.eye(self.action_n)[df['action']] * df['psi'].values[:, np.newaxis]).float()\n", 142 | " self.policy_net.train()\n", 143 | " y_hat = self.policy_net(x)\n", 144 | " loss = F.binary_cross_entropy_with_logits(y_hat, y)\n", 145 | " self.policy_opt.zero_grad()\n", 146 | " loss.backward()\n", 147 | " self.policy_opt.step()\n", 148 | "\n", 149 | " self.trajectory = []\n", 150 | " return" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 16, 156 | "metadata": {}, 157 | 
"outputs": [], 158 | "source": [ 159 | "# 智能体和环境交互一个回合的代码\n", 160 | "\n", 161 | "def play_montecarlo(env, agent, render=False, train=False):\n", 162 | " episode_reward = 0.0\n", 163 | " observation = env.reset()\n", 164 | " \n", 165 | " while True:\n", 166 | " if render:\n", 167 | " env.render()\n", 168 | " \n", 169 | " action = agent.decide(observation)\n", 170 | " next_observation, reward, done, _ = env.step(action)\n", 171 | " episode_reward += reward\n", 172 | " \n", 173 | " if train:\n", 174 | " agent.learn(observation, action, reward, done)\n", 175 | " if done:\n", 176 | " break\n", 177 | " \n", 178 | " observation = next_observation\n", 179 | " \n", 180 | " return episode_reward" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 51, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "application/vnd.jupyter.widget-view+json": { 191 | "model_id": "7bd25b5f62084b0fbcef1f099464b9f4", 192 | "version_major": 2, 193 | "version_minor": 0 194 | }, 195 | "text/plain": [ 196 | "HBox(children=(FloatProgress(value=0.0, max=2000.0), HTML(value='')))" 197 | ] 198 | }, 199 | "metadata": {}, 200 | "output_type": "display_data" 201 | }, 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "\n" 207 | ] 208 | }, 209 | { 210 | "name": "stderr", 211 | "output_type": "stream", 212 | "text": [ 213 | "d:\\programdata\\miniconda3\\envs\\rl\\lib\\site-packages\\torch\\nn\\modules\\container.py:100: UserWarning: Implicit dimension choice for softmax has been deprecated. 
Change the call to include dim=X as an argument.\n", 214 | " input = module(input)\n" 215 | ] 216 | }, 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "平均回合奖励 = 950.0 / 100 = 9.5\n" 222 | ] 223 | }, 224 | { 225 | "data": { 226 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAftUlEQVR4nO3deXhU9d338fc3K2GJbIFSQAF3am8DRmqt2s2Fiq1LXUp9Kk+1tb3v9lJ7L62199O7u7TVamvdqxWfy7rUpfq4tUhFRCgYIGyyJEAQJEAgskNCkt/zx5wJk1kyM8ks+dnP67pyZebMWb7nd8585jdnzpwx5xwiIuKfgnwXICIi3aMAFxHxlAJcRMRTCnAREU8pwEVEPFWUy4UNHTrUjRkzJpeLFBHx3qJFi3Y45yqih+c0wMeMGUN1dXUuFyki4j0z2xhvuA6hiIh4SgEuIuIpBbiIiKcU4CIinlKAi4h4SgEuIuIpBbiIiKe8CfDq+ibWbN2b7zJERHqNnH6Rpycuv28+APXTp+S5EhGR3sGbHriIiHSmABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPJQ1wM+tjZgvNbKmZrTSzHwfDHzGzDWZWE/xVZr9cEREJS+UXeZqBzzjn9plZMTDXzF4JHvsv59zT2StPREQSSRrgzjkH7AvuFgd/LptFiYhIcikdAzezQjOrAbYDM51zC4KHfm5my8zsDjMrTTDt9WZWbWbVjY2NGSpbRERSCnDnXJtzrhIYBUwys1OA7wMnAacDg4HvJZj2AedclXOuqqKiIkNli4hIWmehOOd2AbOByc65BhfSDPwRmJSF+kREJIFUzkKpMLOBwe0y4FxgtZmNCIYZcAmwIpuFiohIZ6mchTICmGFmhYQC/ynn3Itm9nczqwAMqAG+mcU6RUQkSipnoSwDJsQZ/pmsVCQiIinRNzFFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPKcBFRDylABcR8ZQCXETEUwpwERFPJQ1wM+tjZgvNbKmZrTSzHwfDx5rZAjOrNbMnzawk++WKiEhYKj3wZuAzzrlTgUpgspmdAfwSuMM5dzzwPnBd9soUEZFoSQPchewL7hYHfw74DPB0MHwGcElWKhQRkbhSOgZuZoVmVgNsB2YC64BdzrnWYJTNwMgE015vZtVmVt3Y2JiJmkVEhBQD3DnX5pyrBEYBk4CT442WYNoHnHNVzrmqioqK7lcqIiKdpHUWinNuFzAbOAMYaGZFwUOjgC2ZLU1ERLqSylkoFWY2MLhdBpwLrAJeBy4PRpsGPJ+tIkVEJFZR8lEYAcwws0JCgf+Uc+5FM3sHeMLMfgYsAR7KYp0iIhIlaYA755YBE+IMX0/oeLiIiOSBvokpIuIpBbiIiKcU4CIinlKAi4h4SgEuIuIpBbiIiKcU4CIinlKAi4h4SgEuIuIpBbiIiKcU4CIinlKAi4h4SgEuIuIpBbiIiKcU4CIinlKAi4h4SgEuIuIpBbiIiKcU4CIinvIuwPceOpzv
EkREegXvAvzbf1qS7xJERHoF7wK8bvu+fJcgItIreBfgIiISogAXEfFU0gA3s9Fm9rqZrTKzlWZ2YzD8R2b2npnVBH8XZr9cEREJK0phnFbgP5xzi81sALDIzGYGj93hnLste+WJiEgiSQPcOdcANAS395rZKmBktguLdLClLZeLExHxQlrHwM1sDDABWBAM+raZLTOzh81sUIJprjezajOrbmxs7FaRNZt2dWs6EZEPspQD3Mz6A88ANznn9gD3AscClYR66LfHm84594Bzrso5V1VRUdG9Ii2yjm7NQkTkAyelADezYkLh/Zhz7lkA59w251ybc64deBCYlLUiC5TaIiLRUjkLxYCHgFXOud9EDB8RMdqlwIrMlxei/BYRiZXKWSifAL4CLDezmmDYLcBUM6sEHFAPfCMrFQKm4yYiIjFSOQtlLhAvQV/OfDnxFSjARURiePFNTMW3iEgsLwJcPXARkVh+BHhElcpyEZEQPwJcqS0iEkMBLiLiKS8CXEREYnkR4A7Xcdt0ToqICOBLgLvk44iI/LNRgIuIeMqPAEcJLiISzY8AV36LiMTwIsAj6YxCEZEQLwJcPXARkVh+BHjEMfCNOw/ksRIRkd7DjwBXD1xEJIYfAZ7vAkREeiE/AlxdcBGRGH4EeL4LEBHphfwIcCW4iEgMLwJcfXARkVheBLh64CIisfwI8HwXICLSC/kR4EpwEZEYngS4ElxEJFrSADez0Wb2upmtMrOVZnZjMHywmc00s9rg/6BsFan4FhGJlUoPvBX4D+fcycAZwLfMbDxwMzDLOXc8MCu4nxXqgIuIxEoa4M65Bufc4uD2XmAVMBK4GJgRjDYDuCRbRc5esz1bsxYR8VZax8DNbAwwAVgADHfONUAo5IFhCaa53syqzay6sbGxW0XeP2d9t6YTEfkgSznAzaw/8Axwk3NuT6rTOececM5VOeeqKioqulOjiIjEkVKAm1kxofB+zDn3bDB4m5mNCB4fAeg4h4hIDqVyFooBDwGrnHO/iXjoBWBacHsa8HzmyxMRkUSKUhjnE8BXgOVmVhMMuwWYDjxlZtcB7wJXZKdEERGJJ2mAO+fmAol+SvizmS1HRERS5cU3MauOydp3hEREvOVFgJ//keH5LkFEpNfxIsD1TUwRkVh+BHi+CxAR6YX8CHAluIhIDC8CXEREYnkR4E4HUUREYvgR4MpvEZEYXgS4iIjEUoCLiHjKiwDXb2KKiMTyJMDzXYGISO/jRYCLiEgsLwJcHXARkVh+BLgSXEQkhh8Brj64iEgMLwJcRERieRHgOoQiIhLLjwDPdwEiIr2QFwGuLriISCw/AlxERGJ4EeDqf4uIxPIjwJXgIiIxvAhwERGJlTTAzexhM9tuZisihv3IzN4zs5rg78JsFqkv8oiIxEqlB/4IMDnO8Ducc5XB38uZLaszHUIREYmVNMCdc3OAphzUkriGfC5cRKSX6skx8G+b2bLgEMugRCOZ2fVmVm1m1Y2NjT1YnIiIROpugN8LHAtUAg3A7YlGdM494Jyrcs5VVVRUdGthOoQiIhKrWwHunNvmnGtzzrUDDwKTMltW1PJ0EEVEJEa3AtzMRkTcvRRYkWjcjFB+i4jEKEo2gpk9DnwKGGpmm4H/AT5lZpWEorUe+EYWaxQRkTiSBrhzbmqcwQ9loZbENeRyYSIinvDim5hOn2KKiMTwJMDzXYGISO/jRYCLiEgsLwJcHXARkVh+BLgSXEQkhhcBLiIisbwI8OhvYlbX5/XaWiIivYIfAR51COXy++bnpxARkV7EiwAHOKqsmOvOGpvvMkREeg1vAtwMWtva812GiEiv4UWAh7+J2dqu01FERML8CHDAgNY2BbiISJgfAe7AzDjcrkMoIiJhXgQ4qAcuIhLNiwD/8seO5rYrT6VVPXARkQ5eBPjJI8r59InDOKweuIhIBy8CPEynEYqIHOFXgOs0QhGRDl4F+GH1wEVEOngV4JFnoexrbgVg9dY9
PF/zHgBPvb2JDTv2pzXPt+p2MLd2BwA79jXz4Jz13DO7rmP+T1VvYn3jPt6ub+Kahxeyfe8hIPTlogfnrOf9/S0d82rc28xDczd0+gm4J99+l/ouatrf3Mrdr9fR1sW7i01NB/j8XXPZ1HQg5rF9za3cO3sds1ZtY+GG2It8bd97iIeDmhZtfJ+Z72xL0iKJPfV2qC3umlXLra+sorm1rcvx/756G799rZY3axuBUJvdNauWFe/tTjjNnxa822k9D7a0cffrdXEPnzXsPshvX6vlwTnrY352r6vpwl5YuoVVDXsAWLB+J2+sbexyfeat28GctY3Mrd3BW3U7Yh5f/G5s+7a1O+6ZXcej8+up274vZppXljewbPMuAF57Zxs/emFlR03NraF1aGntvA5vrG1k/rqdHfdnrdpGdX0T97+xjt0HDne5Djv2NXPjE0t4dcXWhOMcOhx/uQBrtu7lL0veiztd5L4Goe19/xvr2H2w65oS2d/cyj2z69h94DD3zl5Hew/fgbe2tfOtxxbz9KLNnYaH1zedDuJzSzbzVt0OLv79XL7+aHXCn310zvHAnHXsOtAS9/GeSvqjxr3J4YgNOPOdrVw6YRST73wTgIsrR/LdZ5YxoLSI5T++IOV5Xv2HBQDUT5/CTU/UMDd4Ym5qOsitl32U7z69jNKiApqDnflbjy3mz988k0Ub3+fnL6/i7fomHrimCoAbn1jCvHU7Oeu4oZz4oQG0tzu+98xyBvYtpuaH58dd/q9eXc2M+Rs5enBfPn/qh+OOc9Fdc9l98DBfvHceC39wbqfHbn15FY8teLfjfv30KZ0ev+mJGuat28mZxw3hi/fOiztOqr77zLJO9weWlfCvnzo24fjXPlLdqa6d+1u4feZanl+6hdf+/ZMx4ze3tnHLc8sZXl7KgltC63nna2u5f856hg0o5Yqq0Z3G/9qMalZuCYXdxGMGctoxgzse+/3rtdz9+joG9yth6qSj49Z3w+NLOmq76oF/dNxO5MsPLuh0P3rcy+6Jbd+/rdzKr15dA0CBwfpbO0/zr48t7pjma4+G2uuRefXUT5/CH97cwK//uobSogK+dva4jmmmPbyw03Kum3Gknd9p2MNvvzQh4Tp858ka3qzdwfM1WxKu6x/eXM9tf1tLWXEh10Zdf+iCO+cAcMmEkTHT3fD4Ev6xvomzjh/KCcMH8FbdTm59ZTUrt+zhd1MT15TIr/+6hkfm1XP33+vY39LGccP6c9744WnPJ+yZxZt5aXkDLy1v4PLTRnUMv2f2On43q5bysmK+csYxKc3rO08ujbi3m/nrdnLmcUNjxlu4oYlfvLyamk27uOfq07pdeyKe9cCPvEIebu38ihd+dd4b9Jy7Y2dEb3pPRK+hOaInEh4nPGzvoSPLC/c0wj3T8BePdnXRKwpPf+hw4t5seL7vx3kV359kfcPzbz7cs8NP8XoYXdUcT/hdRryeaGgZof879h1Zz/A7oUNxeoOR7doc9fiBllBtydon21oi9tl0O5AHg3UIr0sqkq3vnhR6w91tu+h9raWtLRjevR54eNvvD+pJ9o4vmUTteCC8j6XRztESfT4X3i+7+y4kGa8CvKvDDG0Z+NmeyJBqdy7uW7bwKOH/ZkceKyywYNrQ/Uyf9mhYzLCCgthhkcI19fQD4HhtX5hk2anMI97j7Sluy07Lj5qk0MLbIr8ffBdYem0UKTxpOquQbHnJ9heI3Y9TVdSxr2Xms6o0d6+cideZ6cl27gmvAryrY1TJwiEVkfNoa3dxXxTC44R/ZCJyu4U3YniclE57DKa3VHaAOKMUJpmuqCAzQRavLTIe4B3HTlObX+TyoycJP5bvz73TbaNI4Smjf9CkJ8tLtr9AxH7cxYbo6gU9Uy+a0aEYrwOTjkxFbLzOUKJmD69CT2tPJGmAm9nDZrbdzFZEDBtsZjPNrDb4Pygr1UXpqheZiZ2mLaoHHm8n7QjwcA88YsNE78AtOUiPpE/YcK+o
h+8G4nWq0u11JNtG6X5I1akDHjVpQYbDpLt61DMLpk2rB55kf0irB97F9ojXmSoqCMVJpi55kVKnJg/irXu+ak2lB/4IMDlq2M3ALOfc8cCs4H7WRe4Y7c51eisTGbYu4rHwTtje7mKGR0/THt0Dj7MDhwMh8pH29tC04edGuM7oT/Ej63VR9YdfMKLr7vR2zYXut0U8Fv2EDK9HeLqiwq7f1obHDa9D9HLDj8WbvsA6t2v0+kWKbs/oaULLSDw9EesVrrNzD7xz3eGH2qLWK+H847RHuL5EQRbd1pHzCIt+ge20zdvj3+4YL2IbJGrj6GUXmnV5RkSi/I5c3462i5h/9DwPt7V32meccx29zdaOfSh2+fFqiv4LPw+ia418JxLvee2i6o3XZvFqcR3/O69r9LzD6xrv7JzwurfH7OPB8AR19JSlMlMzGwO86Jw7Jbi/BviUc67BzEYAs51zJyabT1VVlauurk42WkJX3j8/7qlykp766VO4+ZllPPH2pozP+5SR5ax4b0/G55uqPsUFHOrhB7aZ9PFxQ5i/fmfyESUv6qdPYczNL+VkWQ9eU9Xts2jMbJFzrip6eHePgQ93zjUABP+HdbHg682s2syqGxu7Ps82mfv/V+ZPw/lnlY3wBvIa3kCvCm9A4S0d+pdm/qztrH+I6Zx7wDlX5Zyrqqio6NG8BvUryVBVIiK5dfzw/hmfZ3cDfFtw6ITg//bMlSQi8sFTXJj5/nJ35/gCMC24PQ14PjPliIh8MJXkI8DN7HFgPnCimW02s+uA6cB5ZlYLnBfcFxGRBIoLM3+qYdKj6s65qQke+myGa5EceXjuhnyXINIrfPq22TlbVk++1JWIV9/EBLgszkV0JD0/efGdfJcg0iuke/XS7hrSryQrX/bxLsB/c1Ul9dOnMGnMkSvPnTd+OCvTuALhtI8fQ/30Kdx5VWXMYxOPHtjltN/45LhO91++4Wxqf/65jvvjR5R33P6vC5KeGt+hqMConz6FFT++gLsSXLkt/Ar+p699LOF8XrrhrISP3XLhSV3WUBexHl1Z87Po73Ul9o1zxnHmsUMSPv7Hr56e1rYDGFfRj9U/ncyqn6ReRyrOOSH+WVIfPqoPd1x1akaXFfbcv50ZM+yVG89OadoPlffh7YirUw7tX8JtV/S8zsrRXT8HIrf/kODMsIemxZyi3G0r0twfMiHePnjphJF8dORRHffrfv453vnJBSxNcGXReO646lTW/uxzzPv+ZzJSZzSvLicbKfIr0uV9iumXxjmW4XHjvSD2KS7sdN+s8zfKjior7vR4SVFBp0+X+xQfud2dDy36lxZRHrWMsNKiAg60tFFclHi+5X3iTwvJPwUvSrHe0qLC5CMFkn11u7SwIO6261tSmPDqcWXFhTHbKRMSHaLsW1qU1jqnI95X7ctSXLfiIqO87EjblRQWpDxtV4qSbbOItgiXP7Bv5k7xzcax4mTi7U9FBcbgiFOXiwoLQs+RNFZ1YFkJJV08X3vKux54WOR1S7r7ziSVK+xFf1E1+ivP0cvOxnGusHC93T0dKR9Xa0h28aREF0zqKqCzddmJRNvOyMzF0npaR8x4Zp3aN1Nv0btzZc/SDIZUcUHuYylem7c51+PncyrXnunR/LM69yzq6a9zQPwnZbKLD0VfxyZ67GxeVjL8riMfPZTuSrYDJwrGTAZCqrradrm8KFaqT/qCAstKh6E7T61M9jKzHXqpiry+UXelcvXHnvA2wDNx/e94T8pkT4jo5Ub3erIZ4OGwy8b5pNl655BsB04UjPm4vnJXbZDTHniK615olpUPxrrTOcrGl1TyLRMXVcz2mwlvWz3yOFy6r/7haeMefyzp/NY9+nhg9P3oJ1tpxDHwojR6ypHLTXQMsm9J6HhnV8dju3o+d3XsfFAGj2FG1tCnuKDrwyEJDuwM6JP4M41MHOeNO9+S+PMd0Kcoay9w0ftggaX+pB9WXtrpfllJYUbqTOf5FG6zZMfN
fVRcYAk/U0q1E5XtHri3H2LeNXUCNzy+hPqdB/je5NDZFY9eO4mNTQd4cekW3tmyh9PGDOJgSxtnHjsUs9BPMr1/4DBTJ4V+W/HiypH8edFmzj15GAda2ljXuJ8ff+EjnDBsAG+t28GE0QP5+jnjuOXZ5cxe08iLN5zF6EF92dfcinOOtdv2MXpwGQD3Xj2R0uICKkcPYuJPZ3LuycOYOuloJo0dzL89tpiNOw8waexglm/eTdWYQXx05FGUFRdy+8y1nH38UP7n8+M71u3j44ZwZdUonqrezJvf/TRbdh3k3aYDnDp6IG+saWT04DK+OHEU7x9o4ZQPl1NWUsTW3Qe5omo0IweWcc4JFcxZ20i/ksKOn6O69hNjufy0UWzceYCrTh/Nr15dzdbdhzh5RDmvrtzKn7/58Y42/O7Ty7jryxO45dnlXHvWWA4dbmPS2ME07W+hKfhJuSevP4MZ8+t5eflWTh8ziNZ2x+G2dj570nAu+pcR/L9lDbyzZQ/TzhzDZRNHMWNePWOH9mPEwD4s3bSbX/91Ne3uyJkft11xKne/Xsdvv1RJzaZdfPrEYTxVvYm12/by15XbKCowrjp9NEP6l3ZsPwidBfT955YzsKyYNVv3MqR/CSu37OGlG86ibvs+3qzdwcvLG3ho2uk8vWgzzywO/aDtnVdVsrC+iQmjB/LLV9dw4of688OLxnPhKSNobXfUbtvLueOH88hb9Vx5+ij+ZdRA3mnYQ9/iInbub2ZVwx4G9S3h6+eM44r75nP++OFUjRnEm7U7uPpjR9PS5hg3tB/3zK7jQEsb6xv3M+3MMfy5ehOrt+7lpA8NYFxFP04ZWc5dUydQXlZM3fZ9fOK4IVT0L+Xy00axcEMTRQXGf15wIks37aKkqIAPHdWHZxe/R2u74/9cFNpnfnjReBZtfJ/vTT6JDw/sQ3mfIvZE/NTfFaeN4oxxQ3ho7gbGVfSjvKyYPy14l29/+jheWdHAkH6lHDOkLyu37GH73kPcNXUCTftbuOiuuVw2cSQvLWvg9itP5f39LVSODl36/w/XVNHmHCcOH8BLyxsYPbgvUz46gkH9QicUlBUXcudrtfzi0o9y3LD+fPWPC/nUScO4pHIkX3+0mlsuPImKAaXc+vJqzhs/nLrt+ygvK+aTwf5w51WVbNx5gHnrdvCd806grLiQrz7yNk37W7hs4khGD+rLb2fVcu/VE9nb3Ep5nyL++y8r+OnFp7Biy24+PLCMJ9/exKHDbazdFvoJv/+ecjI79rXw2IKN7D3UyuWnjeLpRZs7fq/zlgtP4hcvr+aP//t0Fmxo4hvnjGPXwcM8u+Q9ng6eHx373Y1nc8V987jxs8dz7xvr+PKkY9i5v5lLJ4ykpbWdv9Rsob3d8ZGIs1iyIaXLyWZKTy8nKyLyzyjTl5MVEZE8U4CLiHhKAS4i4ikFuIiIpxTgIiKeUoCLiHhKAS4i4ikFuIiIp3L6RR4zawQ2dnPyocCODJaTKaorPaorPb21Lui9tX0Q6zrGORdzwfqcBnhPmFl1vG8i5ZvqSo/qSk9vrQt6b23/THXpEIqIiKcU4CIinvIpwB/IdwEJqK70qK709Na6oPfW9k9TlzfHwEVEpDOfeuAiIhJBAS4i4ikvAtzMJpvZGjOrM7Obc7jc0Wb2upmtMrOVZnZjMPxHZvaemdUEfxdGTPP9oM41ZnZBluurN7PlQQ3VwbDBZjbTzGqD/4OC4WZmvwtqW2ZmE7NU04kR7VJjZnvM7KZ8tJmZPWxm281sRcSwtNvHzKYF49ea2bQs1fVrM1sdLPs5MxsYDB9jZgcj2u2+iGlOC7Z/XVB7j36/K0FdaW+3TD9fE9T1ZERN9WZWEwzPZXslyofc7WPOuV79BxQC64BxQAmwFBifo2WPACYGtwcAa4HxwI+A/4wz/vigvlJgbFB3YRbrqweGRg37FXBzcPtm4JfB7QuBVwADzgAW5GjbbQWOyUebAecAE4EV3W0fYDCw
Pvg/KLg9KAt1nQ8UBbd/GVHXmMjxouazEPh4UPMrwOeyUFda2y0bz9d4dUU9fjvwwzy0V6J8yNk+5kMPfBJQ55xb75xrAZ4ALs7Fgp1zDc65xcHtvcAqYGQXk1wMPOGca3bObQDqCNWfSxcDM4LbM4BLIoY/6kL+AQw0sxFZruWzwDrnXFffvs1amznn5gBNcZaXTvtcAMx0zjU5594HZgKTM12Xc+5vzrnwD1n+AxjV1TyC2sqdc/NdKAUejViXjNXVhUTbLePP167qCnrRVwKPdzWPLLVXonzI2T7mQ4CPBDZF3N9M1yGaFWY2BpgALAgGfTt4G/Rw+C0Sua/VAX8zs0Vmdn0wbLhzrgFCOxgwLE+1AXyJzk+s3tBm6bZPPtrtWkI9tbCxZrbEzN4ws7ODYSODWnJRVzrbLdftdTawzTlXGzEs5+0VlQ8528d8CPB4x6lyeu6jmfUHngFucs7tAe4FjgUqgQZCb+Eg97V+wjk3Efgc8C0zO6eLcXNam5mVAF8A/hwM6i1tlkiiOnLdbj8AWoHHgkENwNHOuQnAvwN/MrPyHNaV7nbL9facSudOQs7bK04+JBw1QQ3drs2HAN8MjI64PwrYkquFm1kxoY3zmHPuWQDn3DbnXJtzrh14kCNv+XNaq3NuS/B/O/BcUMe28KGR4P/2fNRG6EVlsXNuW1Bjr2gz0m+fnNUXfHh1EXB18Daf4BDFzuD2IkLHl08I6oo8zJKVurqx3XLZXkXAZcCTEfXmtL3i5QM53Md8CPC3gePNbGzQq/sS8EIuFhwcX3sIWOWc+03E8Mhjx5cC4U/HXwC+ZGalZjYWOJ7QByfZqK2fmQ0I3yb0IdiKoIbwp9jTgOcjarsm+CT8DGB3+G1elnTqGfWGNotYXjrt81fgfDMbFBw+OD8YllFmNhn4HvAF59yBiOEVZlYY3B5HqH3WB7XtNbMzgv30moh1yWRd6W63XD5fzwVWO+c6Do3ksr0S5QO53Md68ilsrv4IfXq7ltCr6Q9yuNyzCL2VWQbUBH8XAv8XWB4MfwEYETHND4I619DDT7mT1DaO0Cf8S4GV4XYBhgCzgNrg/+BguAF3B7UtB6qyWFtfYCdwVMSwnLcZoReQBuAwoV7Odd1pH0LHpOuCv69mqa46QsdBw/vZfcG4Xwy271JgMfD5iPlUEQrUdcDvCb5ZneG60t5umX6+xqsrGP4I8M2ocXPZXonyIWf7mL5KLyLiKR8OoYiISBwKcBERTynARUQ8pQAXEfGUAlxExFMKcBERTynARUQ89f8Bt5mGubOT7Q4AAAAASUVORK5CYII=\n", 227 | "text/plain": [ 228 | "
" 229 | ] 230 | }, 231 | "metadata": { 232 | "needs_background": "light" 233 | }, 234 | "output_type": "display_data" 235 | } 236 | ], 237 | "source": [ 238 | "# 不带基线的简单策略梯度算法\n", 239 | "\n", 240 | "policy_kwargs = dict(hidden_sizes=[128,], learning_rate=0.01)\n", 241 | "agent = VPGAgent(env, policy_kwargs=policy_kwargs)\n", 242 | "\n", 243 | "# 训练\n", 244 | "episodes = 2000\n", 245 | "episode_rewards = []\n", 246 | "for episode in tqdm(range(episodes)):\n", 247 | " episode_reward = play_montecarlo(env, agent, train=True)\n", 248 | " episode_rewards.append(episode_reward)\n", 249 | "plt.plot(episode_rewards)\n", 250 | "\n", 251 | "# 测试\n", 252 | "episode_rewards = [play_montecarlo(env, agent, train=False) for _ in range(100)]\n", 253 | "print('平均回合奖励 = {} / {} = {}'.format(sum(episode_rewards), len(episode_rewards), np.mean(episode_rewards)))" 254 | ] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.6" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 4 278 | } 279 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RL-Python-Pytorch 2 | 3 |
4 | 5 | 《强化学习-原理与Python实现》原书使用```NumPy```、```Keras```和```TensorFlow```实现强化学习方法,见[rl-book](https://github.com/ZhiqingXiao/rl-book)。 6 | 7 | 这里使用```PyTorch```将原书中深度强化学习方法实现一遍。 8 | 9 | - [x] 1 - 初识强化学习 10 | - [x] 2 - Markov决策过程 11 | - [x] 3 - 有模型数值迭代 12 | - [x] 4 - 回合更新价值迭代 13 | - [x] 5 - 时序差分价值迭代 14 | - [x] 6 - 函数近似方法 15 | - [ ] 7 - 回合更新策略梯度方法 16 | - [ ] 8 - 执行者/评论者方法 17 | - [ ] 9 - 连续动作空间的确定性策略 18 | - [ ] 10 - 综合案例:电动游戏 19 | - [ ] 11 - 综合案例:棋盘游戏 20 | - [ ] 12 - 综合案例:自动驾驶 21 | 22 |
23 | 24 | --- 25 | 26 |
27 | 28 | ## 环境配置 29 | 30 | | Package | Version | Installation | 31 | | ---------- | ------- | ------------------------------------------------------------ | 32 | | python | 3.8.6 | conda create --name rl python=3.8.6 | 33 | | numpy | 1.19.4 | pip install numpy==1.19.4 | 34 | | scipy | 1.5.4 | pip install scipy==1.5.4 | 35 | | pandas | 1.1.4 | pip install pandas==1.1.4 | 36 | | sympy | 1.7 | pip install sympy==1.7 | 37 | | gym | 0.17.3 | pip install gym==0.17.3 | 38 | | tqdm | 4.54.0 | pip install tqdm==4.54.0 | 39 | | matplotlib | 3.3.3 | pip install matplotlib==3.3.3 | 40 | | notebook | 6.1.5 | pip install notebook==6.1.5 | 41 | | pytorch | 1.7.0 | cpu:conda install pytorch\==1.7.0 cpuonly -c pytorch
gpu:conda install pytorch\==1.7.0 cudatoolkit=10.2 -c pytorch | 42 | 43 | --------------------------------------------------------------------------------