├── LICENSE
├── README.md
├── rl_utils.py
├── 第10章-Actor-Critic算法.ipynb
├── 第11章-TRPO算法.ipynb
├── 第12章-PPO算法.ipynb
├── 第13章-DDPG算法.ipynb
├── 第14章-SAC算法.ipynb
├── 第15章-模仿学习.ipynb
├── 第16章-模型预测控制.ipynb
├── 第17章-基于模型的策略优化.ipynb
├── 第18章-离线强化学习.ipynb
├── 第19章-目标导向的强化学习.ipynb
├── 第20章-多智能体强化学习入门.ipynb
├── 第21章-多智能体强化学习进阶.ipynb
├── 第2章-多臂老虎机问题.ipynb
├── 第3章-马尔可夫决策过程.ipynb
├── 第4章-动态规划算法.ipynb
├── 第5章-时序差分算法.ipynb
├── 第6章-Dyna-Q算法.ipynb
├── 第7章-DQN算法.ipynb
├── 第8章-DQN改进算法.ipynb
└── 第9章-策略梯度算法.ipynb


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Hands-on Reinforcement Learning (动手学强化学习)
 2 | 
 3 | Tips: If you hit errors when running the gym environment code, try `pip install gym==0.18.3` to install this specific version of the gym library (a quick sanity check is sketched at the end of this README). If the problem persists, feel free to open an issue!
 4 | 
 5 | Welcome to Hands-on Reinforcement Learning (《动手学强化学习》). The series starts from the basics, such as the definition of reinforcement learning, and works its way up step by step to today's mainstream reinforcement learning algorithms. Each chapter is a Jupyter Notebook with detailed illustrated explanations and code walkthroughs.
 6 | 
 7 | * Since GitHub's notebook rendering is limited, we recommend reading on the [Hands-on RL homepage](https://hrl.boyuai.com/); the code-only versions of the notebooks are provided in this repository for you to download and run.
 8 | 
 9 | * The book is available for purchase on [JD.com](https://item.jd.com/13129509.html) and [Dangdang](http://product.dangdang.com/29391150.html).
10 | 
11 | * If you find any problems in the book or have suggestions for improvement, please open an issue!
12 | 
13 | * The companion reinforcement learning course is available on the [Boyu learning platform](https://www.boyuai.com/elites/course/xVqhU42F5IDky94x), where everyone can study and discuss for free.
14 | 
15 | ![](https://boyuai.oss-cn-shanghai.aliyuncs.com/disk/tmp/hrl-poster.jpeg)
16 | 
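17 | A minimal sanity check (a sketch, assuming `gym==0.18.3` as recommended above) that the installed environment exposes the pre-0.26 gym API the notebooks rely on, i.e. `env.reset()` returns only the observation and `env.step()` returns a 4-tuple:
18 | 
19 | ```python
20 | import gym
21 | import numpy as np
22 | 
23 | print(gym.__version__)  # expected: 0.18.3
24 | env = gym.make('CartPole-v0')
25 | state = env.reset()  # old API: reset() returns just the observation
26 | next_state, reward, done, info = env.step(env.action_space.sample())
27 | print(np.shape(state), reward, done)
28 | ```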


--------------------------------------------------------------------------------
/rl_utils.py:
--------------------------------------------------------------------------------
 1 | from tqdm import tqdm
 2 | import numpy as np
 3 | import torch
 4 | import collections
 5 | import random
 6 | 
 7 | class ReplayBuffer:
 8 |     def __init__(self, capacity):
 9 |         self.buffer = collections.deque(maxlen=capacity) 
10 | 
11 |     def add(self, state, action, reward, next_state, done): 
12 |         self.buffer.append((state, action, reward, next_state, done)) 
13 | 
14 |     def sample(self, batch_size): 
15 |         transitions = random.sample(self.buffer, batch_size)
16 |         state, action, reward, next_state, done = zip(*transitions)
17 |         return np.array(state), action, reward, np.array(next_state), done 
18 | 
19 |     def size(self): 
20 |         return len(self.buffer)
21 | 
22 | def moving_average(a, window_size):
23 |     cumulative_sum = np.cumsum(np.insert(a, 0, 0)) 
24 |     middle = (cumulative_sum[window_size:] - cumulative_sum[:-window_size]) / window_size
25 |     r = np.arange(1, window_size-1, 2)
26 |     begin = np.cumsum(a[:window_size-1])[::2] / r
27 |     end = (np.cumsum(a[:-window_size:-1])[::2] / r)[::-1]
28 |     return np.concatenate((begin, middle, end))
29 | 
30 | def train_on_policy_agent(env, agent, num_episodes):
31 |     return_list = []
32 |     for i in range(10):
33 |         with tqdm(total=int(num_episodes/10), desc='Iteration %d' % i) as pbar:
34 |             for i_episode in range(int(num_episodes/10)):
35 |                 episode_return = 0
36 |                 transition_dict = {'states': [], 'actions': [], 'next_states': [], 'rewards': [], 'dones': []}
37 |                 state = env.reset()
38 |                 done = False
39 |                 while not done:
40 |                     action = agent.take_action(state)
41 |                     next_state, reward, done, _ = env.step(action)
42 |                     transition_dict['states'].append(state)
43 |                     transition_dict['actions'].append(action)
44 |                     transition_dict['next_states'].append(next_state)
45 |                     transition_dict['rewards'].append(reward)
46 |                     transition_dict['dones'].append(done)
47 |                     state = next_state
48 |                     episode_return += reward
49 |                 return_list.append(episode_return)
50 |                 agent.update(transition_dict)
51 |                 if (i_episode+1) % 10 == 0:
52 |                     pbar.set_postfix({'episode': '%d' % (num_episodes/10 * i + i_episode+1), 'return': '%.3f' % np.mean(return_list[-10:])})
53 |                 pbar.update(1)
54 |     return return_list
55 | 
56 | def train_off_policy_agent(env, agent, num_episodes, replay_buffer, minimal_size, batch_size):
57 |     return_list = []
58 |     for i in range(10):
59 |         with tqdm(total=int(num_episodes/10), desc='Iteration %d' % i) as pbar:
60 |             for i_episode in range(int(num_episodes/10)):
61 |                 episode_return = 0
62 |                 state = env.reset()
63 |                 done = False
64 |                 while not done:
65 |                     action = agent.take_action(state)
66 |                     next_state, reward, done, _ = env.step(action)
67 |                     replay_buffer.add(state, action, reward, next_state, done)
68 |                     state = next_state
69 |                     episode_return += reward
70 |                     if replay_buffer.size() > minimal_size:
71 |                         b_s, b_a, b_r, b_ns, b_d = replay_buffer.sample(batch_size)
72 |                         transition_dict = {'states': b_s, 'actions': b_a, 'next_states': b_ns, 'rewards': b_r, 'dones': b_d}
73 |                         agent.update(transition_dict)
74 |                 return_list.append(episode_return)
75 |                 if (i_episode+1) % 10 == 0:
76 |                     pbar.set_postfix({'episode': '%d' % (num_episodes/10 * i + i_episode+1), 'return': '%.3f' % np.mean(return_list[-10:])})
77 |                 pbar.update(1)
78 |     return return_list
79 | 
80 | 
81 | def compute_advantage(gamma, lmbda, td_delta):
82 |     td_delta = td_delta.detach().numpy()
83 |     advantage_list = []
84 |     advantage = 0.0
85 |     for delta in td_delta[::-1]:
86 |         advantage = gamma * lmbda * advantage + delta
87 |         advantage_list.append(advantage)
88 |     advantage_list.reverse()
89 |     return torch.tensor(advantage_list, dtype=torch.float)
90 |                 
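91 | 
92 | if __name__ == '__main__':
93 |     # Minimal usage sketch (illustration only; assumes gym==0.18.3 and its
94 |     # pre-0.26 API). RandomAgent is a hypothetical stand-in for the DQN/SAC
95 |     # style agents defined in the chapter notebooks; it only demonstrates the
96 |     # interface train_off_policy_agent expects: take_action() and update().
97 |     import gym
98 | 
99 |     class RandomAgent:
100 |         def __init__(self, action_space):
101 |             self.action_space = action_space
102 | 
103 |         def take_action(self, state):
104 |             return self.action_space.sample()
105 | 
106 |         def update(self, transition_dict):
107 |             pass  # a real agent would perform a gradient step here
108 | 
109 |     env = gym.make('CartPole-v0')
110 |     agent = RandomAgent(env.action_space)
111 |     buffer = ReplayBuffer(capacity=1000)
112 |     returns = train_off_policy_agent(env, agent, num_episodes=20,
113 |                                      replay_buffer=buffer, minimal_size=100,
114 |                                      batch_size=32)
115 |     print(moving_average(np.array(returns), window_size=9))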


--------------------------------------------------------------------------------
/第16章-模型预测控制.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "executionInfo": {
  8 |      "elapsed": 6698,
  9 |      "status": "ok",
 10 |      "timestamp": 1649956814219,
 11 |      "user": {
 12 |       "displayName": "Sam Lu",
 13 |       "userId": "15789059763790170725"
 14 |      },
 15 |      "user_tz": -480
 16 |     },
 17 |     "id": "pkDNguALCr-X"
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import numpy as np\n",
 22 |     "from scipy.stats import truncnorm\n",
 23 |     "import gym\n",
 24 |     "import itertools\n",
 25 |     "import torch\n",
 26 |     "import torch.nn as nn\n",
 27 |     "import torch.nn.functional as F\n",
 28 |     "import collections\n",
 29 |     "import matplotlib.pyplot as plt\n",
 30 |     "\n",
 31 |     "\n",
 32 |     "class CEM:\n",
 33 |     "    def __init__(self, n_sequence, elite_ratio, fake_env, upper_bound,\n",
 34 |     "                 lower_bound):\n",
 35 |     "        self.n_sequence = n_sequence\n",
 36 |     "        self.elite_ratio = elite_ratio\n",
 37 |     "        self.upper_bound = upper_bound\n",
 38 |     "        self.lower_bound = lower_bound\n",
 39 |     "        self.fake_env = fake_env\n",
 40 |     "\n",
 41 |     "    def optimize(self, state, init_mean, init_var):\n",
 42 |     "        mean, var = init_mean, init_var\n",
 43 |     "        X = truncnorm(-2, 2, loc=np.zeros_like(mean), scale=np.ones_like(var))\n",
 44 |     "        state = np.tile(state, (self.n_sequence, 1))\n",
 45 |     "\n",
 46 |     "        for _ in range(5):\n",
 47 |     "            lb_dist, ub_dist = mean - self.lower_bound, self.upper_bound - mean\n",
 48 |     "            constrained_var = np.minimum(\n",
 49 |     "                np.minimum(np.square(lb_dist / 2), np.square(ub_dist / 2)),\n",
 50 |     "                var)\n",
 51 |     "            # 生成动作序列\n",
 52 |     "            action_sequences = [X.rvs() for _ in range(self.n_sequence)\n",
 53 |     "                                ] * np.sqrt(constrained_var) + mean\n",
 54 |     "            # 计算每条动作序列的累积奖励\n",
 55 |     "            returns = self.fake_env.propagate(state, action_sequences)[:, 0]\n",
 56 |     "            # 选取累积奖励最高的若干条动作序列\n",
 57 |     "            elites = action_sequences[np.argsort(\n",
 58 |     "                returns)][-int(self.elite_ratio * self.n_sequence):]\n",
 59 |     "            new_mean = np.mean(elites, axis=0)\n",
 60 |     "            new_var = np.var(elites, axis=0)\n",
 61 |     "            # 更新动作序列分布\n",
 62 |     "            mean = 0.1 * mean + 0.9 * new_mean\n",
 63 |     "            var = 0.1 * var + 0.9 * new_var\n",
 64 |     "\n",
 65 |     "        return mean"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 2,
 71 |    "metadata": {
 72 |     "executionInfo": {
 73 |      "elapsed": 9,
 74 |      "status": "ok",
 75 |      "timestamp": 1649956814220,
 76 |      "user": {
 77 |       "displayName": "Sam Lu",
 78 |       "userId": "15789059763790170725"
 79 |      },
 80 |      "user_tz": -480
 81 |     },
 82 |     "id": "coGG5UOpCr-Z"
 83 |    },
 84 |    "outputs": [],
 85 |    "source": [
 86 |     "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\n",
 87 |     "    \"cpu\")\n",
 88 |     "\n",
 89 |     "\n",
 90 |     "class Swish(nn.Module):\n",
 91 |     "    ''' Swish激活函数 '''\n",
 92 |     "    def __init__(self):\n",
 93 |     "        super(Swish, self).__init__()\n",
 94 |     "\n",
 95 |     "    def forward(self, x):\n",
 96 |     "        return x * torch.sigmoid(x)\n",
 97 |     "\n",
 98 |     "\n",
 99 |     "def init_weights(m):\n",
100 |     "    ''' 初始化模型权重 '''\n",
101 |     "    def truncated_normal_init(t, mean=0.0, std=0.01):\n",
102 |     "        torch.nn.init.normal_(t, mean=mean, std=std)\n",
103 |     "        while True:\n",
104 |     "            cond = (t < mean - 2 * std) | (t > mean + 2 * std)\n",
105 |     "            if not torch.sum(cond):\n",
106 |     "                break\n",
107 |     "            t = torch.where(\n",
108 |     "                cond,\n",
109 |     "                torch.nn.init.normal_(torch.ones(t.shape, device=device),\n",
110 |     "                                      mean=mean,\n",
111 |     "                                      std=std), t)\n",
112 |     "        return t\n",
113 |     "\n",
114 |     "    if type(m) == nn.Linear or isinstance(m, FCLayer):\n",
115 |     "        truncated_normal_init(m.weight, std=1 / (2 * np.sqrt(m._input_dim)))\n",
116 |     "        m.bias.data.fill_(0.0)\n",
117 |     "\n",
118 |     "\n",
119 |     "class FCLayer(nn.Module):\n",
120 |     "    ''' 集成之后的全连接层 '''\n",
121 |     "    def __init__(self, input_dim, output_dim, ensemble_size, activation):\n",
122 |     "        super(FCLayer, self).__init__()\n",
123 |     "        self._input_dim, self._output_dim = input_dim, output_dim\n",
124 |     "        self.weight = nn.Parameter(\n",
125 |     "            torch.Tensor(ensemble_size, input_dim, output_dim).to(device))\n",
126 |     "        self._activation = activation\n",
127 |     "        self.bias = nn.Parameter(\n",
128 |     "            torch.Tensor(ensemble_size, output_dim).to(device))\n",
129 |     "\n",
130 |     "    def forward(self, x):\n",
131 |     "        return self._activation(\n",
132 |     "            torch.add(torch.bmm(x, self.weight), self.bias[:, None, :]))"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 3,
138 |    "metadata": {
139 |     "executionInfo": {
140 |      "elapsed": 8,
141 |      "status": "ok",
142 |      "timestamp": 1649956814220,
143 |      "user": {
144 |       "displayName": "Sam Lu",
145 |       "userId": "15789059763790170725"
146 |      },
147 |      "user_tz": -480
148 |     },
149 |     "id": "SNVDgXI2Cr-a"
150 |    },
151 |    "outputs": [],
152 |    "source": [
153 |     "class EnsembleModel(nn.Module):\n",
154 |     "    ''' 环境模型集成 '''\n",
155 |     "    def __init__(self,\n",
156 |     "                 state_dim,\n",
157 |     "                 action_dim,\n",
158 |     "                 ensemble_size=5,\n",
159 |     "                 learning_rate=1e-3):\n",
160 |     "        super(EnsembleModel, self).__init__()\n",
161 |     "        # 输出包括均值和方差,因此是状态与奖励维度之和的两倍\n",
162 |     "        self._output_dim = (state_dim + 1) * 2\n",
163 |     "        self._max_logvar = nn.Parameter((torch.ones(\n",
164 |     "            (1, self._output_dim // 2)).float() / 2).to(device),\n",
165 |     "                                        requires_grad=False)\n",
166 |     "        self._min_logvar = nn.Parameter((-torch.ones(\n",
167 |     "            (1, self._output_dim // 2)).float() * 10).to(device),\n",
168 |     "                                        requires_grad=False)\n",
169 |     "\n",
170 |     "        self.layer1 = FCLayer(state_dim + action_dim, 200, ensemble_size,\n",
171 |     "                              Swish())\n",
172 |     "        self.layer2 = FCLayer(200, 200, ensemble_size, Swish())\n",
173 |     "        self.layer3 = FCLayer(200, 200, ensemble_size, Swish())\n",
174 |     "        self.layer4 = FCLayer(200, 200, ensemble_size, Swish())\n",
175 |     "        self.layer5 = FCLayer(200, self._output_dim, ensemble_size,\n",
176 |     "                              nn.Identity())\n",
177 |     "        self.apply(init_weights)  # 初始化环境模型中的参数\n",
178 |     "        self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)\n",
179 |     "\n",
180 |     "    def forward(self, x, return_log_var=False):\n",
181 |     "        ret = self.layer5(self.layer4(self.layer3(self.layer2(\n",
182 |     "            self.layer1(x)))))\n",
183 |     "        mean = ret[:, :, :self._output_dim // 2]\n",
184 |     "        # 在PETS算法中,将方差控制在最小值和最大值之间\n",
185 |     "        logvar = self._max_logvar - F.softplus(\n",
186 |     "            self._max_logvar - ret[:, :, self._output_dim // 2:])\n",
187 |     "        logvar = self._min_logvar + F.softplus(logvar - self._min_logvar)\n",
188 |     "        return mean, logvar if return_log_var else torch.exp(logvar)\n",
189 |     "\n",
190 |     "    def loss(self, mean, logvar, labels, use_var_loss=True):\n",
191 |     "        inverse_var = torch.exp(-logvar)\n",
192 |     "        if use_var_loss:\n",
193 |     "            mse_loss = torch.mean(torch.mean(torch.pow(mean - labels, 2) *\n",
194 |     "                                             inverse_var,\n",
195 |     "                                             dim=-1),\n",
196 |     "                                  dim=-1)\n",
197 |     "            var_loss = torch.mean(torch.mean(logvar, dim=-1), dim=-1)\n",
198 |     "            total_loss = torch.sum(mse_loss) + torch.sum(var_loss)\n",
199 |     "        else:\n",
200 |     "            mse_loss = torch.mean(torch.pow(mean - labels, 2), dim=(1, 2))\n",
201 |     "            total_loss = torch.sum(mse_loss)\n",
202 |     "        return total_loss, mse_loss\n",
203 |     "\n",
204 |     "    def train(self, loss):\n",
205 |     "        self.optimizer.zero_grad()\n",
206 |     "        loss += 0.01 * torch.sum(self._max_logvar) - 0.01 * torch.sum(\n",
207 |     "            self._min_logvar)\n",
208 |     "        loss.backward()\n",
209 |     "        self.optimizer.step()"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 4,
215 |    "metadata": {
216 |     "executionInfo": {
217 |      "elapsed": 8,
218 |      "status": "ok",
219 |      "timestamp": 1649956814221,
220 |      "user": {
221 |       "displayName": "Sam Lu",
222 |       "userId": "15789059763790170725"
223 |      },
224 |      "user_tz": -480
225 |     },
226 |     "id": "kVE0nKi6Cr-b"
227 |    },
228 |    "outputs": [],
229 |    "source": [
230 |     "class EnsembleDynamicsModel:\n",
231 |     "    ''' 环境模型集成,加入精细化的训练 '''\n",
232 |     "    def __init__(self, state_dim, action_dim, num_network=5):\n",
233 |     "        self._num_network = num_network\n",
234 |     "        self._state_dim, self._action_dim = state_dim, action_dim\n",
235 |     "        self.model = EnsembleModel(state_dim,\n",
236 |     "                                   action_dim,\n",
237 |     "                                   ensemble_size=num_network)\n",
238 |     "        self._epoch_since_last_update = 0\n",
239 |     "\n",
240 |     "    def train(self,\n",
241 |     "              inputs,\n",
242 |     "              labels,\n",
243 |     "              batch_size=64,\n",
244 |     "              holdout_ratio=0.1,\n",
245 |     "              max_iter=20):\n",
246 |     "        # 设置训练集与验证集\n",
247 |     "        permutation = np.random.permutation(inputs.shape[0])\n",
248 |     "        inputs, labels = inputs[permutation], labels[permutation]\n",
249 |     "        num_holdout = int(inputs.shape[0] * holdout_ratio)\n",
250 |     "        train_inputs, train_labels = inputs[num_holdout:], labels[num_holdout:]\n",
251 |     "        holdout_inputs, holdout_labels = inputs[:\n",
252 |     "                                                num_holdout], labels[:\n",
253 |     "                                                                     num_holdout]\n",
254 |     "        holdout_inputs = torch.from_numpy(holdout_inputs).float().to(device)\n",
255 |     "        holdout_labels = torch.from_numpy(holdout_labels).float().to(device)\n",
256 |     "        holdout_inputs = holdout_inputs[None, :, :].repeat(\n",
257 |     "            [self._num_network, 1, 1])\n",
258 |     "        holdout_labels = holdout_labels[None, :, :].repeat(\n",
259 |     "            [self._num_network, 1, 1])\n",
260 |     "\n",
261 |     "        # 保留最好的结果\n",
262 |     "        self._snapshots = {i: (None, 1e10) for i in range(self._num_network)}\n",
263 |     "\n",
264 |     "        for epoch in itertools.count():\n",
265 |     "            # 定义每一个网络的训练数据\n",
266 |     "            train_index = np.vstack([\n",
267 |     "                np.random.permutation(train_inputs.shape[0])\n",
268 |     "                for _ in range(self._num_network)\n",
269 |     "            ])\n",
270 |     "            # 所有真实数据都用来训练\n",
271 |     "            for batch_start_pos in range(0, train_inputs.shape[0], batch_size):\n",
272 |     "                batch_index = train_index[:, batch_start_pos:batch_start_pos +\n",
273 |     "                                          batch_size]\n",
274 |     "                train_input = torch.from_numpy(\n",
275 |     "                    train_inputs[batch_index]).float().to(device)\n",
276 |     "                train_label = torch.from_numpy(\n",
277 |     "                    train_labels[batch_index]).float().to(device)\n",
278 |     "\n",
279 |     "                mean, logvar = self.model(train_input, return_log_var=True)\n",
280 |     "                loss, _ = self.model.loss(mean, logvar, train_label)\n",
281 |     "                self.model.train(loss)\n",
282 |     "\n",
283 |     "            with torch.no_grad():\n",
284 |     "                mean, logvar = self.model(holdout_inputs, return_log_var=True)\n",
285 |     "                _, holdout_losses = self.model.loss(mean,\n",
286 |     "                                                    logvar,\n",
287 |     "                                                    holdout_labels,\n",
288 |     "                                                    use_var_loss=False)\n",
289 |     "                holdout_losses = holdout_losses.cpu()\n",
290 |     "                break_condition = self._save_best(epoch, holdout_losses)\n",
291 |     "                if break_condition or epoch > max_iter:  # 结束训练\n",
292 |     "                    break\n",
293 |     "\n",
294 |     "    def _save_best(self, epoch, losses, threshold=0.1):\n",
295 |     "        updated = False\n",
296 |     "        for i in range(len(losses)):\n",
297 |     "            current = losses[i]\n",
298 |     "            _, best = self._snapshots[i]\n",
299 |     "            improvement = (best - current) / best\n",
300 |     "            if improvement > threshold:\n",
301 |     "                self._snapshots[i] = (epoch, current)\n",
302 |     "                updated = True\n",
303 |     "        self._epoch_since_last_update = 0 if updated else self._epoch_since_last_update + 1\n",
304 |     "        return self._epoch_since_last_update > 5\n",
305 |     "\n",
306 |     "    def predict(self, inputs, batch_size=64):\n",
307 |     "        mean, var = [], []\n",
308 |     "        for i in range(0, inputs.shape[0], batch_size):\n",
309 |     "            input = torch.from_numpy(\n",
310 |     "                inputs[i:min(i +\n",
311 |     "                             batch_size, inputs.shape[0])]).float().to(device)\n",
312 |     "            cur_mean, cur_var = self.model(input[None, :, :].repeat(\n",
313 |     "                [self._num_network, 1, 1]),\n",
314 |     "                                           return_log_var=False)\n",
315 |     "            mean.append(cur_mean.detach().cpu().numpy())\n",
316 |     "            var.append(cur_var.detach().cpu().numpy())\n",
317 |     "        return np.hstack(mean), np.hstack(var)"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": 5,
323 |    "metadata": {
324 |     "executionInfo": {
325 |      "elapsed": 7,
326 |      "status": "ok",
327 |      "timestamp": 1649956814221,
328 |      "user": {
329 |       "displayName": "Sam Lu",
330 |       "userId": "15789059763790170725"
331 |      },
332 |      "user_tz": -480
333 |     },
334 |     "id": "1auD04WgCr-c"
335 |    },
336 |    "outputs": [],
337 |    "source": [
338 |     "class FakeEnv:\n",
339 |     "    def __init__(self, model):\n",
340 |     "        self.model = model\n",
341 |     "\n",
342 |     "    def step(self, obs, act):\n",
343 |     "        inputs = np.concatenate((obs, act), axis=-1)\n",
344 |     "        ensemble_model_means, ensemble_model_vars = self.model.predict(inputs)\n",
345 |     "        ensemble_model_means[:, :, 1:] += obs.numpy()\n",
346 |     "        ensemble_model_stds = np.sqrt(ensemble_model_vars)\n",
347 |     "        ensemble_samples = ensemble_model_means + np.random.normal(\n",
348 |     "            size=ensemble_model_means.shape) * ensemble_model_stds\n",
349 |     "\n",
350 |     "        num_models, batch_size, _ = ensemble_model_means.shape\n",
351 |     "        models_to_use = np.random.choice(\n",
352 |     "            [i for i in range(self.model._num_network)], size=batch_size)\n",
353 |     "        batch_inds = np.arange(0, batch_size)\n",
354 |     "        samples = ensemble_samples[models_to_use, batch_inds]\n",
355 |     "        rewards, next_obs = samples[:, :1], samples[:, 1:]\n",
356 |     "        return rewards, next_obs\n",
357 |     "\n",
358 |     "    def propagate(self, obs, actions):\n",
359 |     "        with torch.no_grad():\n",
360 |     "            obs = np.copy(obs)\n",
361 |     "            total_reward = np.expand_dims(np.zeros(obs.shape[0]), axis=-1)\n",
362 |     "            obs, actions = torch.as_tensor(obs), torch.as_tensor(actions)\n",
363 |     "            for i in range(actions.shape[1]):\n",
364 |     "                action = torch.unsqueeze(actions[:, i], 1)\n",
365 |     "                rewards, next_obs = self.step(obs, action)\n",
366 |     "                total_reward += rewards\n",
367 |     "                obs = torch.as_tensor(next_obs)\n",
368 |     "            return total_reward"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "code",
373 |    "execution_count": 6,
374 |    "metadata": {
375 |     "executionInfo": {
376 |      "elapsed": 8,
377 |      "status": "ok",
378 |      "timestamp": 1649956814222,
379 |      "user": {
380 |       "displayName": "Sam Lu",
381 |       "userId": "15789059763790170725"
382 |      },
383 |      "user_tz": -480
384 |     },
385 |     "id": "Kl3fh7_iCr-c"
386 |    },
387 |    "outputs": [],
388 |    "source": [
389 |     "class ReplayBuffer:\n",
390 |     "    def __init__(self, capacity):\n",
391 |     "        self.buffer = collections.deque(maxlen=capacity)\n",
392 |     "\n",
393 |     "    def add(self, state, action, reward, next_state, done):\n",
394 |     "        self.buffer.append((state, action, reward, next_state, done))\n",
395 |     "\n",
396 |     "    def size(self):\n",
397 |     "        return len(self.buffer)\n",
398 |     "\n",
399 |     "    def return_all_samples(self):\n",
400 |     "        all_transitions = list(self.buffer)\n",
401 |     "        state, action, reward, next_state, done = zip(*all_transitions)\n",
402 |     "        return np.array(state), action, reward, np.array(next_state), done"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "code",
407 |    "execution_count": 7,
408 |    "metadata": {
409 |     "executionInfo": {
410 |      "elapsed": 11,
411 |      "status": "ok",
412 |      "timestamp": 1649956814723,
413 |      "user": {
414 |       "displayName": "Sam Lu",
415 |       "userId": "15789059763790170725"
416 |      },
417 |      "user_tz": -480
418 |     },
419 |     "id": "7iPZNkXHCr-d"
420 |    },
421 |    "outputs": [],
422 |    "source": [
423 |     "class PETS:\n",
424 |     "    ''' PETS算法 '''\n",
425 |     "    def __init__(self, env, replay_buffer, n_sequence, elite_ratio,\n",
426 |     "                 plan_horizon, num_episodes):\n",
427 |     "        self._env = env\n",
428 |     "        self._env_pool = ReplayBuffer(buffer_size)\n",
429 |     "\n",
430 |     "        obs_dim = env.observation_space.shape[0]\n",
431 |     "        self._action_dim = env.action_space.shape[0]\n",
432 |     "        self._model = EnsembleDynamicsModel(obs_dim, self._action_dim)\n",
433 |     "        self._fake_env = FakeEnv(self._model)\n",
434 |     "        self.upper_bound = env.action_space.high[0]\n",
435 |     "        self.lower_bound = env.action_space.low[0]\n",
436 |     "\n",
437 |     "        self._cem = CEM(n_sequence, elite_ratio, self._fake_env,\n",
438 |     "                        self.upper_bound, self.lower_bound)\n",
439 |     "        self.plan_horizon = plan_horizon\n",
440 |     "        self.num_episodes = num_episodes\n",
441 |     "\n",
442 |     "    def train_model(self):\n",
443 |     "        env_samples = self._env_pool.return_all_samples()\n",
444 |     "        obs = env_samples[0]\n",
445 |     "        actions = np.array(env_samples[1])\n",
446 |     "        rewards = np.array(env_samples[2]).reshape(-1, 1)\n",
447 |     "        next_obs = env_samples[3]\n",
448 |     "        inputs = np.concatenate((obs, actions), axis=-1)\n",
449 |     "        labels = np.concatenate((rewards, next_obs - obs), axis=-1)\n",
450 |     "        self._model.train(inputs, labels)\n",
451 |     "\n",
452 |     "    def mpc(self):\n",
453 |     "        mean = np.tile((self.upper_bound + self.lower_bound) / 2.0,\n",
454 |     "                       self.plan_horizon)\n",
455 |     "        var = np.tile(\n",
456 |     "            np.square(self.upper_bound - self.lower_bound) / 16,\n",
457 |     "            self.plan_horizon)\n",
458 |     "        obs, done, episode_return = self._env.reset(), False, 0\n",
459 |     "        while not done:\n",
460 |     "            actions = self._cem.optimize(obs, mean, var)\n",
461 |     "            action = actions[:self._action_dim]  # 选取第一个动作\n",
462 |     "            next_obs, reward, done, _ = self._env.step(action)\n",
463 |     "            self._env_pool.add(obs, action, reward, next_obs, done)\n",
464 |     "            obs = next_obs\n",
465 |     "            episode_return += reward\n",
466 |     "            mean = np.concatenate([\n",
467 |     "                np.copy(actions)[self._action_dim:],\n",
468 |     "                np.zeros(self._action_dim)\n",
469 |     "            ])\n",
470 |     "        return episode_return\n",
471 |     "\n",
472 |     "    def explore(self):\n",
473 |     "        obs, done, episode_return = self._env.reset(), False, 0\n",
474 |     "        while not done:\n",
475 |     "            action = self._env.action_space.sample()\n",
476 |     "            next_obs, reward, done, _ = self._env.step(action)\n",
477 |     "            self._env_pool.add(obs, action, reward, next_obs, done)\n",
478 |     "            obs = next_obs\n",
479 |     "            episode_return += reward\n",
480 |     "        return episode_return\n",
481 |     "\n",
482 |     "    def train(self):\n",
483 |     "        return_list = []\n",
484 |     "        explore_return = self.explore()  # 先进行随机策略的探索来收集一条序列的数据\n",
485 |     "        print('episode: 1, return: %d' % explore_return)\n",
486 |     "        return_list.append(explore_return)\n",
487 |     "\n",
488 |     "        for i_episode in range(self.num_episodes - 1):\n",
489 |     "            self.train_model()\n",
490 |     "            episode_return = self.mpc()\n",
491 |     "            return_list.append(episode_return)\n",
492 |     "            print('episode: %d, return: %d' % (i_episode + 2, episode_return))\n",
493 |     "        return return_list"
494 |    ]
495 |   },
496 |   {
497 |    "cell_type": "code",
498 |    "execution_count": 8,
499 |    "metadata": {
500 |     "colab": {
501 |      "base_uri": "https://localhost:8080/",
502 |      "height": 469
503 |     },
504 |     "executionInfo": {
505 |      "elapsed": 641756,
506 |      "status": "ok",
507 |      "timestamp": 1649957456469,
508 |      "user": {
509 |       "displayName": "Sam Lu",
510 |       "userId": "15789059763790170725"
511 |      },
512 |      "user_tz": -480
513 |     },
514 |     "id": "pfzBBzuECr-d",
515 |     "outputId": "4b8d971d-31e3-47c6-aa29-00f0f4c062a3"
516 |    },
517 |    "outputs": [
518 |     {
519 |      "name": "stdout",
520 |      "output_type": "stream",
521 |      "text": [
522 |       "episode: 1, return: -985\n",
523 |       "episode: 2, return: -1384\n",
524 |       "episode: 3, return: -1006\n",
525 |       "episode: 4, return: -1853\n",
526 |       "episode: 5, return: -378\n",
527 |       "episode: 6, return: -123\n",
528 |       "episode: 7, return: -124\n",
529 |       "episode: 8, return: -122\n",
530 |       "episode: 9, return: -124\n",
531 |       "episode: 10, return: -125\n"
532 |      ]
533 |     },
534 |     {
535 |      "data": {
536 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEWCAYAAACjYXoKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3xc9ZX//9dRsdzlJhfJxgVccJOwTQsxKUAoATv0npDkB6GFZMluFlLYLEk25QtLQkIgpAAJYJKl2bRQAwQSihuS3HDDILnJNh65yFY7vz/myh7bkjXSzOjOWO/n43EfmvncMkdja87cz7n38zF3R0REJBFZYQcgIiKZT8lEREQSpmQiIiIJUzIREZGEKZmIiEjClExERCRhSiYihwgze9XM/r9kbysSDyUTSTtm9oGZ1ZjZdjPbYGb3m1nPYN2rZrYrWNe0PGVml8Y8rzGzxthtgn0/aWb/NLOImW0xszfN7OgO+H1iY95kZo+b2ZBUv24mMLN+ZvaEme0wszVmdknYMUn7KJlIujrL3XsCU4BpwPdi1l3v7j1jlrPc/aGm58DpwNrYbcysN/A08CugH1AE/Dewu4N+n+uD2MYAfYA7Ouh1091dQC0wCLgUuNvMJoQbkrSHkomkNXevBJ4DJiZ4qDHB8Wa5e4O717j7C+5e2tzGZpZnZr8ws7XB8gszywvWfdrMKszsW2a20czWmdmX4/x9tgCPNf0+ZjbOzF4MzpSWmdkFMTHcb2Z3mdkzZrbNzN42s8Nj1p9iZkuDM61fAxaz7gdm9mDM8xFm5maW08zvetBtgzOrHwVndU1ngv3N7CEzqzazd81sRAvv491mdtt+bbPN7EYz6wGcC3zf3be7+xvAHODyeN5LSS9KJpLWzGwYcAawIMFDvQ80mNkDZna6mfVtZfvvAscBJUAxcAz7nh0NBvKJnuF8FbgrjmNiZgOIfoAuCD5MXwQeBgYCFwG/MbPxMbtcRPQMqi+wAvhxzHEeD2IaAKwETmjt9RNwEdEP+SLgcOBfwH1Ez/KWAP/Vwn6zgAvNzIK4+wKfAx4hmuDr3f39mO3fA3RmkoGUTCRdPWlmW4E3gNeA/4lZd6eZbY1Zftjawdy9Gvgk4MDvgCozm2Nmg1rY5VLgVnff6O5VRD/QY78x1wXr69z9WWA7MPYgIdwZ/D7vAeuAG4EzgQ/c/T53r3f3BUTPWs6P2e8Jd3/H3euBh4gmN4gm2EXu/qi71wG/ANa39j4k4D53X+nuEaJniivd/aUgrv8Djmphv38Qfc+nB8/PA/7l7muBnkD1fttHgF5Jj15STslE0tUX3L2Puw9392vdvSZm3Q3Buqbl+/Ec0N2XuPsV7j6UaDdTIdEP4eYUAmtinq8J2ppsDj5Im+wk+uHYkqaYi9z90iBBDQeOjU2MRJPY4Jj9YhNE7GsUAh/F/G4e+zwFNsQ8rmnmedMFEt+JufDhniCuR4CLg20vIZoUIZqAe+/3Or2BbckOXlJPyUQ6JXdfCtxPy7WYtUQ/7JscFrQl00fAa/slxp7ufk0c+64DhjU9CbqRhsWs3wF0j3kem6D215ZtD8rd/yfmwoerg+ZZwHlmNhw4lujZF0S7HnPMbHTMIYqBRe19fQmPkol0CkGh+1tmNjR4Pozot+W3WthlFvA9MysI6hO3AA+2sG17PQ2MMbPLzSw3WI42syPj2PcZYIKZnRMUym9g3ySwEDjRzA4zs3zg5oMcqy3btlnQfbcJ+D3wvLtvDdp3EK373GpmPczsBGAm8Odkvr50DCUTyUS/tn3vM5kXxz7biH4rftvMdhBNIuXAt1rY/kfAXKAUKAPmB21J4+7biBajLyJ61rMe+BmQF8e+m4jWVn4KbAZGA2/GrH8R+EsQ/zyiiaulY8W9bQIeBk4Ofsa6FugGbCSawK9xd52ZZCDT5FgiIpIonZmIiEjClExERCRhSiYiIpIwJRMREUnYAeP0dBYDBgzwESNGhB2GiEhGmTdv3iZ3L9i/vdMmkxEjRjB37tywwxARyShmtqa5dnVziYhIwpRMREQkYUomIiKSMCUTERFJmJKJiIgkTMlEREQSpmQiIiIJ67T3mYgcqrbsqOXVZRtpaNx3RPBgGvbo4/32MWvlecwe+6/bn+23gbvjDk70Z6MHbfusI1gXbSf42djo+6wLVuF4cJy9x409VmPMY4L9zIwsM8wgy6LPo4+NrOAn7H1uMT9jtzP2Po/9aTHHyYrOeL/nedN2TW+jxbxPFvOeNh17z79RC+179t17yGB9c+22zzaGMXxAd3p3zT34P2QbKZmIHEKWb9jGFfe9S+XWmtY3lk7r/i8fzafHDkzqMZVMRA4Rb67YxNUPzqNrbjazrjyOoX27Nbvd/lMYBecCB1kfu85bXNfcvuB7vhnbft/sm77NN30zz2r6Bh1skxWzvulbfdM37j1nA7HHij1jYO/xY2OPPfvZc/YStEWX6C/V9Lyl7WKf791m79lSc9vtObPa533yPY9j25vO3Jre0z3PnGbbfZ/2mH/R/bcJVkwsyt//HyphSiYih4C/vvsR33mijMMLevLHLx9NUZ/mE0lntqe76oBOPkkGJRORDNbY6Nz+4jLu+vtKpo8ewF2XTkl6X7hIPJRMRDLUrroG/uPRUp56by0XHzOMW2dOJDdbF2hKOJRMRDLQlh21XPmnucxb8zE3nT6Or5046oCrqEQ6kpKJSIZZVbWdL9//Lusiu7jrkil8fvKQsEMSUTIRySTvrN7CVX+eS7YZs648jqnD+4YdkgigZCKSMZ5cUMm3Hy1laL9u3H/FMRzWv3vYIYnsoWQikubcnTtfXsEdL73PcaP68dvLppHfXVdsSXpRMhFJY7X1jdz8eBmPza/gnClF/PScyXTJ0RVbkn6UTETSVGRnHV97cC5vrdrCv508hhtOOkJXbEnaSruvOGb2/8xsqZmVmtkTZtYnaB9hZjVmtjBY7onZZ6qZlZnZCjO70/QXJxnuw807OefuN5m/Zit3XFjMN04erUQiaS3tkgnwIjDR3ScD7wM3x6xb6e4lwXJ1TPvdwJXA6GA5rcOiFUmy+R9+zNm/eZPNO2r581eP4eyjhoYdkkir0i6ZuPsL7l4fPH0LOOhfkpkNAXq7+1seHYXuT8AXUhymSEo8U7qOi+99i55dc3j8mk9w7Kj+YYckEpe0Syb7+QrwXMzzkWa2wMxeM7PpQVsRUBGzTUXQdgAzu8rM5prZ3KqqqtRELNIO7s49r63kuofnM7EonyeuPYFRBT3DDkskbqEU4M3sJWBwM6u+6+6zg22+C9QDDwXr1gGHuftmM5sKPGlmE9ryuu5+L3AvwLRp0w4YLFskDHUNjdwyexGz3vmQMycP4bbzi+mamx12WCJtEkoycfeTD7bezK4AzgROCrqucPfdwO7g8TwzWwmMASrZtytsaNAmkvaqd9Vx3UPz+cfyTVz3mcP51iljycpSoV0yT9pdGmxmpwHfBj7l7jtj2guALe7eYGajiBbaV
7n7FjOrNrPjgLeBLwK/CiN2kbao3FrDV+57l5VV2/n5uZO54OhhYYck0m5pl0yAXwN5wIvBpZBvBVdunQjcamZ1QCNwtbtvCfa5Frgf6Ea0xvLc/gcVSSdlFRG+8sC77Kpt4P4vH8MnRw8IOySRhKRdMnH3I1pofwx4rIV1c4GJqYxLJFleXLyBG2YtoF+PLjx07bGMGdQr7JBEEpZ2yUTkUHbfm6u59enFTC7K53dfmsbAXl3DDkkkKZRMRDpAQ6Pzw6cXc/8/P+DUCYP4xYVH0a2LrtiSQ4eSiUiK7dhdzw2zFvDy0o1cOX0kN51+JNm6YksOMUomIim0oXoXX7n/XZasq+aHX5jI5ccNDzskkZRQMhFJkSXrqvnK/e9SXVPHH750NJ8ZNzDskERSRslEJAVeXbaR6x6aT6+uufzf1Z9gfGHvsEMSSSklE5Eke+jtNdwyexFjB/Xij1cczeB8XbElhz4lE5EkaWx0fvq3pdz7+io+M7aAX10yhZ55+hOTzkH/00WS5JWlG7n39VVcftxw/uus8eRkp/ug3CLJo//tIkky78OPyc02vnfmkUok0unof7xIkpRXRhgzqBd5OboZUTofJRORJHB3yiojTB6aH3YoIqFQMhFJgoqPa9i6s46JRUom0jkpmYgkQVllBIDJRX1CjkQkHEomIklQWhEhN9sYM1jztkvnpGQikgTllRHGDlbxXTovJRORBDUV3yepi0s6MSUTkQR9tKWGSE0dk1R8l05MyUQkQXuK77osWDqxtEsmZvYDM6s0s4XBckbMupvNbIWZLTOzU2PaTwvaVpjZTeFELp1VaeVWumRnaS536dTSdWyuO9z9ttgGMxsPXARMAAqBl8xsTLD6LuAUoAJ418zmuPvijgxYOq+m4nuXnLT7bibSYTLpf/9M4BF33+3uq4EVwDHBssLdV7l7LfBIsK1Iyrk7ZRURJqmLSzq5dE0m15tZqZn90cz6Bm1FwEcx21QEbS21H8DMrjKzuWY2t6qqKhVxSyfz4ZadVO+qV/FdOr1QkomZvWRm5c0sM4G7gcOBEmAdcHuyXtfd73X3ae4+raCgIFmHlU6sqfiuZCKdXSg1E3c/OZ7tzOx3wNPB00pgWMzqoUEbB2kXSamyioiK7yKkYTeXmQ2JeXo2UB48ngNcZGZ5ZjYSGA28A7wLjDazkWbWhWiRfk5HxiydV1llhHFDVHwXSceruX5uZiWAAx8AXwNw90Vm9ldgMVAPXOfuDQBmdj3wPJAN/NHdF4URuHQuTXe+zyguDDsUkdClXTJx98sPsu7HwI+baX8WeDaVcYnsb83mnWxT8V0ESMNuLpFMsaf4rsuCRZRMRNqrrDJClxwV30VAyUSk3coqIhw5uBe52fozEtFfgUg7NDY65ZW6812kiZKJSDus2bKTbbtVfBdpomQi0g5773zXhFgioGQi0i5lFVvpkpPF6EGa810ElExE2qWsMsKRQ3qr+C4S0F+CSBtFi+/VTFa9RGQPJRORNvpg8w62q/gusg8lE5E20p3vIgdSMhFpo7KKCHk5WYweqOK7SBMlE5E2aiq+56j4LrKH/hpE2qCx0Vm0tprJ6uIS2YeSiUgbrA6K7xNVfBfZh5KJSBuUVUSL7zozEdmXkolIG5RVRuiam8URBSq+i8RSMhFpAxXfRZqnvwiRODU2OosqI7rzXaQZSiYicVq1aQc7ahtUfBdpRtolEzP7i5ktDJYPzGxh0D7CzGpi1t0Ts89UMyszsxVmdqeZWXi/gRyqyiq3AjB5qIadF9lfTtgB7M/dL2x6bGa3A5GY1SvdvaSZ3e4GrgTeBp4FTgOeS2Wc0vmUVVTTNTeLwwt6hB2KSNpJuzOTJsHZxQXArFa2GwL0dve33N2BPwFf6IAQpZMpr4wwXsV3kWal81/FdGCDuy+PaRtpZgvM7DUzmx60FQEVMdtUBG0HMLOrzGyumc2tqqpKTdRySGpodMrXRtTFJdKCULq5zOwlYHAzq77r7rODxxez71nJOuAwd99sZlOBJ81sQlte193vBe4FmDZtmrc9cumsVm/azk4V30VaFEoycfeTD7bezHKAc4CpMfvsBnYHj+eZ2UpgDFAJDI3ZfWjQJpI0pbrzXeSg0rWb62Rgqbvv6b4yswIzyw4ejwJGA6vcfR1QbWbHBXWWLwKzmzuoSHuVVUbolpvN4brzXaRZaXc1V+AiDiy8nwjcamZ1QCNwtbtvCdZdC9wPdCN6FZeu5JKkKq+MML6wN9lZuupcpDlpmUzc/Ypm2h4DHmth+7nAxBSHJZ1UQzDn+4VHDws7FJG0la7dXCJpY1XVdmrqGjTnu8hBKJmItELFd5HWKZmItKKsMkL3LtmMUvFdpEVKJiKtKAvufFfxXaRlSiYiB9HQ6CxeW80kdXGJHFRcycTMvmFmvS3qD2Y238w+l+rgRMK2UsV3kbjEe2byFXevBj4H9AUuB36asqhE0oSK7yLxiTeZNHUWnwH82d0XxbSJHLLKg+L7yAEqvoscTLzJZJ6ZvUA0mTxvZr2I3oUuckgrrdjKxMJ8Fd9FWhFvMvkqcBNwtLvvBLoAX05ZVCJpoL6hkcXrqjVSsEgc4hpOxd0bzWwDMD4Y0VfkkLeyage76hqZNLR32KGIpL24EoOZ/Qy4EFgMNATNDryeorhEQldaEZ3zfVKRJsQSaU28ZxlfAMYGc4qIdArllRF6dMlm1ADN+S7SmnhrJquA3FQGIpJuSisjTCjKJ0vFd5FWxXtmshNYaGYvE8x2CODuN6QkKpGQ1Tc0smRdNZceOzzsUEQyQrzJZE6wiHQKK6q2R4vvupJLJC6tJpNgqtwr3P0zHRCPSFpouvNdY3KJxKfVmom7NwCNZqa/Kuk0yisj9MzLYWR/Fd9F4hFvN9d2oMzMXgR2NDWqZiKHqtKKCBMKe6v4LhKneK/mehz4PtH7SubFLO1mZueb2SIzazSzafutu9nMVpjZMjM7Nab9tKBthZndFNM+0szeDtr/YmZdEontYBobnfWRXak6vKSBuqD4rnqJSPzivQP+gRS8djlwDvDb2EYzGw9cBEwACoGXzGxMsPou4BSgAnjXzOa4+2LgZ8Ad7v6Imd1DdPiXu1MQM999spx/LK/imRumk99NV0sfipZv2M7u+kbVS0TaIN75TFab2ar9l0Re2N2XuPuyZlbNBB5x993uvhpYARwTLCvcfZW71wKPADPNzIDPAo8G+z9A9CbLlDh/2lDWR3Zx02OluHuqXkZCVF4ZFN91ZiISt3i7uaYBRwfLdOBO4MEUxVQEfBTzvCJoa6m9P7DV3ev3az+AmV1lZnPNbG5VVVW7gptyWF/+49SxPFe+ngffWtOuYxxKnlxQyQebdrS+YQYpC4rvI1R8F4lbXMnE3TfHLJXu/gvg863tZ2YvmVl5M8vMhCNvB3e/192nufu0goKCdh/nyumj+PTYAn749BIWrY0kMcLM8sg7H/LNvyzk1qcXhx1KUpVWRphYpOK7SFvE2801JWaZZmZXE0e9xd1PdveJzSyzD7JbJTAs5vnQoK2l9s1An5jR
jJvaUyYry7j9/GL69sjl6w8vYPvu+tZ3OsSUVmzlljmLyMvJ4vX3q/h4R23YISWFiu8i7RNvN9ftMctPgCnABSmKaQ5wkZnlmdlIYDTwDvAuMDq4cqsL0SL9HI8WLv4OnBfs/yXgYMkqKfr3zOOXFx3FB5t38L0nyjpV/WTLjlqueXA+BT3z+OMVR1Pf6Dxbvi7ssJLi/Q3bqK1v1BwmIm0U9+RY7v6ZYDnF3a8CEvoqamZnm1kFcDzwjJk9DxBMCfxXosPd/w24zt0bgprI9cDzwBLgr8G2AP8J3GhmK4jWUP6QSGzxOm5Uf75x0hieXLiW/5tX0REvGbqGRueGWQuo2r6buy+bwicO788RA3sye+HasENLiqbi++ShGnZepC3ivWnxUaJnI/u3TW3vC7v7E8ATLaz7MfDjZtqfBZ5tpn0V0au9Otz1nz2Ct1Zt5pbZ5Rw1rA+jB/UKI4wOc/sLy3hjxSZ+du6kPR+4M4oLueOl91m7tYbCPt1CjjAxZZUReuXlMLxf97BDEckoBz0zMbNxZnYukG9m58QsVwBdOyTCNJedZfzyohJ6dMnhuofnU1Pb0PpOGer5Rev5zasrufiYYVx49GF72mcUF+IOT5dm/tlJWUWEiRp2XqTNWuvmGgucCfQBzopZpgBXpja0zDGwd1f+98IS3t+wnVufXtT6DhloVdV2vvXX9ygems8PZkzYZ92IAT0oHprPnPcyO5nU1jeyZP023awo0g4H7eYKrrqabWbHu/u/OiimjPSpMQVc8+nDufvVlRw3qj8zS5q91SUj7dhdz9f+PI8uOVn85rKp5OVkH7DNjJIifvj0YlZWbefwgp4hRJk4Fd9F2i/eAvxmM3vZzMoBzGyymX0vhXFlpBtPGcPU4X35zuNlrD5EbuRzd779WCkrq7bzq4uPoqiFmsiZk4dgBnMyuBC/p/iuZCLSZvEmk98BNwN1AO5eSvTSXImRm53FnRcfRU52Ftc/PJ/d9ZlfP/nDG6t5pnQd/37qWE44YkCL2w3q3ZXjR/VnzntrM/Yy6bLKCL265jC8v4rvIm0VbzLp7u7v7NfW+e7Ui0NRn27cdn4xi9ZW85Nnl4YdTkLeWrWZnzy3lFMnDOKaTx3e6vYzigtZvWkH5ZXVHRBd8pVVRphUlE90uDcRaYt4k8kmMzsccAAzOw84NO5SS4FTxg/iyyeM4P5/fsDzi9aHHU67rI/s4vqH5zO8X3duO784rg/Y0ycOITfbmL0wpQMQpERtfSNL123Tne8i7RRvMrmO6FDx48ysEvgmcHXKojoE3HT6OCYV5fMf//ceFR/vDDucNqmtb+Tah+axs7aB314+lV5d4xtqP797Lp8aM5CnStfS0JhZXV3vb9hGbYOK7yLtFe9Aj6vc/WSgABgHfAr4ZCoDy3R5Odn8+pKjaHT4+qwF1DU0hh1S3H70zGLmf7iV/3decZtvwpxZUsiG6t28s3pLiqJLjbI9d74rmYi0R2s3LfYOZj38tZmdAuwkOvbVClI3NtchY3j/Hvz03Eks+HArt73Q3NQt6efx+RX86V9ruHL6SD4/eUib9z/5yEF075KdcfeclFVG6N01h8N057tIu7R2ZvJnojculhG9SfHvwPnA2e4eyjDymebMyYVccuxh/Pa1Vby6bGPY4RzU4rXVfOeJMo4d2Y//PG1cu47RrUs2nxs/iGfL1lFbnzlnY2UVESYNVfFdpL1aSyaj3P0Kd/8tcDEwHjjV3RemPrRDxy1njmfc4F7c+Nf32FCdnvPHR3bWcfWD8+jTrQu/vmQKOdnxltMONKOkkEhNHf9Y3r4JyDra7voGlq6vVr1EJAGtfWLUNT1w9wagwt3T89MwjXXNjdZPamobuGHWgrQrTjc2Ot/8ywLWRWq469IpFPTKS+h400cX0Ld7bsaMJPz++u3UNbiu5BJJQGvJpNjMqoNlGzC56bGZZebNBCE5YmAvfviFiby9egt3vrw87HD2cecry/n7sipuOXM8U4f3Tfh4udlZnD5pCC8u3sDO2vS/HWlP8b1Iw86LtNdBk4m7Z7t772Dp5e45MY97d1SQh4rzpg7lnClF3PnKcv65clPY4QDw96Ub+eXLyzlnShGXHTc8acedWVxITV0DLy7ekLRjpkpZ5Vbyu+UyrF9mD58vEqb2d4xLu/xw5kRGDujBNx9ZyKbtu0ON5cPNO/nGIws4cnBv/ufsSUktPh89oh9D8rvyVAZc1aU730USp2TSwXrk5XDXJVPYWlPHjX99j8aQ6ic1tQ187cF5mBn3XDaVrrkHjgSciKws46ziQl57v4qtO9N3fvjd9Q0sW79NxXeRBCmZhODIIb255czxvP5+Fb99fVWHv767890nyli6vppfXFTCYSka2HBGcSF1Dc5z5ek7pMyy9dtUfBdJAiWTkFx67GF8ftIQbnthGfPWdOzd4g++tYbHF1TyjZNG85mxA1P2OhMKezOqoEdaj9WlO99FkkPJJCRmxk/OnURhn658/eEFHdYVNG/Nx9z69GI+O24gN3x2dEpfy8yYWVzE26u3sD6SnleUl1VEyO+Wy9C+Kr6LJCKUZGJm55vZIjNrNLNpMe2nmNk8MysLfn42Zt2rZrbMzBYGy8CgPc/M/mJmK8zsbTMb0fG/Ufv07prLry+eQtX23fzHo6Upnwekatturn1oHkPyu3HHBSUdMs/5jJL0nh++rDLCZN35LpKwsM5MyoFzgNf3a98EnOXuk4iOAfbn/dZf6u4lwdI0NslXgY/d/QjgDuBnKYw76YqH9eE/TxvHi4s3cP8/P0jZ69Q3NHL9w/OJ1NRxz2VTye8e30jAiRo5oAeT03R++F11Dby/QcV3kWQIJZm4+xJ3P2DkQ3df4O5NnzqLgG5m1trt2DOBB4LHjwInWYZ9zfzqJ0dy0riB/M+zSyit2JqS1/jpc0t5e/UWfnLOJMYXduwtQjOKCymtiKTdVMYqvoskTzrXTM4F5rt77M0Y9wVdXN+PSRhFwEcA7l4PRID+zR3QzK4ys7lmNreqKn3GjTIzbju/mAE987j+4QVU76prfac2eLp0Lb9/YzVfOn44Zx81NKnHjseZkwvTcn74puK7kolI4lKWTMzsJTMrb2ZpdbRhM5tAtLvqazHNlwbdX9OD5fK2xuTu97r7NHefVlBQ0NbdU6pvjy7cefFRVG6t4TuPlyWtfvL+hm18+9FSpg7vy3c/Pz4px2yrwfldOXZkP2a/V5lW88OXVUTo013Fd5FkSFkycfeT3X1iM8vsg+1nZkOBJ4AvuvvKmONVBj+3AQ8DxwSrKoFhwb45QD6wOfm/UeodPaIfN54yhqdL1/HIux8lfLzqXXVc/ed5dO+Sw28unUKXnPBORGeWFLGqageL1qbPkG66810kedKqm8vM+gDPADe5+5sx7TlmNiB4nAucSbSIDzCHaLEe4DzgFU+nr79tdM2nDmf66AH8YM4ilq5v/wevu/Pvf32PNVt2ctclRzGod9ckRtl2p08cTG62pU0hvqn4ri4ukeQI69Lgs82sAjgeeMbMng9WXQ8cAdyy3yX
AecDzZlYKLCR6NvK7YJ8/AP3NbAVwI3BTR/4uyZaVZfzvBSX06prLdQ/Nb/eou3e/tpIXFm/g5tPHceyoZktIHapP9y58akwBT723NrQhZGItXb+N+kYV30WSJayruZ5w96Hunufug9z91KD9R+7eI+by3xJ33+juO9x9qrtPdvcJ7v6NYH4V3H2Xu5/v7ke4+zHu3vHjkyRZQa88fnlRCas27eCW2YvavP8byzdx2/PLOHPyEL76yZEpiLB9ziouZF1kF+9+EP788HuK77rzXSQp0qqbS/Y64YgBfP0zR/DovAoen18R936VW2u44ZEFHDGwJz87d3Ja1QNOGT+IbrnpMT98WcVW+nbPpaiPiu8iyaBkksZuOGk0x4zox/eeLGdl1fZWt99V18A1D86jrr6Rey6bSo+8nA6IMn7du+RwSjA/fF1DuPPDl1VWM2lon7RKtiKZTMkkjeVkZ/HLi0vIy8niunnjcIYAAA9tSURBVIfms6uu4aDb//dTiyitiHD7BcWMKujZQVG2zcySQj7eWccby8ObHGxv8V3zu4kki5JJmhuS343bLyhm6fpt/OiZxS1u95d3P2TWOx9x7acP53MTBndghG0zfXQB+d1yQx1JeMm6ahpUfBdJKiWTDPDZcYO46sRRPPjWhzxTuu6A9aUVW/n+7EVMHz2Ab31ubAgRxq9LThZnTBrCC4s3UFN78DOtVCnfU3zXnO8iyaJkkiH+/XNjKRnWh5seK+XDzTv3tG/ZUcs1D86noGcev7zoKLI7YCTgRM0oLmRnbQMvLQlnfvjSigj9enShMD/ce29EDiVKJhmiS04Wv7r4KDD4+qz51NY30tDo3DBrAVXbd3P3ZVPo16NL2GHG5ZiR/RjcuyuzQxqrS3e+iySfkkkGGdavOz8/dzLvVUT4+d+WcvsLy3hjxSZ+OHMCkzOoyyY7yzhz8hBee38jkZ3JHdSyNbvqGli+cbvqJSJJpmSSYU6fNIQvHj+c37+xmt+8upKLjxnGhUcfFnZYbTazpCiYH/7AGlAqLW4qvutmRZGkUjLJQN8540hKhvVh6vC+/NdZE8IOp10mFvVm5IAeHX4DY7mGnRdJifS6q03i0jU3m8eu+QQGHTL1biqYGTOKC7nzleVsqN7VYQNRllZE6N+jC0NUfBdJKp2ZZKjsLMvYRNJk7/zwHdfVVV4ZYZLmfBdJOiUTCc3hBT2ZWNSbOR10A2NNrYadF0kVJRMJ1YziQt6riPBBB8wPv3hdNY2ueolIKiiZSKjOKg7mh++AQny5hp0XSRklEwnVkPxuHD2iH7MXpn5++NKKCAN6dmFwyLNOihyKlEwkdDNLCllZtYPF61I7P3y57nwXSRklEwndGROHkJOV2vnhd9bWs3yjiu8iqaJkIqHr26MLJ44p4KmFqZsffklT8T2Dhp0RySShJBMzO9/MFplZo5lNi2kfYWY1ZrYwWO6JWTfVzMrMbIWZ3WlBX4WZ9TOzF81sefCzbxi/kyRmRnEhayO7mPfhxyk5flmF7nwXSaWwzkzKgXOA15tZt9LdS4Ll6pj2u4ErgdHBclrQfhPwsruPBl4OnkuGOWX8ILrmZqVs0qzSyggDeuYxqHdeSo4v0tmFkkzcfYm7L4t3ezMbAvR297c8esnPn4AvBKtnAg8Ejx+IaZcM0iMvh5OPHMSzZetTMj98eWWEybrzXSRl0rFmMtLMFpjZa2Y2PWgrAipitqkI2gAGuXvTeBzrgUEdFKck2cySIrbsqOWNFcmdH35nbT0rNm5norq4RFImZQM9mtlLQHOTkX/X3We3sNs64DB332xmU4EnzSzuYXHd3c2sxQqumV0FXAVw2GGZN2z7oe7EMQPo3TWHpxau5TNjBybtuIvXRovvk5VMRFImZcnE3U9uxz67gd3B43lmthIYA1QCQ2M2HRq0AWwwsyHuvi7oDtt4kOPfC9wLMG3atNTeISdtlpeTzRmThvDUe2upqW2gW5fspBy3tEJ3voukWlp1c5lZgZllB49HES20rwq6sarN7LjgKq4vAk1nN3OALwWPvxTTLhloRnEhO2obeGVpi98J2qy8MkJBr7wOG+ZepDMK69Lgs82sAjgeeMbMng9WnQiUmtlC4FHganffEqy7Fvg9sAJYCTwXtP8UOMXMlgMnB88lQx07qj8De+Ul9aqussqIurhEUiyUybHc/QngiWbaHwMea2GfucDEZto3AyclO0YJR3R++EIefGsNkZo68rvlJnS8HbvrWVG1nTMmDUlShCLSnLTq5hKB6FhdtQ2NPF++PuFjLV5XjTtMVr1EJKWUTCTtTB6az/D+3ZMyVlep7nwX6RBKJpJ2zIyZxYX8c+UmNm7bldCxyisjDOyVx0AV30VSSslE0tKMkkIaHZ5JcH74suDOdxFJLSUTSUtHDOzF+CG9mb2w/V1d23fXs7JKd76LdAQlE0lbM0oKWfjRVj7cvLNd+y9eq+K7SEdRMpG0dVZxIQBz3mvfPSelFVsBdGYi0gGUTCRtFfXpxtEj+jJ74dp2zQ9fXhlhUO88BvZS8V0k1ZRMJK3NKCli+cbtLF2/rc37llVGmFSkmRVFOoKSiaS1MyYOJrsd88Nv313Pqk07dH+JSAdRMpG01r9nHtNHD2BOG7u6FlVGVHwX6UBKJpL2ZhQXUrm1hvltmB++rDJ657uK7yIdQ8lE0t7nJgwmLyerTfeclFVGGNy7KwW9NOe7SEdQMpG01zOYH/6Z0nXUxzk/fFllRJNhiXQgJRPJCDNKCtm8o5Y3V25uddttu+pYVaXiu0hHUjKRjPDpsQX06prDnDi6uhatrQY0Ta9IR1IykYyQl5PN6RMH8/yi9eyqazjotmUadl6kwymZSMaYUVzE9t31/L2V+eHLKiMMye/KgJ4qvot0FCUTyRjHH96fAT3zWr2qK3rnu85KRDqSkolkjOj88EN4ZdlGqnfVNbtN9a46VuvOd5EOF0oyMbPzzWyRmTWa2bSY9kvNbGHM0mhmJcG6V81sWcy6gUF7npn9xcxWmNnbZjYijN9JOsbMkkJq61ueH35RpYrvImEI68ykHDgHeD220d0fcvcSdy8BLgdWu/vCmE0ubVrv7k0d518FPnb3I4A7gJ91QPwSkpJhfTisX8vzw5dVRoed15mJSMcKJZm4+xJ3X9bKZhcDj8RxuJnAA8HjR4GTzMwSiU/Sl5kxo7iQN1dsomrb7gPWl1VWU5jflf4qvot0qHSumVwIzNqv7b6gi+v7MQmjCPgIwN3rgQjQv7kDmtlVZjbXzOZWVVWlKm5Jsb3zwx94dlJWsVVdXCIhSFkyMbOXzKy8mWVmHPseC+x09/KY5kvdfRIwPVgub2tM7n6vu09z92kFBQVt3V3SxJhBvRg3uNcBXV3Vu+r4YPNOdXGJhCAnVQd295MT2P0i9jsrcffK4Oc2M3sYOAb4E1AJDAMqzCwHyAdaH3NDMtqMkkJ+/rdlfLRlJ8P6dQeiMysCTBqqCbFEOlradXOZWRZwATH1EjPLMbMBweNc4EyiRXyAOcCXgsfnAa94e+Z4lYxy1uSm+e
H3np3ozneR8IR1afDZZlYBHA88Y2bPx6w+EfjI3VfFtOUBz5tZKbCQ6NnI74J1fwD6m9kK4EbgppT/AhK6Yf26M3V4333G6iqrjFDUpxv9enQJMTKRzill3VwH4+5PAE+0sO5V4Lj92nYAU1vYfhdwfpJDlAwws6SQW2YvYun6asYN7q0730VClHbdXCLxOmPSkOj88AvXEqmpY83mnbqSSyQkSiaSsQb0zOOEIwYw5721e4vvOjMRCYWSiWS0GcWFVHxcw5/+9QGgZCISFiUTyWinThhEl5wsnl+0gaI+3eir4rtIKJRMJKP16prLSeMGAjBZ9RKR0CiZSMabWRK952SiurhEQqNkIhnvs+MG8bUTR3H2UUVhhyLSaYVyn4lIMnXJyeLmM44MOwyRTk1nJiIikjAlExERSZiSiYiIJEzJREREEqZkIiIiCVMyERGRhCmZiIhIwpRMREQkYdZZZ7g1sypgTTt3HwBsSmI4mU7vx156L/al92Nfh8L7MdzdC/Zv7LTJJBFmNtfdp4UdR7rQ+7GX3ot96f3Y16H8fqibS0REEqZkIiIiCVMyaZ97ww4gzej92Evvxb70fuzrkH0/VDMREZGE6cxEREQSpmQiIiIJUzJpIzM7zcyWmdkKM7sp7HjCYmbDzOzvZrbYzBaZ2TfCjikdmFm2mS0ws6fDjiVsZtbHzB41s6VmtsTMjg87prCY2b8FfyflZjbLzLqGHVOyKZm0gZllA3cBpwPjgYvNbHy4UYWmHviWu48HjgOu68TvRaxvAEvCDiJN/BL4m7uPA4rppO+LmRUBNwDT3H0ikA1cFG5Uyadk0jbHACvcfZW71wKPADNDjikU7r7O3ecHj7cR/aDo1JOwm9lQ4PPA78OOJWxmlg+cCPwBwN1r3X1ruFGFKgfoZmY5QHdgbcjxJJ2SSdsUAR/FPK+gk3+AApjZCOAo4O1wIwndL4BvA41hB5IGRgJVwH1Bt9/vzaxH2EGFwd0rgduAD4F1QMTdXwg3quRTMpGEmFlP4DHgm+5eHXY8YTGzM4GN7j4v7FjSRA4wBbjb3Y8CdgCdssZoZn2J9mCMBAqBHmZ2WbhRJZ+SSdtUAsNing8N2jolM8slmkgecvfHw44nZCcAM8zsA6Ldn581swfDDSlUFUCFuzedrT5KNLl0RicDq929yt3rgMeBT4QcU9IpmbTNu8BoMxtpZl2IFtHmhBxTKMzMiPaHL3H3/w07nrC5+83uPtTdRxD9f/GKux9y3z7j5e7rgY/MbGzQdBKwOMSQwvQhcJyZdQ/+bk7iELwYISfsADKJu9eb2fXA80SvyPijuy8KOaywnABcDpSZ2cKg7Tvu/myIMUl6+TrwUPDFaxXw5ZDjCYW7v21mjwLziV4FuYBDcFgVDaciIiIJUzeXiIgkTMlEREQSpmQiIiIJUzIREZGEKZmIiEjClExEEmBmDWa2MGY56F3eZna1mX0xCa/7gZkNSPQ4IsmiS4NFEmBm2929Zwiv+wHRUWg3dfRrizRHZyYiKRCcOfzczMrM7B0zOyJo/4GZ/Xvw+IZgPphSM3skaOtnZk8GbW+Z2eSgvb+ZvRDMifF7wGJe67LgNRaa2W+DOVWyzez+YP6MMjP7txDeBulElExEEtNtv26uC2PWRdx9EvBroiMK7+8m4Ch3nwxcHbT9N7AgaPsO8Keg/b+AN9x9AvAEcBiAmR0JXAic4O4lQANwKVACFLn7xCCG+5L4O4scQMOpiCSmJvgQb86smJ93NLO+lOhwI08CTwZtnwTOBXD3V4Izkt5E5wY5J2h/xsw+DrY/CZgKvBsd9oluwEbgKWCUmf0KeAY45IY8l/SiMxOR1PEWHjf5PNGZO6cQTQbt+XJnwAPuXhIsY939B+7+MdHZDV8letbT6SfsktRSMhFJnQtjfv4rdoWZZQHD3P3vwH8C+UBP4B9Eu6kws08Dm4J5Yl4HLgnaTwf6Bod6GTjPzAYG6/qZ2fDgSq8sd38M+B6dd/h36SDq5hJJTLeYUZMhOud50+XBfc2sFNgNXLzfftnAg8H0tgbc6e5bzewHwB+D/XYCXwq2/29glpktAv5JdFhz3H2xmX0PeCFIUHXAdUAN0VkOm74w3py8X1nkQLo0WCQFdOmudDbq5hIRkYTpzERERBKmMxMREUmYkomIiCRMyURERBKmZCIiIglTMhERkYT9//QR9OysykgbAAAAAElFTkSuQmCC\n",
537 |       "text/plain": [
538 |        "<Figure size 432x288 with 1 Axes>"
539 |       ]
540 |      },
541 |      "metadata": {
542 |       "needs_background": "light"
543 |      },
544 |      "output_type": "display_data"
545 |     }
546 |    ],
547 |    "source": [
548 |     "buffer_size = 100000\n",
549 |     "n_sequence = 50\n",
550 |     "elite_ratio = 0.2\n",
551 |     "plan_horizon = 25\n",
552 |     "num_episodes = 10\n",
553 |     "env_name = 'Pendulum-v0'\n",
554 |     "env = gym.make(env_name)\n",
555 |     "\n",
556 |     "replay_buffer = ReplayBuffer(buffer_size)\n",
557 |     "pets = PETS(env, replay_buffer, n_sequence, elite_ratio, plan_horizon,\n",
558 |     "            num_episodes)\n",
559 |     "return_list = pets.train()\n",
560 |     "\n",
561 |     "episodes_list = list(range(len(return_list)))\n",
562 |     "plt.plot(episodes_list, return_list)\n",
563 |     "plt.xlabel('Episodes')\n",
564 |     "plt.ylabel('Returns')\n",
565 |     "plt.title('PETS on {}'.format(env_name))\n",
566 |     "plt.show()\n",
567 |     "\n",
568 |     "# episode: 1, return: -1062\n",
569 |     "# episode: 2, return: -1257\n",
570 |     "# episode: 3, return: -1792\n",
571 |     "# episode: 4, return: -1225\n",
572 |     "# episode: 5, return: -248\n",
573 |     "# episode: 6, return: -124\n",
574 |     "# episode: 7, return: -249\n",
575 |     "# episode: 8, return: -269\n",
576 |     "# episode: 9, return: -245\n",
577 |     "# episode: 10, return: -119"
578 |    ]
579 |   }
580 |  ],
581 |  "metadata": {
582 |   "colab": {
583 |    "collapsed_sections": [],
584 |    "name": "第16章-模型预测控制.ipynb",
585 |    "provenance": []
586 |   },
587 |   "kernelspec": {
588 |    "display_name": "Python 3",
589 |    "language": "python",
590 |    "name": "python3"
591 |   },
592 |   "language_info": {
593 |    "codemirror_mode": {
594 |     "name": "ipython",
595 |     "version": 3
596 |    },
597 |    "file_extension": ".py",
598 |    "mimetype": "text/x-python",
599 |    "name": "python",
600 |    "nbconvert_exporter": "python",
601 |    "pygments_lexer": "ipython3",
602 |    "version": "3.7.6"
603 |   }
604 |  },
605 |  "nbformat": 4,
606 |  "nbformat_minor": 1
607 | }
608 | 
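
The driver cell above wires up the planner through n_sequence, elite_ratio and plan_horizon, which are the knobs of a cross-entropy-method (CEM) style search over action sequences. A minimal, self-contained sketch of one such CEM iteration is given below; the score function is a hypothetical stand-in for the learned ensemble model's return estimate that PETS would use, so this is an illustration of what the hyperparameters mean, not the chapter's implementation.

import numpy as np

def cem_iteration(mean, var, n_sequence=50, elite_ratio=0.2, action_bound=2.0,
                  score=lambda seqs: -np.sum(seqs ** 2, axis=(1, 2))):
    # Sample n_sequence candidate action sequences around the current distribution;
    # mean/var have shape (plan_horizon, action_dim).
    seqs = np.clip(np.random.normal(mean, np.sqrt(var), size=(n_sequence,) + mean.shape),
                   -action_bound, action_bound)
    returns = score(seqs)  # stand-in for the model-based return estimate
    elite = seqs[np.argsort(returns)[-int(n_sequence * elite_ratio):]]
    # Refit the sampling distribution to the elite fraction for the next iteration.
    return elite.mean(axis=0), elite.var(axis=0)

plan_horizon, action_dim = 25, 1
mean, var = np.zeros((plan_horizon, action_dim)), np.ones((plan_horizon, action_dim))
for _ in range(5):
    mean, var = cem_iteration(mean, var)
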


--------------------------------------------------------------------------------
/第17章-基于模型的策略优化.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "metadata": {
  7 |     "executionInfo": {
  8 |      "elapsed": 17,
  9 |      "status": "ok",
 10 |      "timestamp": 1649957428444,
 11 |      "user": {
 12 |       "displayName": "Sam Lu",
 13 |       "userId": "15789059763790170725"
 14 |      },
 15 |      "user_tz": -480
 16 |     },
 17 |     "id": "WGYnB9z5GEne"
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import gym\n",
 22 |     "from collections import namedtuple\n",
 23 |     "import itertools\n",
 24 |     "from itertools import count\n",
 25 |     "import torch\n",
 26 |     "import torch.nn as nn\n",
 27 |     "import torch.nn.functional as F\n",
 28 |     "from torch.distributions.normal import Normal\n",
 29 |     "import numpy as np\n",
 30 |     "import collections\n",
 31 |     "import random\n",
 32 |     "import matplotlib.pyplot as plt"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 7,
 38 |    "metadata": {
 39 |     "executionInfo": {
 40 |      "elapsed": 17,
 41 |      "status": "ok",
 42 |      "timestamp": 1649957428445,
 43 |      "user": {
 44 |       "displayName": "Sam Lu",
 45 |       "userId": "15789059763790170725"
 46 |      },
 47 |      "user_tz": -480
 48 |     },
 49 |     "id": "z8M3b0CiGEnj"
 50 |    },
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "class PolicyNet(torch.nn.Module):\n",
 54 |     "    def __init__(self, state_dim, hidden_dim, action_dim, action_bound):\n",
 55 |     "        super(PolicyNet, self).__init__()\n",
 56 |     "        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)\n",
 57 |     "        self.fc_mu = torch.nn.Linear(hidden_dim, action_dim)\n",
 58 |     "        self.fc_std = torch.nn.Linear(hidden_dim, action_dim)\n",
 59 |     "        self.action_bound = action_bound\n",
 60 |     "\n",
 61 |     "    def forward(self, x):\n",
 62 |     "        x = F.relu(self.fc1(x))\n",
 63 |     "        mu = self.fc_mu(x)\n",
 64 |     "        std = F.softplus(self.fc_std(x))\n",
 65 |     "        dist = Normal(mu, std)\n",
 66 |     "        normal_sample = dist.rsample()  # rsample()是重参数化采样函数\n",
 67 |     "        log_prob = dist.log_prob(normal_sample)\n",
 68 |     "        action = torch.tanh(normal_sample)\n",
 69 |     "        log_prob = log_prob - torch.log(1 - action.pow(2) + 1e-7)  # 计算tanh_normal分布的对数概率密度\n",
 70 |     "        action = action * self.action_bound\n",
 71 |     "        return action, log_prob\n",
 72 |     "\n",
 73 |     "\n",
 74 |     "class QValueNet(torch.nn.Module):\n",
 75 |     "    def __init__(self, state_dim, hidden_dim, action_dim):\n",
 76 |     "        super(QValueNet, self).__init__()\n",
 77 |     "        self.fc1 = torch.nn.Linear(state_dim + action_dim, hidden_dim)\n",
 78 |     "        self.fc2 = torch.nn.Linear(hidden_dim, 1)\n",
 79 |     "\n",
 80 |     "    def forward(self, x, a):\n",
 81 |     "        cat = torch.cat([x, a], dim=1)  # 拼接状态和动作\n",
 82 |     "        x = F.relu(self.fc1(cat))\n",
 83 |     "        return self.fc2(x)\n",
 84 |     "\n",
 85 |     "\n",
 86 |     "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\n",
 87 |     "    \"cpu\")\n",
 88 |     "\n",
 89 |     "\n",
 90 |     "class SAC:\n",
 91 |     "    ''' 处理连续动作的SAC算法 '''\n",
 92 |     "    def __init__(self, state_dim, hidden_dim, action_dim, action_bound,\n",
 93 |     "                 actor_lr, critic_lr, alpha_lr, target_entropy, tau, gamma):\n",
 94 |     "        self.actor = PolicyNet(state_dim, hidden_dim, action_dim,\n",
 95 |     "                               action_bound).to(device)  # 策略网络\n",
 96 |     "        # 第一个Q网络\n",
 97 |     "        self.critic_1 = QValueNet(state_dim, hidden_dim, action_dim).to(device)\n",
 98 |     "        # 第二个Q网络\n",
 99 |     "        self.critic_2 = QValueNet(state_dim, hidden_dim, action_dim).to(device)\n",
100 |     "        self.target_critic_1 = QValueNet(state_dim, hidden_dim,\n",
101 |     "                                         action_dim).to(device)  # 第一个目标Q网络\n",
102 |     "        self.target_critic_2 = QValueNet(state_dim, hidden_dim,\n",
103 |     "                                         action_dim).to(device)  # 第二个目标Q网络\n",
104 |     "        # 令目标Q网络的初始参数和Q网络一样\n",
105 |     "        self.target_critic_1.load_state_dict(self.critic_1.state_dict())\n",
106 |     "        self.target_critic_2.load_state_dict(self.critic_2.state_dict())\n",
107 |     "        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),\n",
108 |     "                                                lr=actor_lr)\n",
109 |     "        self.critic_1_optimizer = torch.optim.Adam(self.critic_1.parameters(),\n",
110 |     "                                                   lr=critic_lr)\n",
111 |     "        self.critic_2_optimizer = torch.optim.Adam(self.critic_2.parameters(),\n",
112 |     "                                                   lr=critic_lr)\n",
113 |     "        # 使用alpha的log值,可以使训练结果比较稳定\n",
114 |     "        self.log_alpha = torch.tensor(np.log(0.01), dtype=torch.float)\n",
115 |     "        self.log_alpha.requires_grad = True  # 可以对alpha求梯度\n",
116 |     "        self.log_alpha_optimizer = torch.optim.Adam([self.log_alpha],\n",
117 |     "                                                    lr=alpha_lr)\n",
118 |     "        self.target_entropy = target_entropy  # 目标熵的大小\n",
119 |     "        self.gamma = gamma\n",
120 |     "        self.tau = tau\n",
121 |     "\n",
122 |     "    def take_action(self, state):\n",
123 |     "        state = torch.tensor([state], dtype=torch.float).to(device)\n",
124 |     "        action = self.actor(state)[0]\n",
125 |     "        return [action.item()]\n",
126 |     "\n",
127 |     "    def calc_target(self, rewards, next_states, dones):  # 计算目标Q值\n",
128 |     "        next_actions, log_prob = self.actor(next_states)\n",
129 |     "        entropy = -log_prob\n",
130 |     "        q1_value = self.target_critic_1(next_states, next_actions)\n",
131 |     "        q2_value = self.target_critic_2(next_states, next_actions)\n",
132 |     "        next_value = torch.min(q1_value,\n",
133 |     "                               q2_value) + self.log_alpha.exp() * entropy\n",
134 |     "        td_target = rewards + self.gamma * next_value * (1 - dones)\n",
135 |     "        return td_target\n",
136 |     "\n",
137 |     "    def soft_update(self, net, target_net):\n",
138 |     "        for param_target, param in zip(target_net.parameters(),\n",
139 |     "                                       net.parameters()):\n",
140 |     "            param_target.data.copy_(param_target.data * (1.0 - self.tau) +\n",
141 |     "                                    param.data * self.tau)\n",
142 |     "\n",
143 |     "    def update(self, transition_dict):\n",
144 |     "        states = torch.tensor(transition_dict['states'],\n",
145 |     "                              dtype=torch.float).to(device)\n",
146 |     "        actions = torch.tensor(transition_dict['actions'],\n",
147 |     "                               dtype=torch.float).view(-1, 1).to(device)\n",
148 |     "        rewards = torch.tensor(transition_dict['rewards'],\n",
149 |     "                               dtype=torch.float).view(-1, 1).to(device)\n",
150 |     "        next_states = torch.tensor(transition_dict['next_states'],\n",
151 |     "                                   dtype=torch.float).to(device)\n",
152 |     "        dones = torch.tensor(transition_dict['dones'],\n",
153 |     "                             dtype=torch.float).view(-1, 1).to(device)\n",
154 |     "        rewards = (rewards + 8.0) / 8.0  # 对倒立摆环境的奖励进行重塑\n",
155 |     "\n",
156 |     "        # 更新两个Q网络\n",
157 |     "        td_target = self.calc_target(rewards, next_states, dones)\n",
158 |     "        critic_1_loss = torch.mean(\n",
159 |     "            F.mse_loss(self.critic_1(states, actions), td_target.detach()))\n",
160 |     "        critic_2_loss = torch.mean(\n",
161 |     "            F.mse_loss(self.critic_2(states, actions), td_target.detach()))\n",
162 |     "        self.critic_1_optimizer.zero_grad()\n",
163 |     "        critic_1_loss.backward()\n",
164 |     "        self.critic_1_optimizer.step()\n",
165 |     "        self.critic_2_optimizer.zero_grad()\n",
166 |     "        critic_2_loss.backward()\n",
167 |     "        self.critic_2_optimizer.step()\n",
168 |     "\n",
169 |     "        # 更新策略网络\n",
170 |     "        new_actions, log_prob = self.actor(states)\n",
171 |     "        entropy = -log_prob\n",
172 |     "        q1_value = self.critic_1(states, new_actions)\n",
173 |     "        q2_value = self.critic_2(states, new_actions)\n",
174 |     "        actor_loss = torch.mean(-self.log_alpha.exp() * entropy -\n",
175 |     "                                torch.min(q1_value, q2_value))\n",
176 |     "        self.actor_optimizer.zero_grad()\n",
177 |     "        actor_loss.backward()\n",
178 |     "        self.actor_optimizer.step()\n",
179 |     "\n",
180 |     "        # 更新alpha值\n",
181 |     "        alpha_loss = torch.mean(\n",
182 |     "            (entropy - self.target_entropy).detach() * self.log_alpha.exp())\n",
183 |     "        self.log_alpha_optimizer.zero_grad()\n",
184 |     "        alpha_loss.backward()\n",
185 |     "        self.log_alpha_optimizer.step()\n",
186 |     "\n",
187 |     "        self.soft_update(self.critic_1, self.target_critic_1)\n",
188 |     "        self.soft_update(self.critic_2, self.target_critic_2)"
189 |    ]
190 |   },
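
The policy above squashes a reparameterized Gaussian sample with tanh, so its log-density needs the change-of-variables correction log(1 - tanh(u)^2). A minimal sketch of that correction on its own, kept outside the class for clarity:

import torch
from torch.distributions.normal import Normal

# If u ~ N(mu, std) and a = tanh(u), then log p(a) = log N(u; mu, std) - log(1 - tanh(u)^2).
mu, std = torch.zeros(4, 1), torch.ones(4, 1)
dist = Normal(mu, std)
u = dist.rsample()              # reparameterized sample, gradients flow to mu and std
a = torch.tanh(u)               # squashed into (-1, 1) before scaling by action_bound
log_prob = dist.log_prob(u) - torch.log(1 - a.pow(2) + 1e-7)
print(a.shape, log_prob.shape)  # both torch.Size([4, 1])
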
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": 8,
194 |    "metadata": {
195 |     "executionInfo": {
196 |      "elapsed": 17,
197 |      "status": "ok",
198 |      "timestamp": 1649957428446,
199 |      "user": {
200 |       "displayName": "Sam Lu",
201 |       "userId": "15789059763790170725"
202 |      },
203 |      "user_tz": -480
204 |     },
205 |     "id": "xfK4N1doGEnl"
206 |    },
207 |    "outputs": [],
208 |    "source": [
209 |     "class Swish(nn.Module):\n",
210 |     "    ''' Swish激活函数 '''\n",
211 |     "    def __init__(self):\n",
212 |     "        super(Swish, self).__init__()\n",
213 |     "\n",
214 |     "    def forward(self, x):\n",
215 |     "        return x * torch.sigmoid(x)\n",
216 |     "\n",
217 |     "\n",
218 |     "def init_weights(m):\n",
219 |     "    ''' 初始化模型权重 '''\n",
220 |     "    def truncated_normal_init(t, mean=0.0, std=0.01):\n",
221 |     "        torch.nn.init.normal_(t, mean=mean, std=std)\n",
222 |     "        while True:\n",
223 |     "            cond = (t < mean - 2 * std) | (t > mean + 2 * std)\n",
224 |     "            if not torch.sum(cond):\n",
225 |     "                break\n",
226 |     "            t = torch.where(\n",
227 |     "                cond,\n",
228 |     "                torch.nn.init.normal_(torch.ones(t.shape, device=device),\n",
229 |     "                                      mean=mean,\n",
230 |     "                                      std=std), t)\n",
231 |     "        return t\n",
232 |     "\n",
233 |     "    if type(m) == nn.Linear or isinstance(m, FCLayer):\n",
234 |     "        truncated_normal_init(m.weight, std=1 / (2 * np.sqrt(m._input_dim)))\n",
235 |     "        m.bias.data.fill_(0.0)\n",
236 |     "\n",
237 |     "\n",
238 |     "class FCLayer(nn.Module):\n",
239 |     "    ''' 集成之后的全连接层 '''\n",
240 |     "    def __init__(self, input_dim, output_dim, ensemble_size, activation):\n",
241 |     "        super(FCLayer, self).__init__()\n",
242 |     "        self._input_dim, self._output_dim = input_dim, output_dim\n",
243 |     "        self.weight = nn.Parameter(\n",
244 |     "            torch.Tensor(ensemble_size, input_dim, output_dim).to(device))\n",
245 |     "        self._activation = activation\n",
246 |     "        self.bias = nn.Parameter(\n",
247 |     "            torch.Tensor(ensemble_size, output_dim).to(device))\n",
248 |     "\n",
249 |     "    def forward(self, x):\n",
250 |     "        return self._activation(\n",
251 |     "            torch.add(torch.bmm(x, self.weight), self.bias[:, None, :]))"
252 |    ]
253 |   },
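
FCLayer keeps one weight matrix per ensemble member and evaluates every member in a single batched matrix multiplication. A shape-only sketch of what torch.bmm does with those parameters:

import torch

ensemble_size, batch_size, in_dim, out_dim = 5, 32, 4, 200
x = torch.randn(ensemble_size, batch_size, in_dim)    # the same batch, repeated per member
weight = torch.randn(ensemble_size, in_dim, out_dim)  # corresponds to FCLayer.weight
bias = torch.randn(ensemble_size, out_dim)            # corresponds to FCLayer.bias
# bmm multiplies matching leading slices: (E, B, I) @ (E, I, O) -> (E, B, O);
# the bias is broadcast over the batch dimension exactly as in FCLayer.forward.
out = torch.bmm(x, weight) + bias[:, None, :]
print(out.shape)  # torch.Size([5, 32, 200])
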
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 10,
257 |    "metadata": {
258 |     "executionInfo": {
259 |      "elapsed": 779,
260 |      "status": "ok",
261 |      "timestamp": 1649957441286,
262 |      "user": {
263 |       "displayName": "Sam Lu",
264 |       "userId": "15789059763790170725"
265 |      },
266 |      "user_tz": -480
267 |     },
268 |     "id": "o8OfdjXJGEnm"
269 |    },
270 |    "outputs": [],
271 |    "source": [
272 |     "class EnsembleModel(nn.Module):\n",
273 |     "    ''' 环境模型集成 '''\n",
274 |     "    def __init__(self,\n",
275 |     "                 state_dim,\n",
276 |     "                 action_dim,\n",
277 |     "                 model_alpha,\n",
278 |     "                 ensemble_size=5,\n",
279 |     "                 learning_rate=1e-3):\n",
280 |     "        super(EnsembleModel, self).__init__()\n",
281 |     "        # 输出包括均值和方差,因此是状态与奖励维度之和的两倍\n",
282 |     "        self._output_dim = (state_dim + 1) * 2\n",
283 |     "        self._model_alpha = model_alpha  # 模型损失函数中加权时的权重\n",
284 |     "        self._max_logvar = nn.Parameter((torch.ones(\n",
285 |     "            (1, self._output_dim // 2)).float() / 2).to(device),\n",
286 |     "                                        requires_grad=False)\n",
287 |     "        self._min_logvar = nn.Parameter((-torch.ones(\n",
288 |     "            (1, self._output_dim // 2)).float() * 10).to(device),\n",
289 |     "                                        requires_grad=False)\n",
290 |     "\n",
291 |     "        self.layer1 = FCLayer(state_dim + action_dim, 200, ensemble_size,\n",
292 |     "                              Swish())\n",
293 |     "        self.layer2 = FCLayer(200, 200, ensemble_size, Swish())\n",
294 |     "        self.layer3 = FCLayer(200, 200, ensemble_size, Swish())\n",
295 |     "        self.layer4 = FCLayer(200, 200, ensemble_size, Swish())\n",
296 |     "        self.layer5 = FCLayer(200, self._output_dim, ensemble_size,\n",
297 |     "                              nn.Identity())\n",
298 |     "        self.apply(init_weights)  # 初始化环境模型中的参数\n",
299 |     "        self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)\n",
300 |     "\n",
301 |     "    def forward(self, x, return_log_var=False):\n",
302 |     "        ret = self.layer5(self.layer4(self.layer3(self.layer2(\n",
303 |     "            self.layer1(x)))))\n",
304 |     "        mean = ret[:, :, :self._output_dim // 2]\n",
305 |     "        # 在PETS算法中,将方差控制在最小值和最大值之间\n",
306 |     "        logvar = self._max_logvar - F.softplus(\n",
307 |     "            self._max_logvar - ret[:, :, self._output_dim // 2:])\n",
308 |     "        logvar = self._min_logvar + F.softplus(logvar - self._min_logvar)\n",
309 |     "        return mean, (logvar if return_log_var else torch.exp(logvar))\n",
310 |     "\n",
311 |     "    def loss(self, mean, logvar, labels, use_var_loss=True):\n",
312 |     "        inverse_var = torch.exp(-logvar)\n",
313 |     "        if use_var_loss:\n",
314 |     "            mse_loss = torch.mean(torch.mean(torch.pow(mean - labels, 2) *\n",
315 |     "                                             inverse_var,\n",
316 |     "                                             dim=-1),\n",
317 |     "                                  dim=-1)\n",
318 |     "            var_loss = torch.mean(torch.mean(logvar, dim=-1), dim=-1)\n",
319 |     "            total_loss = torch.sum(mse_loss) + torch.sum(var_loss)\n",
320 |     "        else:\n",
321 |     "            mse_loss = torch.mean(torch.pow(mean - labels, 2), dim=(1, 2))\n",
322 |     "            total_loss = torch.sum(mse_loss)\n",
323 |     "        return total_loss, mse_loss\n",
324 |     "\n",
325 |     "    def train(self, loss):\n",
326 |     "        self.optimizer.zero_grad()\n",
327 |     "        loss += self._model_alpha * torch.sum(\n",
328 |     "            self._max_logvar) - self._model_alpha * torch.sum(self._min_logvar)\n",
329 |     "        loss.backward()\n",
330 |     "        self.optimizer.step()\n",
331 |     "\n",
332 |     "\n",
333 |     "class EnsembleDynamicsModel:\n",
334 |     "    ''' 环境模型集成,加入精细化的训练 '''\n",
335 |     "    def __init__(self, state_dim, action_dim, model_alpha=0.01, num_network=5):\n",
336 |     "        self._num_network = num_network\n",
337 |     "        self._state_dim, self._action_dim = state_dim, action_dim\n",
338 |     "        self.model = EnsembleModel(state_dim,\n",
339 |     "                                   action_dim,\n",
340 |     "                                   model_alpha,\n",
341 |     "                                   ensemble_size=num_network)\n",
342 |     "        self._epoch_since_last_update = 0\n",
343 |     "\n",
344 |     "    def train(self,\n",
345 |     "              inputs,\n",
346 |     "              labels,\n",
347 |     "              batch_size=64,\n",
348 |     "              holdout_ratio=0.1,\n",
349 |     "              max_iter=20):\n",
350 |     "        # 设置训练集与验证集\n",
351 |     "        permutation = np.random.permutation(inputs.shape[0])\n",
352 |     "        inputs, labels = inputs[permutation], labels[permutation]\n",
353 |     "        num_holdout = int(inputs.shape[0] * holdout_ratio)\n",
354 |     "        train_inputs, train_labels = inputs[num_holdout:], labels[num_holdout:]\n",
355 |     "        holdout_inputs, holdout_labels = inputs[:\n",
356 |     "                                                num_holdout], labels[:\n",
357 |     "                                                                     num_holdout]\n",
358 |     "        holdout_inputs = torch.from_numpy(holdout_inputs).float().to(device)\n",
359 |     "        holdout_labels = torch.from_numpy(holdout_labels).float().to(device)\n",
360 |     "        holdout_inputs = holdout_inputs[None, :, :].repeat(\n",
361 |     "            [self._num_network, 1, 1])\n",
362 |     "        holdout_labels = holdout_labels[None, :, :].repeat(\n",
363 |     "            [self._num_network, 1, 1])\n",
364 |     "\n",
365 |     "        # 保留最好的结果\n",
366 |     "        self._snapshots = {i: (None, 1e10) for i in range(self._num_network)}\n",
367 |     "\n",
368 |     "        for epoch in itertools.count():\n",
369 |     "            # 定义每一个网络的训练数据\n",
370 |     "            train_index = np.vstack([\n",
371 |     "                np.random.permutation(train_inputs.shape[0])\n",
372 |     "                for _ in range(self._num_network)\n",
373 |     "            ])\n",
374 |     "            # 所有真实数据都用来训练\n",
375 |     "            for batch_start_pos in range(0, train_inputs.shape[0], batch_size):\n",
376 |     "                batch_index = train_index[:, batch_start_pos:batch_start_pos +\n",
377 |     "                                          batch_size]\n",
378 |     "                train_input = torch.from_numpy(\n",
379 |     "                    train_inputs[batch_index]).float().to(device)\n",
380 |     "                train_label = torch.from_numpy(\n",
381 |     "                    train_labels[batch_index]).float().to(device)\n",
382 |     "\n",
383 |     "                mean, logvar = self.model(train_input, return_log_var=True)\n",
384 |     "                loss, _ = self.model.loss(mean, logvar, train_label)\n",
385 |     "                self.model.train(loss)\n",
386 |     "\n",
387 |     "            with torch.no_grad():\n",
388 |     "                mean, logvar = self.model(holdout_inputs, return_log_var=True)\n",
389 |     "                _, holdout_losses = self.model.loss(mean,\n",
390 |     "                                                    logvar,\n",
391 |     "                                                    holdout_labels,\n",
392 |     "                                                    use_var_loss=False)\n",
393 |     "                holdout_losses = holdout_losses.cpu()\n",
394 |     "                break_condition = self._save_best(epoch, holdout_losses)\n",
395 |     "                if break_condition or epoch > max_iter:  # 结束训练\n",
396 |     "                    break\n",
397 |     "\n",
398 |     "    def _save_best(self, epoch, losses, threshold=0.1):\n",
399 |     "        updated = False\n",
400 |     "        for i in range(len(losses)):\n",
401 |     "            current = losses[i]\n",
402 |     "            _, best = self._snapshots[i]\n",
403 |     "            improvement = (best - current) / best\n",
404 |     "            if improvement > threshold:\n",
405 |     "                self._snapshots[i] = (epoch, current)\n",
406 |     "                updated = True\n",
407 |     "        self._epoch_since_last_update = 0 if updated else self._epoch_since_last_update + 1\n",
408 |     "        return self._epoch_since_last_update > 5\n",
409 |     "\n",
410 |     "    def predict(self, inputs, batch_size=64):\n",
411 |     "        inputs = np.tile(inputs, (self._num_network, 1, 1))\n",
412 |     "        inputs = torch.tensor(inputs, dtype=torch.float).to(device)\n",
413 |     "        mean, var = self.model(inputs, return_log_var=False)\n",
414 |     "        return mean.detach().cpu().numpy(), var.detach().cpu().numpy()\n",
415 |     "\n",
416 |     "\n",
417 |     "class FakeEnv:\n",
418 |     "    def __init__(self, model):\n",
419 |     "        self.model = model\n",
420 |     "\n",
421 |     "    def step(self, obs, act):\n",
422 |     "        inputs = np.concatenate((obs, act), axis=-1)\n",
423 |     "        ensemble_model_means, ensemble_model_vars = self.model.predict(inputs)\n",
424 |     "        ensemble_model_means[:, :, 1:] += obs\n",
425 |     "        ensemble_model_stds = np.sqrt(ensemble_model_vars)\n",
426 |     "        ensemble_samples = ensemble_model_means + np.random.normal(\n",
427 |     "            size=ensemble_model_means.shape) * ensemble_model_stds\n",
428 |     "\n",
429 |     "        num_models, batch_size, _ = ensemble_model_means.shape\n",
430 |     "        models_to_use = np.random.choice(\n",
431 |     "            [i for i in range(self.model._num_network)], size=batch_size)\n",
432 |     "        batch_inds = np.arange(0, batch_size)\n",
433 |     "        samples = ensemble_samples[models_to_use, batch_inds]\n",
434 |     "        rewards, next_obs = samples[:, :1][0][0], samples[:, 1:][0]\n",
435 |     "        return rewards, next_obs"
436 |    ]
437 |   },
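
FakeEnv.step treats column 0 of the model output as the reward and columns 1: as the predicted state change, adds that change back onto the current observation, samples from the predicted Gaussians, and keeps the sample of one randomly chosen ensemble member. A shape-level sketch with a stubbed prediction standing in for the trained ensemble (the stub is not the learned model, only its output shapes):

import numpy as np

num_networks, state_dim = 5, 3
obs = np.zeros((1, state_dim))  # FakeEnv.step is used with a single observation at a time

# Stub for EnsembleDynamicsModel.predict: one Gaussian per member, per batch element.
mean = np.random.randn(num_networks, 1, 1 + state_dim)  # column 0: reward, columns 1:: state delta
var = np.full_like(mean, 0.01)

mean[:, :, 1:] += obs                               # delta prediction -> absolute next state
samples = mean + np.random.normal(size=mean.shape) * np.sqrt(var)
member = np.random.choice(num_networks, size=1)     # pick one ensemble member per batch element
sample = samples[member, np.arange(1)]              # shape (1, 1 + state_dim)
reward, next_obs = sample[0, 0], sample[0, 1:]
print(reward, next_obs.shape)                       # scalar reward, (3,) next observation
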
438 |   {
439 |    "cell_type": "code",
440 |    "execution_count": 11,
441 |    "metadata": {
442 |     "executionInfo": {
443 |      "elapsed": 636,
444 |      "status": "ok",
445 |      "timestamp": 1649957452282,
446 |      "user": {
447 |       "displayName": "Sam Lu",
448 |       "userId": "15789059763790170725"
449 |      },
450 |      "user_tz": -480
451 |     },
452 |     "id": "T1X6ABP3GEno"
453 |    },
454 |    "outputs": [],
455 |    "source": [
456 |     "class MBPO:\n",
457 |     "    def __init__(self, env, agent, fake_env, env_pool, model_pool,\n",
458 |     "                 rollout_length, rollout_batch_size, real_ratio, num_episode):\n",
459 |     "\n",
460 |     "        self.env = env\n",
461 |     "        self.agent = agent\n",
462 |     "        self.fake_env = fake_env\n",
463 |     "        self.env_pool = env_pool\n",
464 |     "        self.model_pool = model_pool\n",
465 |     "        self.rollout_length = rollout_length\n",
466 |     "        self.rollout_batch_size = rollout_batch_size\n",
467 |     "        self.real_ratio = real_ratio\n",
468 |     "        self.num_episode = num_episode\n",
469 |     "\n",
470 |     "    def rollout_model(self):\n",
471 |     "        observations, _, _, _, _ = self.env_pool.sample(\n",
472 |     "            self.rollout_batch_size)\n",
473 |     "        for obs in observations:\n",
474 |     "            for i in range(self.rollout_length):\n",
475 |     "                action = self.agent.take_action(obs)\n",
476 |     "                reward, next_obs = self.fake_env.step(obs, action)\n",
477 |     "                self.model_pool.add(obs, action, reward, next_obs, False)\n",
478 |     "                obs = next_obs\n",
479 |     "\n",
480 |     "    def update_agent(self, policy_train_batch_size=64):\n",
481 |     "        env_batch_size = int(policy_train_batch_size * self.real_ratio)\n",
482 |     "        model_batch_size = policy_train_batch_size - env_batch_size\n",
483 |     "        for epoch in range(10):\n",
484 |     "            env_obs, env_action, env_reward, env_next_obs, env_done = self.env_pool.sample(\n",
485 |     "                env_batch_size)\n",
486 |     "            if self.model_pool.size() > 0:\n",
487 |     "                model_obs, model_action, model_reward, model_next_obs, model_done = self.model_pool.sample(\n",
488 |     "                    model_batch_size)\n",
489 |     "                obs = np.concatenate((env_obs, model_obs), axis=0)\n",
490 |     "                action = np.concatenate((env_action, model_action), axis=0)\n",
491 |     "                next_obs = np.concatenate((env_next_obs, model_next_obs),\n",
492 |     "                                          axis=0)\n",
493 |     "                reward = np.concatenate((env_reward, model_reward), axis=0)\n",
494 |     "                done = np.concatenate((env_done, model_done), axis=0)\n",
495 |     "            else:\n",
496 |     "                obs, action, next_obs, reward, done = env_obs, env_action, env_next_obs, env_reward, env_done\n",
497 |     "            transition_dict = {\n",
498 |     "                'states': obs,\n",
499 |     "                'actions': action,\n",
500 |     "                'next_states': next_obs,\n",
501 |     "                'rewards': reward,\n",
502 |     "                'dones': done\n",
503 |     "            }\n",
504 |     "            self.agent.update(transition_dict)\n",
505 |     "\n",
506 |     "    def train_model(self):\n",
507 |     "        obs, action, reward, next_obs, done = self.env_pool.return_all_samples(\n",
508 |     "        )\n",
509 |     "        inputs = np.concatenate((obs, action), axis=-1)\n",
510 |     "        reward = np.array(reward)\n",
511 |     "        labels = np.concatenate(\n",
512 |     "            (np.reshape(reward, (reward.shape[0], -1)), next_obs - obs),\n",
513 |     "            axis=-1)\n",
514 |     "        self.fake_env.model.train(inputs, labels)\n",
515 |     "\n",
516 |     "    def explore(self):\n",
517 |     "        obs, done, episode_return = self.env.reset(), False, 0\n",
518 |     "        while not done:\n",
519 |     "            action = self.agent.take_action(obs)\n",
520 |     "            next_obs, reward, done, _ = self.env.step(action)\n",
521 |     "            self.env_pool.add(obs, action, reward, next_obs, done)\n",
522 |     "            obs = next_obs\n",
523 |     "            episode_return += reward\n",
524 |     "        return episode_return\n",
525 |     "\n",
526 |     "    def train(self):\n",
527 |     "        return_list = []\n",
528 |     "        explore_return = self.explore()  # 随机探索采取数据\n",
529 |     "        print('episode: 1, return: %d' % explore_return)\n",
530 |     "        return_list.append(explore_return)\n",
531 |     "\n",
532 |     "        for i_episode in range(self.num_episode - 1):\n",
533 |     "            obs, done, episode_return = self.env.reset(), False, 0\n",
534 |     "            step = 0\n",
535 |     "            while not done:\n",
536 |     "                if step % 50 == 0:\n",
537 |     "                    self.train_model()\n",
538 |     "                    self.rollout_model()\n",
539 |     "                action = self.agent.take_action(obs)\n",
540 |     "                next_obs, reward, done, _ = self.env.step(action)\n",
541 |     "                self.env_pool.add(obs, action, reward, next_obs, done)\n",
542 |     "                obs = next_obs\n",
543 |     "                episode_return += reward\n",
544 |     "\n",
545 |     "                self.update_agent()\n",
546 |     "                step += 1\n",
547 |     "            return_list.append(episode_return)\n",
548 |     "            print('episode: %d, return: %d' % (i_episode + 2, episode_return))\n",
549 |     "        return return_list\n",
550 |     "\n",
551 |     "\n",
552 |     "class ReplayBuffer:\n",
553 |     "    def __init__(self, capacity):\n",
554 |     "        self.buffer = collections.deque(maxlen=capacity)\n",
555 |     "\n",
556 |     "    def add(self, state, action, reward, next_state, done):\n",
557 |     "        self.buffer.append((state, action, reward, next_state, done))\n",
558 |     "\n",
559 |     "    def size(self):\n",
560 |     "        return len(self.buffer)\n",
561 |     "\n",
562 |     "    def sample(self, batch_size):\n",
563 |     "        if batch_size > len(self.buffer):\n",
564 |     "            return self.return_all_samples()\n",
565 |     "        else:\n",
566 |     "            transitions = random.sample(self.buffer, batch_size)\n",
567 |     "            state, action, reward, next_state, done = zip(*transitions)\n",
568 |     "            return np.array(state), action, reward, np.array(next_state), done\n",
569 |     "\n",
570 |     "    def return_all_samples(self):\n",
571 |     "        all_transitions = list(self.buffer)\n",
572 |     "        state, action, reward, next_state, done = zip(*all_transitions)\n",
573 |     "        return np.array(state), action, reward, np.array(next_state), done"
574 |    ]
575 |   },
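
Two bookkeeping relationships in the class above are easy to miss: each call to rollout_model adds up to rollout_batch_size * rollout_length transitions to model_pool (which is sized to exactly that number in the training cell below, so every round of branched rollouts effectively refreshes the pool), and update_agent splits each SAC batch between real and model data according to real_ratio. The arithmetic, spelled out with the hyperparameters used below:

# Branched-rollout budget: one call to rollout_model generates at most this many model transitions,
# which matches the capacity chosen for model_pool in the training cell.
rollout_batch_size, rollout_length = 1000, 1
model_pool_capacity = rollout_batch_size * rollout_length    # 1000

# Mixed SAC batch in update_agent: real_ratio of the batch from env_pool, the rest from model_pool.
policy_train_batch_size, real_ratio = 64, 0.5
env_batch_size = int(policy_train_batch_size * real_ratio)   # 32 real transitions
model_batch_size = policy_train_batch_size - env_batch_size  # 32 model-generated transitions
print(model_pool_capacity, env_batch_size, model_batch_size)
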
576 |   {
577 |    "cell_type": "code",
578 |    "execution_count": 12,
579 |    "metadata": {
580 |     "colab": {
581 |      "base_uri": "https://localhost:8080/",
582 |      "height": 680
583 |     },
584 |     "executionInfo": {
585 |      "elapsed": 613836,
586 |      "status": "ok",
587 |      "timestamp": 1649958070782,
588 |      "user": {
589 |       "displayName": "Sam Lu",
590 |       "userId": "15789059763790170725"
591 |      },
592 |      "user_tz": -480
593 |     },
594 |     "id": "_gcY5HvTGEnr",
595 |     "outputId": "49c828a2-35ec-44d9-f952-a52e01e46fe8"
596 |    },
597 |    "outputs": [
598 |     {
599 |      "name": "stderr",
600 |      "output_type": "stream",
601 |      "text": [
602 |       "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:59: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  ../torch/csrc/utils/tensor_new.cpp:201.)\n"
603 |      ]
604 |     },
605 |     {
606 |      "name": "stdout",
607 |      "output_type": "stream",
608 |      "text": [
609 |       "episode: 1, return: -1617\n",
610 |       "episode: 2, return: -1463\n",
611 |       "episode: 3, return: -1407\n",
612 |       "episode: 4, return: -929\n",
613 |       "episode: 5, return: -860\n",
614 |       "episode: 6, return: -643\n",
615 |       "episode: 7, return: -128\n",
616 |       "episode: 8, return: -368\n",
617 |       "episode: 9, return: -118\n",
618 |       "episode: 10, return: -123\n",
619 |       "episode: 11, return: -122\n",
620 |       "episode: 12, return: -118\n",
621 |       "episode: 13, return: -119\n",
622 |       "episode: 14, return: -119\n",
623 |       "episode: 15, return: -121\n",
624 |       "episode: 16, return: -123\n",
625 |       "episode: 17, return: 0\n",
626 |       "episode: 18, return: -125\n",
627 |       "episode: 19, return: -126\n",
628 |       "episode: 20, return: -243\n"
629 |      ]
630 |     },
631 |     {
632 |      "data": {
633 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEWCAYAAACjYXoKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU1f3/8dc7CUnYd0H2xaCCK0YE69aKCrYW14pWRWtLbbXbt7Xq1y528fe1trWtttXiUtHWqq1V+X7V4lYLakAWEUEEQthlycK+ZJvP7497o2NMQshk5k6Sz/PxmMfcOefM3M9MkvnknnPvOTIznHPOuURkRB2Ac865ls+TiXPOuYR5MnHOOZcwTybOOecS5snEOedcwjyZOOecS5gnE+daGUlnSNrQ3G2da4gnE5cWJK2RVCGpV63ytyWZpCHh44fDdrsl7ZK0QNLpce2vllQd1u+UtEjS5+Lqu0m6V9JmSXslvSvpmhS9xzMkxeJiX56qfbcEki6XtFbSHknPSOoRdUyu8TyZuHSyGris5oGko4EOdbS708w6AV2Ae4F/SsqMqy8I67sBDwJPSuouKRt4GRgMjAO6AjcCd0j6r2S8oTp8EBf7TcD9kkamaN9pS9Io4E/AlUAfYC/wx0iDcgfFk4lLJ48CV8U9ngI8Ul9jC6ZveAzoQfAFVLs+BjwEtAeGE3xRDQIuMbPVZlZpZv8Cvgn8VFKXuvYj6WRJ8yTtCO9Pjqt7TdLPJL0RHm28WPvoqr7YzewZYBswUlKGpJslrZJUKunJmv/MJQ0Jj86mSFonqUTSrXExtA+P2LZJeg84sVb8JumwuMcPS/p5Pe+13rY1XWKSvi9pq6RNks6XdK6kFZLKJP13Pa/bT9K++KMNSceH76Ud8EXgf81slpntBn4IXCip84E+S5cePJm4dDIH6CLpyPBIYzLwl/oah22uIjii2VJHfRbwZWA3sBI4C3jBzPbUavoUkEtwtFL7NXoAzwF3Az2Bu4DnJPWMa3Y5cA1wCJANfO9AbzRMHhcQHD29C3wDOB84HehHkGT+UOtppwCHA2cCP5J0ZFj+Y4JkORw4hyAJJ0tfgs+qP/Aj4H7gCuAE4FTgh5KG1n6SmX0AFAAXxRVfDvzDzCqBUcA7ce1XARXAiOS8DdfcPJm4dFNzdHIWsAzYWEeb70naTpAkfgv80Myq4+rHhvWbCbrNLjCzHUAvYFPtFzOzKqAkrK/ts8BKM3vUzKrM7G/A+8B5cW3+bGYrzGwf8CRwXAPvr18YWwlBErjSzJYD1wG3mtkGMysHbgMuDhNijZ+Y2T4ze4fgi/fYsPwLwO1mVmZm6wkSX7JUhvuqBB4n+Mx+Z2a7zGwp8F5cXLU9RtiNKUkE/yw8FtZ1AnbUar8D8COTFiLrwE2cS6lHgVnAUOrv4vqVmf0g/EIaBbwoqczMXgjr55jZKXU8rwQ4tHZh+IXdK6yvrR+wtlbZWoL/zGtsjtveS/DFWJ8PzGxAHeWDgaclxeLKqvl49119++kHrK8VX7KUxiXufeF9/FHhvpq4JO2OKx9JcAR4j6RDCY44YsDssH43wThSvC7AruYL3SWTH5m4tGJmawm6rc4F/nmAtmZmS4A3CI4gDuRlYKKkjrXKLwLKCbrZavuA4Is+3iDqPmJKxHpgopl1i7vlmllj9rMJGFgrvnh7+fiJDH0beK2DadsgM+sUd1tnZtuAF4FLCbq4HrePpi1fStwRjaRhQA6woqn7d6nlycSlo2uBz9QxtvEJko4gGEtY2ojXfRTYAPw9HNRuJ+kcgm6h28KusNqeB0aEp61mSbqU4L/s/2vsm2mk+4DbJQ0GkNRb0qRGPvdJ4JbwjLUBBOMv8RYBl0vKlDSBYFymPgfTtikeI+jGvJiPurgA/gqcJ+nUMNn/FPinmfmRSQvhycSlHTNbZWbzG2jy/fBajT0E/+n+meC00gO9bjkwnuAoYC6wk2BA/VYz+2U9zykFPgd8FygFvg98zszq6hJLxO+AGQRddrsIjpJOauRzf0LQtbWa4PN4tFb9twjGeLYTnDX1TAOvdTBtm2IGkAdsDsd+AAjHW64jSCpbCcZKvt7M+3ZJJF8cyznnXKL8yMQ551zCPJk455xLmCcT55xzCfNk4pxzLmFt9qLFXr162ZAhQ6IOwznnWpQFCxaUmFnv2uVtNpkMGTKE+fMbOvvUOedcbZLqnGHBu7mcc84lzJOJc865hHkycc45lzBPJs455xLmycQ551zCWk0ykTRB0nJJhZJujjoe55xrS1pFMgmXb/0DMJFgevDLJI2MNirnnGs7WkUyAcYAhWZWZGYVBMuJNnYtCOeca5TNO/bz2Nx1VFbHDty4jWktFy325+PLlm6gjrUgJE0FpgIMGlR7MTrnnKtfUfFurnhgLh/s2M9ry7dyz+XHk5OVGXVYaaO1HJk0iplNM7N8M8vv3fsTswE451ydln6wg0vuK6C8KsZXTx/Gi+9t4cvT57Ovojrq0NJGa0kmG/n4GtgDaP41up1zbdD8NWVMnjaHnKwMnrxuHLdMPJI7LzqGNwpLmPLQW+zaXxl1iGmhtSSTeUCepKGSsoHJBMuDOudck722fCtXPDiX3p1y+PvXTmZ4704AfOHEgfxu8vEsXLeNKx6Yy/a9FRFHGr1WkUzMrAq4AZgJLAOeDNeUds65Jnlu8Sa+8sh8hvXqxJPXjaN/t/Yfqz/v2H7cd8UJLNu0i8nT5lC8qzyiSNNDq0gmAGb2vJmNMLPhZnZ71PE451quJ+at4xt/W8ixA7rxt6lj6dUpp85240f24aGrT2Rt6V4u/VMBH2zfl+JI00erSSbOOdcc7p9VxE1Pvcupeb159NqT6Nq+XYPtT8nrxaPXjqF4VzmX3FfA2tI9KYo0vXgycc45wMz49YvLuf35ZXz26EO5/6p82mc37tTf/CE9eOwrY9lbUcUl9xWwcsuuJEebfjyZOOfavFjMuG3GUu55tZBL8wdy92XHk511cF+PRw/oyhNfHYcBl06bw5KNO5ITbJryZOKca9Mqq2N89+/vML1gLVNPG8YdFx1NZoaa9Foj+nTm718dR/t2mVx2/xwWrC1r5mjTlycT51ybtb+ymq/9ZSFPv72RG885nFsmHoHUtERSY0ivjjx53Th6dcrhygff4s3CkmaKNr15MnHOtUm7y6v40sPzeHnZFn46aRTXf/qwhBNJjf7d2vPEV8cysHsHrn54Hq++v6VZXjedeTJxzrU52/dW8MUH5jJ3dRm/ufRYrho3pNn3cUjnXB6fOpbD+3Rm6iMLeG7xpmbfRzppLRM9OpcSZkbZngqyszLIbZdJu8z0/3/MzCivilFRHaO8MkZVLEaGFN4I7jOC7cyMoFyCTH203Vz/saeDLTv3c+WDc1lTupf7rjiBs0b2Sdq+unfM5q9fOYlrH57HN/62kL0Vx3BJ/sADP7EF8mTi3EH4+XPLePD11R8+zswQuWFiyam5b5dJbrsMcrMyyQnvc9sFdUECE
jEDMzAsuDfDgJgFj2MGULMdtgnrYzH7MDGUV8WoqIpRXlVNeVWtx5UxyquDx4n6MOmEyaWGxTey+M2P1WAff0hmhsjKUHCfmfHxxx/eh+WZnyzPyBCZgsyMDDIzgtfLzMggU5AR1zZDCutEZrj9wpLNlO4u5+FrTuTk4b0S/mwOpEtuO6Z/aQxffXQBN/5jMcW7yxk9qDsiiFUEyTpDcfcEn3PN5/3RPXRp345DOucmPe6D5cnEtSiPzlnLK8u28OCUE5t8xk1TrS/byyMFa/j04b05eXgvyquq2V8ZY39lNfvjtys/+jIv21PxYVlwX01VzD78ApGo88sEPvryiP9igeCLMycrg+ysDHKyMsjJyqRbh+xgu10m2ZkZ5LTL+PA+JyszbBfcMjMywgRlxMJkVR37KHFV1yS0WLAdC5NddeyjbeI+esU90MfKP66mzgyqzaiuNqpiwesG97GPP66uu3xvRRXVNfHV3MIkW9MuZsF2zXuorg7vY0avTjn89StjOW5gt+b/JalHh+ws7r8qnxsee5s7/7U8odeS4LbzRjHl5CHNE1wz8WTiWpS/z1/P4g07eGrhBr6Q4u6C372yEkn8z4XH0Ldr+v1n6NJbbrtM/nTlCSxav53yqmoIj0CNj5J0/JFoLDxatQ8TelD2zNsb+fGMpWzZuZ8bzzk8bbogPZm4FmPn/soPLwS768UVnHdMv0ZfoZyolVt28c+FG7j2lKGeSFyTZWaIEwZ3T+g1Jh7Vlx8+u4Q/vraKLTvLueOio9Ni7C76CJxrpLeKyogZfHt8Hpt37ufPb64+8JOayV0vraB9u0y+dsZhKdunc3XJyszg/11wNN8ZP4KnFm7gy9Pns6e8KuqwPJm4lqOgqJTsrAyuO3044488hHv/vYqyPclfR2Lxhu28sGQzXz51GD06Zid9f84diCS+NT6POy48mtkri7ns/jmU7I52CnxPJq7FKFhVyuhB3chtl8lNE45gT0UV97y6Mun7/dWLK+jeoR1fPnVo0vfl3MGYPGYQ067MZ8WWXVx075usKYluxuK0SyaSfinpfUmLJT0tqVtc3S2SCiUtl3ROXPmEsKxQ0s3RRO6SafveCpZt3sm4YcGpnHl9OvOF/IH8Zc5a1pXuTdp+5xSVMmtFMV8/4zA65zY8FblzURg/sg+PfWUsO/dVctG9b7J4w/ZI4ki7ZAK8BBxlZscAK4BbACSNJFiOdxQwAfijpExJmcAfgInASOCysK1rReYUlWEG44b3/LDsO2eNIDND/PLFxE61rI+Z8cuZy+nTJYcrxw1Oyj6caw6jB3XnH187mdx2mUyeNofXlm9NeQxpl0zM7MVwGV6AOcCAcHsS8LiZlZvZaqAQGBPeCs2syMwqgMfDtq4VmVNUSm67DI4d2PXDsj5dcvnyKcP433c+SMp/Y/9evpUFa7fxzTPzyG2XmrPGnGuq4b078fTXT2ZIz458efp8/rFgQ0r3n3bJpJYvAS+E2/2B9XF1G8Ky+spdK1KwqpQTh/QgJ+vjX+pfPT0YFP+f598PLqZrJrGY8cuZKxjcs0PKr2dxrqkO6ZLLE18dy0nDevC9v7/DH/5d2Kx/Fw2JJJlIelnSkjpuk+La3ApUAX9txv1OlTRf0vzi4uLmelmXZKW7y1m+ZRdjh/X8RF3n3HZ88zOHUVBUymsrmu9n+ty7m1i2aSf/ddaItDiH37nG6pzbjj9fPYZJx/XjlzOX86Nnl1IdS35CieSiRTMb31C9pKuBzwFn2kdpdSMQ/y/igLCMBspr73caMA0gPz8/NenaJWxOUbDAUPx4SbzLTxrMn99cwx3Pv89peb0TnmalsjrGXS+t4Ii+nTnvmH4JvZZzUcjOyuA3XziOQzrncP/s1RTvKue3k49Landt2v3LJWkC8H3g82YWf5rODGCypBxJQ4E84C1gHpAnaaikbIJB+hmpjtslT0FRCR2zMzm6f9c667OzMrjxnMNZvmUXTy1MvJ/4qQUbWF2yh++efTgZKZ7/y7nmkpEhbv3sSH7w2SP519LNXPXgW+zYW5m8/SXtlZvu90Bn4CVJiyTdB2BmS4EngfeAfwHXm1l1OFh/AzATWAY8GbZ1rUTBqlJOHNqjwe6mzx59KMcO6MpdL65gf2V1k/e1v7Ka372ykuMHdWP8kYc0+XWcSxdfPnUYd192PIvWb+eSP73JB9v3JWU/aZdMzOwwMxtoZseFt+vi6m43s+FmdriZvRBX/ryZjQjrbo8mcpcMW3fuZ1XxHsbVMV4STxK3nHskm3fu56E3mj7Nyl/nrmPTjvSaQM+5RH3+2H48/KUT2bR9Pxf+8U1WFe9u9n2kXTJxLl5BUSlQ/3hJvLHDenLmEU2fZmV3eRV//HchpxzWKyXrXDiXSicP78UTXx1HXp9O9OqY0+yv78nEpbU5RaV0zs1iVL+6x0tqu2liMM3K718tPOh9PfT6akr3VPC9cw4/6Oc61xKM7NeFR689ia4dmn82B08mLq0VrCrlpKE9Gn2G1og+nbnkhIE8OmfNQU2zsm1PBffPKuLskX1SumiSc62FJxOXtjbt2Mea0r11Xl/SkJppVn51ENOs3DdrFbsrqvju2X5U4lxTeDJxaatgVePHS+L17RpMszKjkdOsbNm5n+lvruGC4/pzeN/OTYrVubbOk4lLWwWrSunWoR1H9u1y0M89mGlW7nl1JVXVxrfHj2hqqM61eZ5MXNoqKArGS5py4WBjp1lZV7qXx99az+QxAxnUs0Mi4TrXpnkycWlpfdleNmzbd8DrSxpy+UmDGdyzA7944f165yb67csryMwQ3/hMXpP345zzZOLS1EfXlzT9eo+aaVbe37yLf9YxzcqKLbt4etFGrj55CH265DZ5P845TyYuTc1ZVUrPjtmM6NMpodf5cJqVlz45zcqvZi6nU3YW150+PKF9OOc8mbg0ZGYUFJUydljPhKc0qZlmZdOO/fz5jTUfli9av50X39vCV04bRveO2QlG7JzzZOLSztrSvWzasZ+xB3lKcH1qpln542uFbAunWfnVzOX06JjNl04Z2iz7cK6t82Ti0s6bNdeXJDD4XttNE49gT3kVv/93IW8WlvB6YQlfP2M4nXIiWdLHuVbH/5Jc2ikoKqV35xyG9+7YbK9ZM83KIwVrmL2ymEO75nLF2MHN9vrOtXV+ZOLSiplRsKqUcc0wXlJbzTQrK7bs5ltn5iV11Tnn2hpPJi6trCreTcnu8oOeQqUx+nbN5aYJR3BqXi8uOmFAs7++c21Z2iYTSd+VZJJ6hY8l6W5JhZIWSxod13aKpJXhbUp0UbtEFSRhvCTeNZ8ayqPXntTgqo3OuYOXlmMmkgYCZwPr4oonEqz7ngecBNwLnCSpB/BjIB8wYIGkGWa2LbVRu+ZQUFTKoV1zGexTmzjXoqTrv2e/Ab5PkBxqTAIescAcoJukQ4FzgJfMrCxMIC8BE1IesUtYLGbMKSpLyniJcy650i6ZSJoEbDSzd2pV9QfWxz3eEJbVV17Xa0+VNF/S/OLi+if/c9FYsXUXZXsqmu36Eudc6kTS
zSXpZaBvHVW3Av9N0MXV7MxsGjANID8/v+F5yV3KJXu8xDmXPJEkEzMbX1e5pKOBocA7YTfHAGChpDHARmBgXPMBYdlG4Ixa5a81e9Au6QpWlTKge3sG9vDxEudamrTq5jKzd83sEDMbYmZDCLqsRpvZZmAGcFV4VtdYYIeZbQJmAmdL6i6pO8FRzcyo3oNrmljMmLu6zI9KnGuh0vJsrno8D5wLFAJ7gWsAzKxM0s+AeWG7n5pZWTQhuqZ6b9NOduyrTMr1Jc655EvrZBIendRsG3B9Pe0eAh5KUVguCeYUNW29d+dcekirbi7XdhWsKmVIzw4c2rV91KE455rAk4mLXFV1jLdWl/lRiXMtmCcTF7mlH+xkV3kVY33w3bkWy5OJi9yH6717MnGuxfJk4iJXsKqU4b07ckiX3KhDcc41kScTF6nK6hjz1vh4iXMtnScTF6nFG3awt6KaccN6RR2Kcy4BnkxcpGquLxk7rEfEkTjnEuHJxEWqYFUph/fpTM9OOVGH4pxLgCcTF5nyqmrmr/XxEudaA08mLjLvrN/B/sqYX1/iXCvgycRFpmBVKZKPlzjXGngycZEpKCrhyL5d6NYhO+pQnHMJ8mTiIrG/spqF67b7eIlzrYQnExeJheu2UVEV8ylUnGsl0jKZSPqGpPclLZV0Z1z5LZIKJS2XdE5c+YSwrFDSzdFE7Q7GnFWlZAjG+HiJc61C2i2OJenTwCTgWDMrl3RIWD4SmAyMAvoBL0saET7tD8BZBMv8zpM0w8zeS330rrEKiko5qn9XuuS2izoU51wzSMcjk68Bd5hZOYCZbQ3LJwGPm1m5ma0mWL53THgrNLMiM6sAHg/bujS1r6KaReu3exeXc61IOiaTEcCpkuZK+o+kE8Py/sD6uHYbwrL6yl2amr+2jMpqY6wPvjvXakTSzSXpZaBvHVW3EsTUAxgLnAg8KWlYM+13KjAVYNCgQc3xkq4JClaVkpkhThzi4yXOtRaRJBMzG19fnaSvAf80MwPekhQDegEbgYFxTQeEZTRQXnu/04BpAPn5+dbkN+ASUlBUyjEDutIpJ+2G7JxzTZSO3VzPAJ8GCAfYs4ESYAYwWVKOpKFAHvAWMA/IkzRUUjbBIP2MSCJ3B7S7vIrFG3b4eIlzrUw6/mv4EPCQpCVABTAlPEpZKulJ4D2gCrjezKoBJN0AzAQygYfMbGk0obsDmbemjOqY+cWKzrUyaZdMwjOyrqin7nbg9jrKnweeT3Jorhn8Z3kx7TJF/mAfL3GuNUnHbi7XSq0p2cNjb63js0cfSvvszKjDcc41I08mLiXMjNv+dynZmRnccu6RUYfjnGtmnkxcSsxcuoXXlhfz7fF59OmSG3U4zrlm5snEJd3eiip+9n/vcUTfzlx98pCow3HOJUGjkomkb0nqosCDkhZKOjvZwbnW4fevFrJx+z5+OukosjL9/xfnWqPG/mV/ycx2AmcD3YErgTuSFpVrNQq37ub+2UVcOLo/Y4b6GVzOtVaNTSYK788FHg2v41AD7Z0LBt1nLCW3XSa3TPRBd+das8YmkwWSXiRIJjMldQZiyQvLtQbPvbuJ1wtL+N7Zh9O7c07U4TjnkqixFy1eCxwHFJnZXkk9gWuSF5Zr6XaXB4Puo/p14Yqxg6MOxzmXZI1KJmYWk7QFGCkp7a6ad+nn7ldWsmVnOX/84glkZniPqHOtXaMSg6RfAJcSzItVHRYbMCtJcbkWbMWWXTz0+mouzR/ICYO7Rx2Ocy4FGnuUcT5weM3qh87Vx8z44TNL6JiTxU0Tj4g6HOdcijR2AL4I8MW63QE9u+gD5q4u4/sTDqdHx+yow3HOpUhjj0z2AoskvQJ8eHRiZt9MSlSuRdq5v5Lbn1/GsQO6MvlEX8nSubaksclkBr7glDuA37y0gpLd5Tw4Jd8H3Z1rYw6YTCRlAleb2adTEI9rod77YCfT31zD5WMGccyAblGH45xLsQOOmYSrGcYkdU1BPEg6TtIcSYskzZc0JiyXpLslFUpaLGl03HOmSFoZ3qakIk73kVjM+NGzS+jWIZsbzzk86nCccxFobDfXbuBdSS8Be2oKkzRmcifwEzN7QdK54eMzgIkE677nAScB9wInSeoB/BjIJzhdeYGkGWa2LQmxuTo8tXAD89du486LjqFbBx90d64tamwy+Wd4SwUDuoTbXYEPwu1JwCPhevBzJHWTdChBonnJzMoAwoQ3AfhbiuJt03bsreSOF95n9KBuXHzCgKjDcc5FpLFXwE9PdiBxvk0w/9evCLrhTg7L+wPr49ptCMvqK/8ESVOBqQCDBvnZRs3hVy8uZ9veCh65dgwZPujuXJvV2CvgVxMcMXyMmQ1ryk4lvQz0raPqVuBM4Dtm9pSkLwAPAuObsp/azGwaMA0gPz//E+/HHZx3N+zgL3PXMmXcEEb1S8mQmnMuTTW2mys/bjsXuARo8uIUZlZvcpD0CPCt8OHfgQfC7Y3AwLimA8KyjQRdXfHlrzU1Ntc4sZjxw2eX0LNjDt85a0TU4TjnItaoK+DNrDTuttHMfgt8NkkxfQCcHm5/BlgZbs8ArgrP6hoL7DCzTcBM4GxJ3SV1J1jAa2aSYnOhJ+avZ9H67fz3uUfQtb1PjuBcW9fYbq7RcQ8zCI5UkjV78FeA34WzE+8nHOMAnidYT6WQ4Ir8awDMrEzSz4B5Ybuf1gzGu+TYtqeCX/zrfcYM6cEFx9c5POWca2MamxB+HbddBawGvtD84YCZvQ6cUEe5AdfX85yHgIeSEY/7pDtnvs+u/VX89PxRSD7o7pw7iMWxzKwovkDS0CTE49LcgrVlPD5vPdd+aihH9O1y4Cc459qExiaTfwCj6yj7xBGEa5227NzPPa+u5Il56+nTOZdvjc+LOiTnXBppMJlIOgIYBXSVdGFcVReCs7pcK1e2p4L7/rOK6W+uoTpmXHriQL55Zh6dc33Q3Tn3kQMdmRwOfA7oBpwXV76LYKDctVK79lfywOzVPPj6avZUVHHB8f359pkjGNSzQ9ShOefSUIPJxMyeBZ6VNM7MClIUk4vQvopqHilYw73/WcX2vZVMPKov/3XWCPL6dI46NOdcGmvsmElpuDBWHzM7StIxwOfN7OdJjM2lUEVVjCfmreOeVwvZuquc00b05ntnj/Dp5J1zjdLYZHI/cCPwJwAzWyzpMcCTSQtXHTOefnsjv315BRu27ePEId2557LjOWlYz6hDc861II1NJh3M7K1a1xRUJSEelyKxmPGvpZu566UVFG7dzVH9u/Dz84/i9BG9/doR59xBa2wyKZE0nHCyR0kXA5uSFpVLGjPjtRXF/PrF5SzZuJPDDunEvV8czYSj+noScc41WWOTyfUEs+0eIWkjwRXwX0xaVC5pfvPSCu5+tZAB3dvz60uO5fzj+/t67c65hDV2PZMiYLykjgRzc+0FJgNrkxiba2ZmxlMLN3JqXi8enHIi2VmNmufTOecOqMFvE0ldJN0i6feSziJIIlMIJltMytxcLnmKSvawcfs+zhnV1xOJc65ZHejI5FFgG1BAcJH
irYCAC8xsUZJjc81s9opiAE4f0TviSJxzrc2BkskwMzsaQNIDBIPug8xsf9Ijc81u9soShvTswMAefhW7c655Haivo7Jmw8yqgQ2eSFqmiqoYBUWlnJrnRyXOueZ3oGRyrKSd4W0XcEzNtqSdTd2ppEskLZUUk5Rfq+4WSYWSlks6J658QlhWKOnmuPKhkuaG5U9Iym5qXK3ZwnXb2FtRzal5vaIOxTnXCjWYTMws08y6hLfOZpYVt53IYhZLgAuBWfGFkkYSnCU2CpgA/FFSpqRM4A/ARGAkcFnYFuAXwG/M7DCC8Z1rE4ir1Zq9spjMDDFuuF/Z7pxrfpGc0mNmy8xseR1Vk4DHzazczFYTnDU2JrwVmlmRmVUAjwOTFFxl9xmCtVUApgPnJ/8dtDyzVpQwelA3nzreOZcU6XZ+aH9gfdzjDWFZfeU9ge1mVlWrvE6SpkqaL2l+cXFxswaezkp3l7Pkg41thOIAABLASURBVB0+XuKcS5rGXgF/0CS9DPSto+rWcGr7lDOzaQRX8pOfn29RxBCFN1aVYgan+SnBzrkkSVoyMbPxTXjaRmBg3OMBYRn1lJcC3SRlhUcn8e1daPaKYrq2b8fR/btGHYpzrpVKt26uGcBkSTmShgJ5wFvAPCAvPHMrm2CQfoaZGfBv4OLw+VOASI560pWZMXtlCacc1svn4HLOJU0kyUTSBZI2AOOA5yTNBDCzpcCTwHvAv4Drzaw6POq4AZgJLAOeDNsC3AT8l6RCgjGUB1P7btJb4dbdbN65308Jds4lVdK6uRpiZk8DT9dTdztwex3lzwPP11FeRHC2l6vDf8IpVE7xZOKcS6J06+ZyzWz2yhKG9e7IgO4+hYpzLnk8mbRi+yurmbu6lNP8lGDnXJJ5MmnFFqzdxv7KGKeN8C4u51xyeTJpxWatLKZdpjhpqE+h4pxLLk8mrdjsFSWcMLg7HXMiOc/COdeGeDJppYp3lfPepp0+hYpzLiU8mbRSrxcGpwT74LtzLhU8mbRSs1eU0KNjNqP6JbJSgHPONY4nk1bIzJgVTqGS4VOoOOdSwJNJK/T+5l2U7C73KVSccynjyaQVmr0yGC/xwXfnXKp4MmmFZq8sYUSfTvTtmht1KM65NsKTSSuzr6KauavL/KjEOZdSnkxambfWlFFRFfPxEudcSnkyaWVmrygmOyvDp1BxzqWUJ5NWZvbKEsYM6UH77MyoQ3HOtSFRrbR4iaSlkmKS8uPKz5K0QNK74f1n4upOCMsLJd0tSWF5D0kvSVoZ3neP4j2lgy0797N8yy7v4nLOpVxURyZLgAuBWbXKS4DzzOxogvXcH42ruxf4CsG68HnAhLD8ZuAVM8sDXgkft0mzV5YAfkqwcy71IkkmZrbMzJbXUf62mX0QPlwKtJeUI+lQoIuZzTEzAx4Bzg/bTQKmh9vT48rbnFkriunVKYcj+naOOhTnXBuTzmMmFwELzawc6A9siKvbEJYB9DGzTeH2ZqBPfS8oaaqk+ZLmFxcXJyPmyMRixuuFJZya51OoOOdSL2kLXUh6GehbR9WtZvbsAZ47CvgFcPbB7NPMTJI1UD8NmAaQn59fb7uW6L1NOynbU+HjJc65SCQtmZjZ+KY8T9IA4GngKjNbFRZvBAbENRsQlgFskXSomW0Ku8O2NjXmlmxWOIXKKZ5MnHMRSKtuLkndgOeAm83sjZrysBtrp6Sx4VlcVwE1RzczCAbrCe8bPOpprWavKOHIQ7twSGefQsU5l3pRnRp8gaQNwDjgOUkzw6obgMOAH0laFN4OCeu+DjwAFAKrgBfC8juAsyStBMaHj9uUvRVVzF9bxml+VOKci0gki4Ob2dMEXVm1y38O/Lye58wHjqqjvBQ4s7ljbEnmFpVRWW1+SrBzLjJp1c3lmuY/K4rJycogf0ibvV7TORcxTyatwOyVxZw0rCe57XwKFedcNDyZtHAbt+9jVfEeHy9xzkXKk0kL93p4SvBpI3y8xDkXHU8mLdyslSX06ZJD3iGdog7FOdeGeTJpwapjxhuFJZya15twEmXnnIuEJ5MW7N2NO9i+t9KnUHHORc6TSQs2e0U4hcphnkycc9HyZNKCzV5ZwlH9u9CzU07UoTjn2jhPJi3Urv2VLFy3jdP8qnfnXBrwZNJCzSkqoyrmU6g459KDJ5MWavbKYjpkZzJ6cLeoQ3HOOU8mLdXslSWMHdaTnCyfQsU5Fz1PJi3Q+rK9rC7Z46cEO+fShieTFqhmVUUfL3HOpYuoFse6RNJSSTFJ+XXUD5K0W9L34somSFouqVDSzXHlQyXNDcufkJSdqvcRldkrSujXNZfhvTtGHYpzzgHRHZksAS4EZtVTfxcfraSIpEzgD8BEYCRwmaSRYfUvgN+Y2WHANuDaZAWdDqqqY7yxqoTTRvgUKs659BFJMjGzZWa2vK46SecDq4GlccVjgEIzKzKzCuBxYFK4HvxngH+E7aYD5ycv8ui9s2EHu/ZXeReXcy6tpNWYiaROwE3AT2pV9QfWxz3eEJb1BLabWVWt8vpef6qk+ZLmFxcXN1/gKVJZHeOZtzciwacO6xl1OM4596GkrQEv6WWgbx1Vt5rZs/U87TaCLqvdyejCMbNpwDSA/Px8a/YdJEnJ7nIef2sdf5mzjs0793PWyD5069Dqh4accy1I0pKJmY1vwtNOAi6WdCfQDYhJ2g8sAAbGtRsAbARKgW6SssKjk5ryVuHdDTv485ur+b93NlFRHePUvF78vwuP4owRh0QdmnPOfUzSkklTmNmpNduSbgN2m9nvJWUBeZKGEiSLycDlZmaS/g1cTDCOMgWo76inRaisjvHCks08/MZqFq7bTsfsTCaPGchV44ZwmC+A5ZxLU5EkE0kXAPcAvYHnJC0ys3Pqa29mVZJuAGYCmcBDZlYzQH8T8LiknwNvAw8mN/rkKN5VzmNz1/HXuWvZuqucIT078OPzRnLRCQPoktsu6vCcc65BMmsxQwfNKj8/3+bPnx91GLyzfjsPv7mG5xYHXVmnj+jN1Z8awul5vcnI8FN/nXPpRdICM/vE9YFp1c3VVlRUxXj+3U08/OYaFq3fTqecLC4/aRBXjRvMsN7eleWca3k8maTYUws2cMe/3qd4VznDenXkJ58fxYWj+9PZu7Kccy2YJ5MUemXZFm78xzscP6g7v7rkWE49rJd3ZTnnWgVPJiny3gc7+cbf3mZUv648eu0YOmT7R++caz3S6gr41mrrzv1cO30eXdu344Ep+Z5InHOtjn+rJdneiiqunT6fHfsq+ft14+jTJTfqkJxzrtn5kUkSxWLGd55YxNIPdnDPZcczql/XqENyzrmk8GSSRL+Y+T4zl27hB58dyZlH9ok6HOecSxpPJkny+Fvr+NN/irhi7CCu+dSQqMNxzrmk8mSSBG8WlvCDZ5Zwal4vbjtvlC9i5Zxr9TyZNLPCrbu57i8LGNa7I3/44miyMv0jds61fv5N14zK9lTwpYfnkZ2VwYNTTvQJGp1zbYafGtxMyquq+eqj89m8cz+PTx3LwB4dog7JOedSxo9MmoGZcf
NT7zJvzTZ+fcmxjB7UPeqQnHMupTyZNIN7Xi3k6bc38t2zRnDesf2iDsc551LOk0mCnl20kbteWsGFx/fnhs8cFnU4zjkXiUiSiaRLJC2VFJOUX6vuGEkFYf27knLD8hPCx4WS7lZ4vq2kHpJekrQyvE9ZH9OCtWXc+I/FjBnSg/+56Gg/Bdg512ZFdWSyBLgQmBVfGK71/hfgOjMbBZwBVIbV9wJfAfLC24Sw/GbgFTPLA14JHyfd+rK9TH1kAf265vKnK08gJyszFbt1zrm0FEkyMbNlZra8jqqzgcVm9k7YrtTMqiUdCnQxszkWrDP8CHB++JxJwPRwe3pcedLs2FfJNQ/Po7I6xoNXn0j3jtnJ3qVzzqW1dBszGQGYpJmSFkr6fljeH9gQ125DWAbQx8w2hdubgXonwZI0VdJ8SfOLi4ubFGBldYwbHlvImpI93HflCQz3ZXadcy5515lIehnoW0fVrWb2bAPxnAKcCOwFXpG0ANjRmH2amUmyBuqnAdMA8vPz623XwPP58YylzF5Zwp0XH8PJw3sd7Es451yrlLRkYmbjm/C0DcAsMysBkPQ8MJpgHGVAXLsBwMZwe4ukQ81sU9gdtjWBsA9oeO9OXP/p4Xwhf2Ayd+Occy1KunVzzQSOltQhHIw/HXgv7MbaKWlseBbXVUDN0c0MYEq4PSWuvNlJ4tpThnLjOUckaxfOOdciRXVq8AWSNgDjgOckzQQws23AXcA8YBGw0MyeC5/2deABoBBYBbwQlt8BnCVpJTA+fOyccy6FFJwc1fbk5+fb/Pnzow7DOedaFEkLzCy/dnm6dXM555xrgTyZOOecS5gnE+eccwnzZOKccy5hnkycc84lzJOJc865hLXZU4MlFQNrm/j0XkBJM4bT3Dy+xHh8ifH4EpPu8Q02s961C9tsMkmEpPl1nWedLjy+xHh8ifH4EpPu8dXHu7mcc84lzJOJc865hHkyaZppUQdwAB5fYjy+xHh8iUn3+OrkYybOOecS5kcmzjnnEubJxDnnXMI8mTRA0gRJyyUVSrq5jvocSU+E9XMlDUlhbAMl/VvSe5KWSvpWHW3OkLRD0qLw9qNUxRfuf42kd8N9f2K+fwXuDj+/xZJGpzC2w+M+l0WSdkr6dq02Kf38JD0kaaukJXFlPSS9JGlleN+9nudOCduslDSlrjZJiu+Xkt4Pf35PS+pWz3Mb/F1IYny3SdoY9zM8t57nNvi3nsT4noiLbY2kRfU8N+mfX8LMzG913IBMgkW4hgHZwDvAyFptvg7cF25PBp5IYXyHAqPD7c7AijriOwP4vwg/wzVArwbqzyVY5EzAWGBuhD/rzQQXY0X2+QGnESxTvSSu7E7g5nD7ZuAXdTyvB1AU3ncPt7unKL6zgaxw+xd1xdeY34Ukxncb8L1G/Pwb/FtPVny16n8N/Ciqzy/Rmx+Z1G8MUGhmRWZWATwOTKrVZhIwPdz+B3BmuKxw0pnZJjNbGG7vApYB/VOx72Y0CXjEAnOAbpIOjSCOM4FVZtbUGRGahZnNAspqFcf/jk0Hzq/jqecAL5lZmQWrlb4ETEhFfGb2oplVhQ/nAAOae7+NVc/n1xiN+VtPWEPxhd8bXwD+1tz7TRVPJvXrD6yPe7yBT35Zf9gm/IPaAfRMSXRxwu6144G5dVSPk/SOpBckjUppYGDAi5IWSJpaR31jPuNUmEz9f8RRfn4AfcxsU7i9GehTR5t0+Ry/xEfLadd2oN+FZLoh7IZ7qJ5uwnT4/E4FtpjZynrqo/z8GsWTSQsnqRPwFPBtM9tZq3ohQdfNscA9wDMpDu8UMxsNTASul3Raivd/QJKygc8Df6+jOurP72Ms6O9Iy3P5Jd0KVAF/radJVL8L9wLDgeOATQRdSenoMho+Kkn7vyVPJvXbCAyMezwgLKuzjaQsoCtQmpLogn22I0gkfzWzf9auN7OdZrY73H4eaCepV6riM7ON4f1W4GmC7oR4jfmMk20isNDMttSuiPrzC22p6foL77fW0SbSz1HS1cDngC+GCe8TGvG7kBRmtsXMqs0sBtxfz36j/vyygAuBJ+prE9XndzA8mdRvHpAnaWj43+tkYEatNjOAmjNnLgZere+PqbmFfawPAsvM7K562vStGcORNIbg552SZCepo6TONdsEA7VLajWbAVwVntU1FtgR16WTKvX+Rxjl5xcn/ndsCvBsHW1mAmdL6h5245wdliWdpAnA94HPm9neeto05nchWfHFj8FdUM9+G/O3nkzjgffNbENdlVF+fgcl6jMA0vlGcLbRCoIzPW4Ny35K8IcDkEvQPVIIvAUMS2FspxB0eSwGFoW3c4HrgOvCNjcASwnOTpkDnJzC+IaF+30njKHm84uPT8Afws/3XSA/xT/fjgTJoWtcWWSfH0FS2wRUEvTbX0swBvcKsBJ4GegRts0HHoh77pfC38NC4JoUxldIMN5Q8ztYc3ZjP+D5hn4XUhTfo+Hv1mKCBHFo7fjCx5/4W09FfGH5wzW/c3FtU/75JXrz6VScc84lzLu5nHPOJcyTiXPOuYR5MnHOOZcwTybOOecS5snEOedcwjyZOJcASdW1Zh9ucMZZSddJuqoZ9rsmggsonauXnxrsXAIk7TazThHsdw3BdTklqd63c3XxIxPnkiA8crgzXIPiLUmHheW3SfpeuP1NBevRLJb0eFjWQ9IzYdkcSceE5T0lvahg7ZoHCC74rNnXFeE+Fkn6k6TM8PawpCVhDN+J4GNwbYgnE+cS075WN9elcXU7zOxo4PfAb+t47s3A8WZ2DMGV9wA/Ad4Oy/4beCQs/zHwupmNIpibaRCApCOBS4FPmdlxQDXwRYKJDfub2VFhDH9uxvfs3CdkRR2Acy3cvvBLvC5/i7v/TR31i4G/SnqGj2YkPgW4CMDMXg2PSLoQLKx0YVj+nKRtYfszgROAeeE0Yu0JJoP8X2CYpHuA54AXm/4WnTswPzJxLnmsnu0anyWYm2w0QTJoyj93Aqab2XHh7XAzu82CRbKOBV4jOOp5oAmv7VyjeTJxLnkujbsviK+QlAEMNLN/AzcRLF/QCZhN0E2FpDOAEgvWqZkFXB6WTyRYnheCSSAvlnRIWNdD0uDwTK8MM3sK+AFBwnIuabyby7nEtJe0KO7xv8ys5vTg7pIWA+UEU93HywT+IqkrwdHF3Wa2XdJtwEPh8/by0fTzPwH+Jmkp8CawDsDM3pP0A4JV+DIIZqS9HtgH/DksA7il+d6yc5/kpwY7lwR+6q5ra7ybyznnXML8yMQ551zC/MjEOedcwjyZOOecS5gnE+eccwnzZOKccy5hnkycc84l7P8DJ679CZQ+algAAAAASUVORK5CYII=\n",
634 |       "text/plain": [
635 |        "<Figure size 432x288 with 1 Axes>"
636 |       ]
637 |      },
638 |      "metadata": {
639 |       "needs_background": "light"
640 |      },
641 |      "output_type": "display_data"
642 |     }
643 |    ],
644 |    "source": [
645 |     "real_ratio = 0.5\n",
646 |     "env_name = 'Pendulum-v0'\n",
647 |     "env = gym.make(env_name)\n",
648 |     "num_episodes = 20\n",
649 |     "actor_lr = 5e-4\n",
650 |     "critic_lr = 5e-3\n",
651 |     "alpha_lr = 1e-3\n",
652 |     "hidden_dim = 128\n",
653 |     "gamma = 0.98\n",
654 |     "tau = 0.005  # 软更新参数\n",
655 |     "buffer_size = 10000\n",
656 |     "target_entropy = -1\n",
657 |     "model_alpha = 0.01  # 模型损失函数中的加权权重\n",
658 |     "state_dim = env.observation_space.shape[0]\n",
659 |     "action_dim = env.action_space.shape[0]\n",
660 |     "action_bound = env.action_space.high[0]  # 动作最大值\n",
661 |     "\n",
662 |     "rollout_batch_size = 1000\n",
663 |     "rollout_length = 1  # 推演长度k,推荐更多尝试\n",
664 |     "model_pool_size = rollout_batch_size * rollout_length\n",
665 |     "\n",
666 |     "agent = SAC(state_dim, hidden_dim, action_dim, action_bound, actor_lr,\n",
667 |     "            critic_lr, alpha_lr, target_entropy, tau, gamma)\n",
668 |     "model = EnsembleDynamicsModel(state_dim, action_dim, model_alpha)\n",
669 |     "fake_env = FakeEnv(model)\n",
670 |     "env_pool = ReplayBuffer(buffer_size)\n",
671 |     "model_pool = ReplayBuffer(model_pool_size)\n",
672 |     "mbpo = MBPO(env, agent, fake_env, env_pool, model_pool, rollout_length,\n",
673 |     "            rollout_batch_size, real_ratio, num_episodes)\n",
674 |     "\n",
675 |     "return_list = mbpo.train()\n",
676 |     "\n",
677 |     "episodes_list = list(range(len(return_list)))\n",
678 |     "plt.plot(episodes_list, return_list)\n",
679 |     "plt.xlabel('Episodes')\n",
680 |     "plt.ylabel('Returns')\n",
681 |     "plt.title('MBPO on {}'.format(env_name))\n",
682 |     "plt.show()\n",
683 |     "\n",
684 |     "# episode: 1, return: -1083\n",
685 |     "# episode: 2, return: -1324\n",
686 |     "# episode: 3, return: -979\n",
687 |     "# episode: 4, return: -130\n",
688 |     "# episode: 5, return: -246\n",
689 |     "# episode: 6, return: -2\n",
690 |     "# episode: 7, return: -239\n",
691 |     "# episode: 8, return: -2\n",
692 |     "# episode: 9, return: -122\n",
693 |     "# episode: 10, return: -236\n",
694 |     "# episode: 11, return: -238\n",
695 |     "# episode: 12, return: -2\n",
696 |     "# episode: 13, return: -127\n",
697 |     "# episode: 14, return: -128\n",
698 |     "# episode: 15, return: -125\n",
699 |     "# episode: 16, return: -124\n",
700 |     "# episode: 17, return: -125\n",
701 |     "# episode: 18, return: -247\n",
702 |     "# episode: 19, return: -127\n",
703 |     "# episode: 20, return: -129"
704 |    ]
705 |   }
706 |  ],
707 |  "metadata": {
708 |   "colab": {
709 |    "collapsed_sections": [],
710 |    "name": "第17章-基于模型的策略优化.ipynb",
711 |    "provenance": []
712 |   },
713 |   "kernelspec": {
714 |    "display_name": "Python 3",
715 |    "language": "python",
716 |    "name": "python3"
717 |   },
718 |   "language_info": {
719 |    "codemirror_mode": {
720 |     "name": "ipython",
721 |     "version": 3
722 |    },
723 |    "file_extension": ".py",
724 |    "mimetype": "text/x-python",
725 |    "name": "python",
726 |    "nbconvert_exporter": "python",
727 |    "pygments_lexer": "ipython3",
728 |    "version": "3.7.6"
729 |   }
730 |  },
731 |  "nbformat": 4,
732 |  "nbformat_minor": 1
733 | }
734 | 
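
With real_ratio = 0.5 each SAC update is meant to draw half of its batch from real environment transitions stored in env_pool and half from one-step model rollouts stored in model_pool (rollout_length = 1, rollout_batch_size = 1000). The notebook's MBPO class handles this internally; the fragment below is only a minimal sketch of how such a mixed batch could be assembled, assuming the notebook's ReplayBuffer.sample(n) returns a (states, actions, rewards, next_states, dones) tuple.

# Sketch: build one SAC batch that mixes real and model-generated data.
# Assumes ReplayBuffer.sample(n) -> (states, actions, rewards, next_states, dones).
def sample_mixed_batch(env_pool, model_pool, batch_size, real_ratio):
    real_size = int(batch_size * real_ratio)      # transitions from the real env
    model_size = batch_size - real_size           # transitions from model rollouts
    env_batch = env_pool.sample(real_size)
    model_batch = model_pool.sample(model_size)
    # concatenate the two batches field by field
    return tuple(np.concatenate([np.asarray(e), np.asarray(m)], axis=0)
                 for e, m in zip(env_batch, model_batch))
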


--------------------------------------------------------------------------------
/第19章-目标导向的强化学习.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "executionInfo": {
  8 |      "elapsed": 6862,
  9 |      "status": "ok",
 10 |      "timestamp": 1650010812842,
 11 |      "user": {
 12 |       "displayName": "Sam Lu",
 13 |       "userId": "15789059763790170725"
 14 |      },
 15 |      "user_tz": -480
 16 |     },
 17 |     "id": "98nP9Uh9GUTL"
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import torch\n",
 22 |     "import torch.nn.functional as F\n",
 23 |     "import numpy as np\n",
 24 |     "import random\n",
 25 |     "from tqdm import tqdm\n",
 26 |     "import collections\n",
 27 |     "import matplotlib.pyplot as plt\n",
 28 |     "\n",
 29 |     "\n",
 30 |     "class WorldEnv:\n",
 31 |     "    def __init__(self):\n",
 32 |     "        self.distance_threshold = 0.15\n",
 33 |     "        self.action_bound = 1\n",
 34 |     "\n",
 35 |     "    def reset(self):  # 重置环境\n",
 36 |     "        # 生成一个目标状态, 坐标范围是[3.5~4.5, 3.5~4.5]\n",
 37 |     "        self.goal = np.array(\n",
 38 |     "            [4 + random.uniform(-0.5, 0.5), 4 + random.uniform(-0.5, 0.5)])\n",
 39 |     "        self.state = np.array([0, 0])  # 初始状态\n",
 40 |     "        self.count = 0\n",
 41 |     "        return np.hstack((self.state, self.goal))\n",
 42 |     "\n",
 43 |     "    def step(self, action):\n",
 44 |     "        action = np.clip(action, -self.action_bound, self.action_bound)\n",
 45 |     "        x = max(0, min(5, self.state[0] + action[0]))\n",
 46 |     "        y = max(0, min(5, self.state[1] + action[1]))\n",
 47 |     "        self.state = np.array([x, y])\n",
 48 |     "        self.count += 1\n",
 49 |     "\n",
 50 |     "        dis = np.sqrt(np.sum(np.square(self.state - self.goal)))\n",
 51 |     "        reward = -1.0 if dis > self.distance_threshold else 0\n",
 52 |     "        if dis <= self.distance_threshold or self.count == 50:\n",
 53 |     "            done = True\n",
 54 |     "        else:\n",
 55 |     "            done = False\n",
 56 |     "\n",
 57 |     "        return np.hstack((self.state, self.goal)), reward, done"
 58 |    ]
 59 |   },
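
WorldEnv is goal-conditioned: the observation stacks the 2-D position with the 2-D goal, the reward is -1 until the agent is within distance_threshold of the goal, and an episode ends on success or after 50 steps. A quick sanity-check rollout with uniformly random actions, a small sketch that uses only the class defined above:

# Roll out one episode of WorldEnv with random actions.
env_demo = WorldEnv()
obs = env_demo.reset()                    # [x, y, goal_x, goal_y]
episode_return, done = 0.0, False
while not done:
    rand_action = np.random.uniform(-1, 1, size=2)   # stays within action_bound
    obs, reward, done = env_demo.step(rand_action)
    episode_return += reward
print('steps:', env_demo.count, 'return:', episode_return)
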
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 2,
 63 |    "metadata": {
 64 |     "executionInfo": {
 65 |      "elapsed": 3,
 66 |      "status": "ok",
 67 |      "timestamp": 1650010812843,
 68 |      "user": {
 69 |       "displayName": "Sam Lu",
 70 |       "userId": "15789059763790170725"
 71 |      },
 72 |      "user_tz": -480
 73 |     },
 74 |     "id": "hhrV6UDwGUTP"
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "class PolicyNet(torch.nn.Module):\n",
 79 |     "    def __init__(self, state_dim, hidden_dim, action_dim, action_bound):\n",
 80 |     "        super(PolicyNet, self).__init__()\n",
 81 |     "        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)\n",
 82 |     "        self.fc2 = torch.nn.Linear(hidden_dim, hidden_dim)\n",
 83 |     "        self.fc3 = torch.nn.Linear(hidden_dim, action_dim)\n",
 84 |     "        self.action_bound = action_bound  # action_bound是环境可以接受的动作最大值\n",
 85 |     "\n",
 86 |     "    def forward(self, x):\n",
 87 |     "        x = F.relu(self.fc2(F.relu(self.fc1(x))))\n",
 88 |     "        return torch.tanh(self.fc3(x)) * self.action_bound\n",
 89 |     "\n",
 90 |     "\n",
 91 |     "class QValueNet(torch.nn.Module):\n",
 92 |     "    def __init__(self, state_dim, hidden_dim, action_dim):\n",
 93 |     "        super(QValueNet, self).__init__()\n",
 94 |     "        self.fc1 = torch.nn.Linear(state_dim + action_dim, hidden_dim)\n",
 95 |     "        self.fc2 = torch.nn.Linear(hidden_dim, hidden_dim)\n",
 96 |     "        self.fc3 = torch.nn.Linear(hidden_dim, 1)\n",
 97 |     "\n",
 98 |     "    def forward(self, x, a):\n",
 99 |     "        cat = torch.cat([x, a], dim=1)  # 拼接状态和动作\n",
100 |     "        x = F.relu(self.fc2(F.relu(self.fc1(cat))))\n",
101 |     "        return self.fc3(x)"
102 |    ]
103 |   },
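
The actor squashes its output with tanh and rescales it by action_bound, so every action component stays in [-action_bound, action_bound]; the critic scores a state-action pair with a single value. A quick shape and bound check on random inputs, a sketch using the WorldEnv dimensions of 4 state components and 2 action components:

# Check output shapes and the action bound on a random batch.
demo_actor = PolicyNet(state_dim=4, hidden_dim=128, action_dim=2, action_bound=1)
demo_critic = QValueNet(state_dim=4, hidden_dim=128, action_dim=2)
demo_states = torch.randn(8, 4)
demo_actions = demo_actor(demo_states)
print(demo_actions.shape, demo_critic(demo_states, demo_actions).shape)
# torch.Size([8, 2]) torch.Size([8, 1])
print(bool((demo_actions.abs() <= 1).all()))   # True: actions respect the bound
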
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 3,
107 |    "metadata": {
108 |     "executionInfo": {
109 |      "elapsed": 2,
110 |      "status": "ok",
111 |      "timestamp": 1650010819329,
112 |      "user": {
113 |       "displayName": "Sam Lu",
114 |       "userId": "15789059763790170725"
115 |      },
116 |      "user_tz": -480
117 |     },
118 |     "id": "bxiqOl_vGUTR"
119 |    },
120 |    "outputs": [],
121 |    "source": [
122 |     "class DDPG:\n",
123 |     "    ''' DDPG算法 '''\n",
124 |     "    def __init__(self, state_dim, hidden_dim, action_dim, action_bound,\n",
125 |     "                 actor_lr, critic_lr, sigma, tau, gamma, device):\n",
126 |     "        self.action_dim = action_dim\n",
127 |     "        self.actor = PolicyNet(state_dim, hidden_dim, action_dim,\n",
128 |     "                               action_bound).to(device)\n",
129 |     "        self.critic = QValueNet(state_dim, hidden_dim, action_dim).to(device)\n",
130 |     "        self.target_actor = PolicyNet(state_dim, hidden_dim, action_dim,\n",
131 |     "                                      action_bound).to(device)\n",
132 |     "        self.target_critic = QValueNet(state_dim, hidden_dim,\n",
133 |     "                                       action_dim).to(device)\n",
134 |     "        # 初始化目标价值网络并使其参数和价值网络一样\n",
135 |     "        self.target_critic.load_state_dict(self.critic.state_dict())\n",
136 |     "        # 初始化目标策略网络并使其参数和策略网络一样\n",
137 |     "        self.target_actor.load_state_dict(self.actor.state_dict())\n",
138 |     "        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),\n",
139 |     "                                                lr=actor_lr)\n",
140 |     "        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),\n",
141 |     "                                                 lr=critic_lr)\n",
142 |     "        self.gamma = gamma\n",
143 |     "        self.sigma = sigma  # 高斯噪声的标准差,均值直接设为0\n",
144 |     "        self.tau = tau  # 目标网络软更新参数\n",
145 |     "        self.action_bound = action_bound\n",
146 |     "        self.device = device\n",
147 |     "\n",
148 |     "    def take_action(self, state):\n",
149 |     "        state = torch.tensor([state], dtype=torch.float).to(self.device)\n",
150 |     "        action = self.actor(state).detach().cpu().numpy()[0]\n",
151 |     "        # 给动作添加噪声,增加探索\n",
152 |     "        action = action + self.sigma * np.random.randn(self.action_dim)\n",
153 |     "        return action\n",
154 |     "\n",
155 |     "    def soft_update(self, net, target_net):\n",
156 |     "        for param_target, param in zip(target_net.parameters(),\n",
157 |     "                                       net.parameters()):\n",
158 |     "            param_target.data.copy_(param_target.data * (1.0 - self.tau) +\n",
159 |     "                                    param.data * self.tau)\n",
160 |     "\n",
161 |     "    def update(self, transition_dict):\n",
162 |     "        states = torch.tensor(transition_dict['states'],\n",
163 |     "                              dtype=torch.float).to(self.device)\n",
164 |     "        actions = torch.tensor(transition_dict['actions'],\n",
165 |     "                               dtype=torch.float).to(self.device)\n",
166 |     "        rewards = torch.tensor(transition_dict['rewards'],\n",
167 |     "                               dtype=torch.float).view(-1, 1).to(self.device)\n",
168 |     "        next_states = torch.tensor(transition_dict['next_states'],\n",
169 |     "                                   dtype=torch.float).to(self.device)\n",
170 |     "        dones = torch.tensor(transition_dict['dones'],\n",
171 |     "                             dtype=torch.float).view(-1, 1).to(self.device)\n",
172 |     "\n",
173 |     "        next_q_values = self.target_critic(next_states,\n",
174 |     "                                           self.target_actor(next_states))\n",
175 |     "        q_targets = rewards + self.gamma * next_q_values * (1 - dones)\n",
176 |     "        # MSE损失函数\n",
177 |     "        critic_loss = torch.mean(\n",
178 |     "            F.mse_loss(self.critic(states, actions), q_targets))\n",
179 |     "        self.critic_optimizer.zero_grad()\n",
180 |     "        critic_loss.backward()\n",
181 |     "        self.critic_optimizer.step()\n",
182 |     "\n",
183 |     "        # 策略网络就是为了使Q值最大化\n",
184 |     "        actor_loss = -torch.mean(self.critic(states, self.actor(states)))\n",
185 |     "        self.actor_optimizer.zero_grad()\n",
186 |     "        actor_loss.backward()\n",
187 |     "        self.actor_optimizer.step()\n",
188 |     "\n",
189 |     "        self.soft_update(self.actor, self.target_actor)  # 软更新策略网络\n",
190 |     "        self.soft_update(self.critic, self.target_critic)  # 软更新价值网络"
191 |    ]
192 |   },
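
Each call to soft_update moves every target parameter toward the online parameter by a factor tau, i.e. target <- (1 - tau) * target + tau * online, so with tau = 0.005 the target networks lag the online networks over a long averaging horizon. A one-parameter numeric illustration of that blend:

# After n soft updates toward 0, a target value of 1 decays like (1 - tau)**n.
target_value = torch.tensor(1.0)
online_value = torch.tensor(0.0)
tau_demo = 0.005
for _ in range(200):
    target_value = target_value * (1.0 - tau_demo) + online_value * tau_demo
print(round(float(target_value), 3))   # about 0.367, i.e. (1 - 0.005)**200
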
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 4,
196 |    "metadata": {
197 |     "executionInfo": {
198 |      "elapsed": 303,
199 |      "status": "ok",
200 |      "timestamp": 1650010821234,
201 |      "user": {
202 |       "displayName": "Sam Lu",
203 |       "userId": "15789059763790170725"
204 |      },
205 |      "user_tz": -480
206 |     },
207 |     "id": "aw60NZwLGUTS"
208 |    },
209 |    "outputs": [],
210 |    "source": [
211 |     "class Trajectory:\n",
212 |     "    ''' 用来记录一条完整轨迹 '''\n",
213 |     "    def __init__(self, init_state):\n",
214 |     "        self.states = [init_state]\n",
215 |     "        self.actions = []\n",
216 |     "        self.rewards = []\n",
217 |     "        self.dones = []\n",
218 |     "        self.length = 0\n",
219 |     "\n",
220 |     "    def store_step(self, action, state, reward, done):\n",
221 |     "        self.actions.append(action)\n",
222 |     "        self.states.append(state)\n",
223 |     "        self.rewards.append(reward)\n",
224 |     "        self.dones.append(done)\n",
225 |     "        self.length += 1\n",
226 |     "\n",
227 |     "\n",
228 |     "class ReplayBuffer_Trajectory:\n",
229 |     "    ''' 存储轨迹的经验回放池 '''\n",
230 |     "    def __init__(self, capacity):\n",
231 |     "        self.buffer = collections.deque(maxlen=capacity)\n",
232 |     "\n",
233 |     "    def add_trajectory(self, trajectory):\n",
234 |     "        self.buffer.append(trajectory)\n",
235 |     "\n",
236 |     "    def size(self):\n",
237 |     "        return len(self.buffer)\n",
238 |     "\n",
239 |     "    def sample(self, batch_size, use_her, dis_threshold=0.15, her_ratio=0.8):\n",
240 |     "        batch = dict(states=[],\n",
241 |     "                     actions=[],\n",
242 |     "                     next_states=[],\n",
243 |     "                     rewards=[],\n",
244 |     "                     dones=[])\n",
245 |     "        for _ in range(batch_size):\n",
246 |     "            traj = random.sample(self.buffer, 1)[0]\n",
247 |     "            step_state = np.random.randint(traj.length)\n",
248 |     "            state = traj.states[step_state]\n",
249 |     "            next_state = traj.states[step_state + 1]\n",
250 |     "            action = traj.actions[step_state]\n",
251 |     "            reward = traj.rewards[step_state]\n",
252 |     "            done = traj.dones[step_state]\n",
253 |     "\n",
254 |     "            if use_her and np.random.uniform() <= her_ratio:\n",
255 |     "                step_goal = np.random.randint(step_state + 1, traj.length + 1)\n",
256 |     "                goal = traj.states[step_goal][:2]  # 使用HER算法的future方案设置目标\n",
257 |     "                dis = np.sqrt(np.sum(np.square(next_state[:2] - goal)))\n",
258 |     "                reward = -1.0 if dis > dis_threshold else 0\n",
259 |     "                done = False if dis > dis_threshold else True\n",
260 |     "                state = np.hstack((state[:2], goal))\n",
261 |     "                next_state = np.hstack((next_state[:2], goal))\n",
262 |     "\n",
263 |     "            batch['states'].append(state)\n",
264 |     "            batch['next_states'].append(next_state)\n",
265 |     "            batch['actions'].append(action)\n",
266 |     "            batch['rewards'].append(reward)\n",
267 |     "            batch['dones'].append(done)\n",
268 |     "\n",
269 |     "        batch['states'] = np.array(batch['states'])\n",
270 |     "        batch['next_states'] = np.array(batch['next_states'])\n",
271 |     "        batch['actions'] = np.array(batch['actions'])\n",
272 |     "        return batch"
273 |    ]
274 |   },
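
sample implements HER's 'future' strategy: with probability her_ratio the transition's goal is replaced by the position actually reached at a later step of the same trajectory, and the sparse reward and done flag are recomputed under that new goal. Relabeling one transition by hand with the same rule used in sample shows how a failed step turns into a success signal:

# Hand relabeling of a single transition with a 'future' goal.
old_goal = np.array([4.0, 4.0])
state = np.hstack(([1.0, 1.0], old_goal))        # position (1, 1), goal (4, 4)
next_state = np.hstack(([1.8, 1.6], old_goal))
reward = -1.0                                    # still far from the old goal

new_goal = np.array([1.8, 1.6])                  # a position visited later on
dis = np.sqrt(np.sum(np.square(next_state[:2] - new_goal)))
new_reward = -1.0 if dis > 0.15 else 0           # same threshold as sample()
state = np.hstack((state[:2], new_goal))
next_state = np.hstack((next_state[:2], new_goal))
print(reward, '->', new_reward)                  # -1.0 -> 0
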
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": 6,
278 |    "metadata": {
279 |     "colab": {
280 |      "base_uri": "https://localhost:8080/",
281 |      "height": 506
282 |     },
283 |     "executionInfo": {
284 |      "elapsed": 890109,
285 |      "status": "ok",
286 |      "timestamp": 1650011748151,
287 |      "user": {
288 |       "displayName": "Sam Lu",
289 |       "userId": "15789059763790170725"
290 |      },
291 |      "user_tz": -480
292 |     },
293 |     "id": "0wLycs-3GUTT",
294 |     "outputId": "a73c3a0d-d7ac-486b-87aa-b141aa9d4d0c"
295 |    },
296 |    "outputs": [
297 |     {
298 |      "name": "stderr",
299 |      "output_type": "stream",
300 |      "text": [
301 |       "Iteration 0:   0%|          | 0/200 [00:00<?, ?it/s]/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:23: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  ../torch/csrc/utils/tensor_new.cpp:201.)\n",
302 |       "Iteration 0: 100%|██████████| 200/200 [00:08<00:00, 24.96it/s, episode=200, return=-50.000]\n",
303 |       "Iteration 1: 100%|██████████| 200/200 [01:41<00:00,  1.96it/s, episode=400, return=-4.400]\n",
304 |       "Iteration 2: 100%|██████████| 200/200 [01:37<00:00,  2.06it/s, episode=600, return=-4.000]\n",
305 |       "Iteration 3: 100%|██████████| 200/200 [01:36<00:00,  2.07it/s, episode=800, return=-4.100]\n",
306 |       "Iteration 4: 100%|██████████| 200/200 [01:35<00:00,  2.09it/s, episode=1000, return=-4.500]\n",
307 |       "Iteration 5: 100%|██████████| 200/200 [01:34<00:00,  2.11it/s, episode=1200, return=-4.500]\n",
308 |       "Iteration 6: 100%|██████████| 200/200 [01:36<00:00,  2.08it/s, episode=1400, return=-4.600]\n",
309 |       "Iteration 7: 100%|██████████| 200/200 [01:35<00:00,  2.09it/s, episode=1600, return=-4.100]\n",
310 |       "Iteration 8: 100%|██████████| 200/200 [01:35<00:00,  2.09it/s, episode=1800, return=-4.300]\n",
311 |       "Iteration 9: 100%|██████████| 200/200 [01:35<00:00,  2.09it/s, episode=2000, return=-3.600]\n"
312 |      ]
313 |     },
314 |     {
315 |      "data": {
316 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEWCAYAAACNJFuYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZgcZbn38e+dfZkkk8m+T3YSQhKSIQlLEmLCTgwCAqLsgnjgAB4URTyIC6+Ioh6PHgQXFvWAHBXZEYIKyCIECIGELUAgOwnZ9+1+/6jqmequ7pnumeklM7/Pdc013U9VV91V1V131fNUPWXujoiISFSLYgcgIiKlR8lBRERilBxERCRGyUFERGKUHEREJEbJQUREYpQcpEkws6lm9lYtwyvNzM2sVSHj2t+Y2RYzG5Jh2Llm9s8CxXG7mX23luFuZsMKEUtzpeQgAJjZEjPbbmabzWyDmT1rZhebWYvIOLeb2a5wnM1m9rqZfc/MukTGOdfM9oY7mU1mNt/MTowM72RmPwrnt9XMPjSzP5rZ5IbE7+5Pu/vIlOWZVd/ppds5pSaYyPrYEvl7NWXcRPkSM/tafeOpR/z1Ws/uXubu72Ux/VvM7ObI+9bhfNKVTWnY0kgxKDlI1Gx37wQMAm4Avgr8OmWcG8NxegDnAVOAZ8ysY2Sc59y9DCgPP3+PmXU1s7bA34CDgBOBzsAo4G7guPwtVl7dGO5QE3/jUoaXh+viVOA/zeyofAdUn/VcjzOqp4BpkfdVwIfA1JQygJdymbCZtcwxFskDJQeJcfeN7n4/cDpwjpmNSTPODnd/Efgk0I0gUaSOsw/4DdAeGAqcBfQHTnL31919r7tvdfc/uvt16WIxszvM7Mrwdb/waPyS8P1QM1tnZi3M7EgzWxaW/xYYCDwQHrVfFZnkZ8Oj6LVmdk09V1HO3H0esBAYn2kcMzvMzF40s43h/8Miw/5hZt8xs2fCs7bHzKx7hklltZ4T69LM3gHeiZQNC193M7P7wzPAFwi2YcJTwKhIDFMJkk/HlLLn3H23mY0Kl2GDmS00s09G4rjdzG42s4fNbCswI826+YqZrTSzFWZ2fqZ1KI1HyUEycvcXgGUkHw2mjrMZeDzdOOHR6OeBLQQ7n1nAX919aw5hPAkcGb6eDrxHzRHrdODpMAlFYzqL4Ch2dng0f2Nk8BHASGAmcK2ZjcohlnoLq1bGAIszDK8AHgJ+SpBsfwQ8ZGbdIqOdSZCEewJtgC9nmF0u6/kkYDIwOs2wnwM7gD7A+eEfAO6+FPiAmu0+DXgaeDal7Ckzaw08ADwWxv7vwO/NrLoaMFy264FOQFK7hpkdGy7rUcDwcPkkz5QcpC4rgIocx5liZhuAVcBngE+5+0age1gGgJmND48kN1nmxuQngSPCto9pwI3A4eGw6eHwXHzL3be7+6vAq0BqNVDUl8P4NoTLs6CucczsjpTha81sO/Ac8D/AXzLM6wTgHXf/rbvvcfe7gDeB2ZFxbnP3t919O3APmc9CclnP33P3deE0iXymJXAKcG141vE6kLpsTwLTwm0zCXieIEEkyg4Px5kClAE3uPsud/8b8CDBdyPhPnd/xt33ufuOlPmcFi7762HCuy7DcksjUnKQuvQD1uU4zvPuXu7u3d19irvPDcs/JjgKBcDd57t7OXAy0DbdhN39XWArwY5wKsFOZUV41Fmf5LAq8nobwU4rkx+Gy1Eexjm2rnHc/ZyU4d3DeVxJcAbUOsO8+hIciUd9QLBuc409l/W8NMM0egCtUoanxpdodzgIeM/dtxEc9SfK2gP/Ili2pSlneKnLlikOEp+vJQ7JAyUHycjMDiH4AWe8fNHMyghO85/OYpJPAEenNF5n40mCBt027r48fH8O0BWYn+EzJdPdcFjn/yOCKpp/yzDaCoILAaIGAsvrMctc1nOm9bQG2AMMSIkn6imCM68TqNn+C8PPnAC8GJ4FrAAGWOTKN+LLVtv2WllHHJIHSg4SY2adLbj89G7gd+7+Wppx2prZRIJqkvXAbVlM+k6CH/q9ZjbGzFqaWTtqrmrJ5EngUoKdEcA/wvf/dPe9GT6zGkh7vX4R3QBcFS5zqoeBEWZ2ppm1MrPTCdoBHqzHfOq7nquF6/XPwHVm1sHMRhMk5Og4iwnW8+WEycGDZwD8KyxLbK9/EZzpXBVe3nokQXXZ3VmGcw9wrpmNNrMOwDezXQ6pPyUHiXrAzDYTnMJfQ9AomnoV0lXhOB8T7IReAg7LpvEzPIqcASwiaHzdBLwFHEJQr5zJkwQNlYmdzT+BDpH36XwP+EZY156p4bYxXGXJ9zmsrWXchwgS6YWpA9z9Y4LLTq8kWLdXASe6e23TS6sB6znVpQRVV6uA20l/APAUQRXUM5Gypwkanp8K49lFkAyOA9YStL2c7e5vZrk8jwA/Ibg8d3H4X/LM9LAfERFJpTMHERGJUXIQEZEYJQcREYlRchARkZgm0X1x9+7dvbKysthhiIjsV1566aW17t4j3bAmkRwqKyuZN29escMQEdmvmFnGu81VrSQiIjFKDiIiEqPkICIiMUoOIiISo+QgIiIxSg4iIhKj5CAiIjFN4j6H/dmStVu595XlnDC2D0N7lPGnl5ZxysT+PLhgBUN7lLFh2242bt/NoG4dWLNlJzNG9gTgmcVr6dK+Nbc9s4TLZg6jQ5tWvPzhej78eBvXP/wGp1cNYMvOPfTu0o5hPctYuWE7DyxYycXTh7B3H+zcs5ePNu/kgVdXcEhlBZ3atWLuotV0aNuK8QPKad2yBT06taVzu1Z8dvIgdu7Zy/cffZPJg7tx0sH9Ysvx+TtepEObVhw5sgfHHNib9dt2cc+8ZfTv2p7hPcv425sfsXPPPiZVVrBnn/PO6s3MHteXy/8wn3+fMYwBFR1Yu2Unyzdsp2uHNry2bANfPHIYF9zxIsN6lvHtOWMA2LF7Lw+8uoIhPcpo17oF67fu5v89/AYXThvMlp17+cebHzFhUFfatW7JBUcMZsnarSxdvw13qOzWkYHdOnDvK8u4+4WlXDZzOL948l127N7LpMEVnDWlkkdfX4mZsfijLVw2czg3PfYWG7btZsKgcrbt2svIXp3468JV7Nnn3HDKWH773AdMGVLB4o+28PryjWzdtZczJw/koQUrmTmqJ4tWbGLTjj1s2r6bLx01gr+/+REzR/Xk8UWrOaSygr8uXMXQHmX06NSW3z3/AfM+WM9xY3ozrGcZ767ZSttWLVizeScDKzrQp0s7Jg2uoFtZW5au28bMm55k19593HhqzQPqWrc0PjmuH7c89S7zlqynV+e2PPnWGo4+sDe9u7TjmcVrOWxodzbv2E1lt450aNuSe+YtY9rw7swc1YtXl27go807qKqs4KNNOwHYunMPJ0/ox4JlG3nizY/o3K4Vn64aQJf2rdm9dx9f//NrtDCjV5d2TBvenW279vL3tz7itmeW8OgVU/nXe+vYsXsvZ0wayJ69+7jp8bfZuH03kwdXc
Pahlfz874up7NaRyu4d+OvC1WzesZsendoyvGcnBlS0Z8uOPVRVVvDyh+vZsXsvi1Zs4tl3P2bGAT2p6NCGe19ZRp8u7Rneq4zHF61m3dZd/OT08by9eguLP9rCI6+vpHP71pw6oT+d27emc/tWPPHGRyxbv40vTB/KA6+u4LzDBvOTJ95mypBuTBncjdk/+yfDepYxrEcZQ3p05K3Vm2nTsgXPvvsxEwaWs3ufU96+NcN7lrFh+24mDurKP99ZywF9OrF1517+9f46du/Zx6WfGMZtzyzhuk+OplO71uzcs5f75q9g3z7nlqfe4/Bh3fj0xOA5RmP7d+HPLy9nZO9O3PbMEiYPruDYg3pT1qYVd734IVt37mH5+u20aGGcd9hg/vO+13lr1WbmXjmdsraNvytvEl12V1VV+f56E1zl1x6qfv3tOQdy7X0LOa2qP/fMW5Z2/CU3nBD7HMABvTvx5qrNeYnxc1MGsnTddp58ew0A7/2/42nRwqqHv7FyE8f9V82D4E6Z0J8n3lzNhm27GzTfkyf0488vBw8Le/Zrn6BveXu+9/Ab3PLUe1l9/q3vHsvIbzyaVLbkhhNi6y5h2ogePBUuYzaOHt2Lxxatznr8ARXtWbpuO/3K27N8w/a6P5DG+AHl/OWSwzMuA8DpVQP4w7zanrqZu3u+cCin3fJc9fuZB/Tk1+cews//vpgf/DXT47+TfeWYkWzYtotfPv1+ddlzV3+CQ79X9+MZattu6bRt1YKde/bVPWKBnDC2Dz8/cwLfe+QNbnky/ff3rgun8JlfPp9UdvxBvTl6dG+u+EOmBx4G39s7z59Ur7jM7CV3T/sQKFUrlZB1W3cB1GvH8cHH2xo7nGorN+xg5caamFJ/dLv3Jr9ftWl7gxMDwNJ1Ncu0d19wELN6U+qz5zPbsTu3ncP7a7fkNP6H63Jb50vXBeuwvokhmEbd81yxsf7Tz2TH7uQH7iWWYcO2XVlPY83mnXy8NXn8PXvzc3BaSokBYNXG4Hu7fH3mbbNpR/w3s3rTTtbXsY7f/Si37222lBykTrv3Oa1a1HxVUncU0WEAhtEYoj/wxJmKWfbT3rk70xNE09uX4/5k6649OY3fqkXD10sOi9+oWrVMP+NWLbPfhezd57Rt1Tx3OYmDm121JK10tTgtre5f0849uX3Ps6U2h/3M+2u3snVnfKe0PccdYS5Sq1q+89AiDh3SjfIObViydmts3v9cnPOTLdNasGxj9es7nl3CgIoO3PvK8lo+kezHc9+Jlf32+YxdyeR8RJ84E8jWnn0NP0peu2UX/1dHldHT7zTO+o/6Xcp6e3PVZn719Hv89fVVWU/jt89/QL/y9kllN2ZZJfWrp7OrSixV85du4FdPv1drNWS6quQXlqxj2frazxa378rPb19tDkUWrUe9YtZwfjL3HQ4f1o1nFn9cxKhEZH+RaIeqj9raHHTmIFJiupe1Ye2W7Ovypfn6+ZkTqKrsmpdpN88KQJESdlC/LsUOodnr2qF1rcMbo/2oMZwwtg+9OrfLy7SVHERKTCM0TUgD1bUNGqP9qNQpOYiUmH1NoB1wf1dXW+yYfp0LFEnxKDlITl74+syk91WDauo7T5nQPzb+4O4dq1/fdeEUzju8stbpP3rFVL567AHV728795DYOD+I3BH8zdmjq1/PGBl/2uEvz67ify+cHCtfcN3RSe8PHlie9P7wYd2S3n/rkwdWv/7fCyfzmUkDq9+P6lOzo/jvzxzMF6YNoW+X9Kf6s0b1Slt+zfGj0pbfcf4kvn58zfo4Ylj3pOHTR/Tg95+PLx/ArWdN5P5Laxoqv3LMyKThc8b35cSxfdJ+NuHq4w7ghpMPSiob2asTQF4vS/3eyQfx9FUzah3nzMkDY2XPXz2TH356HP/9mYMbNP+60vPvLpjMI5dPbdA8cnHh1MFpf1/5pOTQTAzt0bHukerQuV0rekbqNzu2CbqoSDj9kAGxz0R3PsN7lfHN2QfGxoka3L0jXzxyaPX7QwZXxMaZPa5v9evzDq+Z/2lVwfwP7BvsrDu1bcVRo3tx2NDkHWqwLMl1yocNTU4GI3slHxkec2DvyLjduXzmcCCom47uJGaP68vVx4/i+pQdas1849eATB/RgwunDal+Hz1zmD6iBxdNq1kfN5ySPN3LZg7n8GHx5QOYcUBPBlXUbPeLpw9NGn7okG4MqOhQ/T6R6M89rLI6zpMO7scZk5J3wokG0KsiSbyxfWbSQPp3Tb7stVNKFxEDunYgVe8u7Th1Yv+k70h97Kul2qh7WVvKO7RJOijIZOKg9I3FkyqTv9flHVpTXks7x+emDOKm08Yxa1TPOufZWJQcmokWjXD3VOrPpU2rFuyO/Ihapmmki56dt2vdss55tE65oa5lmrjragxMF0dd6qrJadc6Oa7ELFq2yO0ntDfdjU4p8dYWSy43AbZqYVgkvNTVYpaciGpuNKypU2/XKr7NEvHmu0k2dVl3p9ylmM8bAhvrvqFMX8XU2I2aG+XSSWymXLZ/Qyk5NDPZVgWk2wF37dAm6X3PTu1oHRkv3U65fZuWVHQMPtcui3m3SJlGun1vpp1/m3D6ndIcndelrvbFtml2khBcdpqLjmk6SOvSPvmIsbZO1HLJe6k7knQ7lvaRhJ24QqesbSvKw5jahkmxTWTbJQ400nX3kE+p66ljm7oPNnLRq3Pb6teN1ZFduu0NNeswcdDRtWOb2O8rKvGdL+RFUkoOTdzvLpjMdbNHVx+p3HLWRAZ1qzkd/85JQW+nHdu05JazJlaX33H+JK49saY+/ztzDozVbf/63CqOjlS3tDCSpnHlUSO44IjB/OmLh3H9p8bU2tXChVMH8/1IlcldF07htnMPSTpzmDKkgm+cMCq2k3vw34/gv84Yz4yRPfnKMSOT2gcSenaq+eE/esXU6nWT2Om5O/fVciNR+zYtOfvQQdXVST07t+Obs0fzmzRtIrWJtqd8adYILvvEMK4L4/3TFw/jF5+byPdPGcsJY/vw0zT15oZx23nxeT58Wfr679Tqs2gVmGFcPH0oBw8sZ0y/zvz0MwfzH0eN4JIZw7jroil896Qx1Wd7j1w+lcOGduPhy6ZWJ4zVm3byuwsm819njOerxx7A3RdNSarOeexL06pfj+rTmT998bDq92P753a57gkH9eEPFx2aVHbGpIEM61nGvf92GH+55HD+57MTsppW9C7taFXbnedP5j9PHM03Z4/m0SumceVRI6qH/ezMgzk57I043cH7jaeO5c//dlis/AvThsZHJvg+feekMTx2xXS+PedA7jx/UtrtCsG6S1T/RQ+M7r5oSi1L2XC6Ca4JGNu/S1JXE1FHDO/OEcO7c9cLQZcLvbu047zDKrnugUWcc+ig6rrlGQf05NBIvfvhw7pz+LDufPvBRQCcdWhlbNr9yttjZhzYtzMLV2zCsKS6+X8Pd6SDu3dMaphO58qjRyZVOyViiV41cnfKziFhTL8ujAnvDbhkxjDWbN4ZG2faiB788aVl3HjqWA7o3bl63Vx51Ai+98ibODBuQHns
c1GJbsMTou0d2Yq2OVw+a3jSsGj99M/PTL+jM4MZI3ty8MByXvlwQ3X56L7J9d/pGuch2NGcOrE/f3xpGVhQ1Xfvv9UkxcvCbTaoW0cGdavZZkN7lPG/FwY7o1eWrq8uP2J4cnvH6k07eODVFQCM6NUJs6BK5ObPTqAy8h04alSv2Hf2rCmDMnZvcubkgUmfB2jdsgVz/2N69fvxdWy/hCeunM4R3/8ba7fs4oIjBvOLJ98Nl7kDI3vXbNPPTx3CTY+/DcCJY/syrn85f87QfctpVQPYnOZMqm3rFiy54QSuf2hRUm+07s5ZUwYBcHb424r2u5TowRdIatezpIOl5HayxqYzhyYgl/aEWuuzc5xv4otaUx+a4wQiMi1DfepYczn1Tsy3tgbIxtTQOuPGqHJu6JWyNV3BZT+h1KrAXXvjHdDV1lbUmLUpmb5rqVWuqVWaLepoa6ntd5hmcWOiyx/tbi+6Whqj7TBbSg5NQDY7w8R3KnXHEN1Z1XfHlWjUbMj3tj6NyJnk8gNKjLq/3NPUGD3eerhTr++UMn2X0s4rw4FDut5JW2fo+TWYQJbBZSHTdy31+5/6PaorhNq+dqn3rqT7rSUngZrX0XgL2eagaqUmIJud+g8/PY4fPvYWw3qW8cL76Tv1SzeVId078v7HW5PKbjv3EB5csDLN54MpfPXYA6itJ+cvTBtCq5bG/KUbOGtKJQ8sWFHrl/7kg/tx4rjk6/FPq+rPzAz3DKQ2agNcPnM4y9Zv49gxvZPKT53Yn7lvrOai8FLSb84ezeYde6qfR9G9rE31sFxMGdyNI4Z156pjR3LDI2+yd59z/EG131OQjdo29SUzhtK1Qxv+8dYarjlhdMbxvjRrBCs2bI+ti1ylSw6JbZJ6TX7qDvmCIwbz2vKNXHPCKK6851VatTQ+P3UIHdu2ijU8p/Prc9L2FZfkc1MGMrJXJx56bSUnjO3LLU++y7gB5bQw+MXnJnLLU+9R0bENvzy7iifeiPeWGksOlvy/rvGh5jd18fSh3P7sklrjNTOOG9ObuW+s5qbTxnPKzc9Wlyd8+eiR3Dd/Ra3TaSxKDk1ANgcTY/p14fbzkp8W5Smv033p//blI2NlMw7oyYwDaq63TuwkEqfh0fsU0rk65YavunZSPzp9fKzsxlPHZRw/3XIMqOiQts2ivEObpPJEO8K3HwjaWi6ePpTPT809ObRv05LfhQ34ibr6xlDbWdFXjgkau+uKN9O6yFZNpVI8O5S1bVX9tMKo1EuSe3ZuV71eHr2ipuH6ilkjksabNLiCF95fFztjynRgEPXdk4ILHBLtZYk6foCqygqqwnsNjhrdi6NGx6cXu/Q3h9OXcf278OqyjdU79t4ZbopMdfPnggs69kTqoaJxRO9LyTdVKzUBDanOsaTXDaxWyvuV71IKa7g+37d6t7U0QntWfeUaczRx11XjVteUo9NKd69PISg5NAH13SkbyV/iBv5+i/aUsuaklNZxLg3b9W1Tamj7SCGlW8RMcdeZPCz6WslB6indd2dQtw4cneZUOdfpZCNRJ9835SlfxZK4sSv1UtFcJNo4po9If0losSR2FOeH1V9D6rhEOC8xhLu8XNrwEzvOXNtvEtV8w8P+nM4/fHCjXryQi3TVaJ86uB/dy4J7aKI78cTlp9F7inL5PUanNW5A8j0hPTq15aTxDeseJBtqc2gCUnfqBw8sT7p2vdbPJr2u34/utKoB1f0alYLWLVukrffOxYSBXRs8jXxIbOvZ4/o2uP+g+gcR/MvlzCFxkcDXjx/F1zN0MpjO8Qf1SdoO184ezbWzMze2F0L0d/LjSHtYNGfNGd+POeP7JX3u1rOrmLtoNZ+/c15Ov7Q+XZIPul68ZlZO8daXzhyagNSden2vYy+lKgtJrxQ2UX1iKFa9eWOq63dVrOqffFFyaAIa5TvppbHjkdoV8iaoTKpvfsyhYqkU4m4sDVmU/eR2GkDJoUlI1GVOTtO9dTqJroYnDOpaEg1fTcWAiuzbXEZn0d1zOvXZRKnPqmio6hByqlZq1BCKoizs+iTdZa9R2bQt7A8/NbU57OfuvmgKU4Z04/iD+vDhum2c+ovn6vzM5CHdeO7qT9CnS/vqfnCgsHdfNjXzrz0qY8+tqV75z6NoX88eRevTLnTXhVPYtqtxuqCOyuUouClUK3Vu15oXrplJRS29p877xqxYZ4fp7A8P+ytKPjezT5vZQjPbZ2ZVKcOuNrPFZvaWmR1TjPj2J33DxqqendtVN/pl871LbeQCnTk0RHmHNlnv8Lt2bJPVsy3Sqc8mate6ptv0xlDTfUbzq1bq2aldrb0Ldy9rm9S9+f6sWGcOrwMnA7dEC81sNHAGcCDQF5hrZiPcvfEPe5qIhv7mmshvttkohe1VnRxy+Ey6Lk2as1LYjnUpSopz9zfc/a00g+YAd7v7Tnd/H1gMTEoznoSiR6CJ7qCHFuHadymMUrgLPXFdf7rHdKZqjMfTNmfRBxAVWqm1OfQDno+8XxaWxZjZRcBFAAMHxh803lz0iDzEZljPTtx+3iFMHpx7P++5XHkixVMKB+BTh/fgV2dXMT3DMyOi7vnCoby7Zmud4zUXuVTF/emLhxa0L6VUeUsOZjYXSNej2jXufl9Dp+/utwK3AlRVVWnPFjpyZG4PIC+FI1HJXqm0C83K8m7fbmVt6VZWvKPf0lX3dpw4KLurD/Mlb8nB3etzG99yIHqrbf+wTETQvShSOKXWrH4/cIaZtTWzwcBw4IUix9SkJR6wUt+rZ6QwEg+8L5ETB6mn1uGVTvW9lLmQitLmYGafAv4b6AE8ZGbz3f0Yd19oZvcAi4A9wCW6Uim/Zo7qxeUzh1d35Pbdk8ZwSGVxT2cl7r5LD+ef76wtmWql5uK/zhhPv0bsUHL6iB5cMWs45x5W2WjTzJeiJAd3vxe4N8Ow64HrCxtR89WyhfGlo2oesPK5yANRpHQM7VHG0B5lxQ6j2UntPK+hWrSw2AONSlWpVSuJiEgJUHIQEZEYJQcREYlRchARkRglBxERiVFyEBGRGCUHERGJUXIQEZEYJQcREYlRchARkRglBxERiVFyEBGRGCUHERGJUXIQEZEYJQcREYlRchARkRglBxERiVFyEBGRGCUHERGJUXIQEZEYJQcREYlRchARkRglBxERiVFyEBGRGCWHEuKe/F9EpFiUHEREJEbJoYSYJf8XESkWJQcREYlRchARkRglhxKiBmkRKRVKDiIiEqPkUELUIC0ipULJQUREYoqSHMzsB2b2ppktMLN7zaw8MuxqM1tsZm+Z2THFiE9EpLkr1pnD48AYdx8LvA1cDWBmo4EzgAOBY4H/MbOWRYqx4NQgLSKloijJwd0fc/c94dvngf7h6znA3e6+093fBxYDk4oRo4hIc1YKbQ7nA4+Er/sBSyPDloVlMWZ2kZnNM7N5a9asyXOIhaEGaREpFa3yNWEzmwv0TjPoGne/LxznGmAP8Ptcp+/utwK3AlRVVakiRkSkEeUtObj7rNqGm9m5wInATPfqWvblwIDIaP3DMhERKaB
iXa10LHAV8El33xYZdD9whpm1NbPBwHDghWLEWAxqkBaRUpG3M4c6/AxoCzxuQQX78+5+sbsvNLN7gEUE1U2XuPveIsUoItJsFSU5uPuwWoZdD1xfwHBKhhqkRaRUlMLVSiIiUmKUHEREJEbJQUREYpQcREQkRslBRERiskoOZna5mXW2wK/N7GUzOzrfwYmISHFke+ZwvrtvAo4GugJnATfkLSoRESmqbJND4sr744HfuvvCSJmIiDQx2SaHl8zsMYLk8Fcz6wTsy19YIiJSTNneIX0BMB54z923mVk34Lz8hSUiIsWUVXJw931mthoYbWbF6o9JREQKJKsdvZl9HzidoEO8REd4DjyVp7hERKSIsj0LOAkY6e478xmMiIiUhmwbpN8DWuczEBERKR3ZnjlsA+ab2RNA9dmDu1+Wl6hERKSosk0O94d/IiLSDNSZHMysJXCuu88oQDwiIlIC6mxzCB/TuVIeQwMAAA7TSURBVM/MuhQgHhERKQHZVittAV4zs8eBrYlCtTmIiDRN2SaHP4d/IiLSDGR7h/Qd+Q5ERERKR7Z3SL9PcEd0Encf0ugRiYhI0WVbrVQVed0O+DRQ0fjhiIhIKcjqDml3/zjyt9zdfwKckOfYRESkSLKtVpoQeduC4ExCvbOKiDRR2e7gb4q83gO8D5zW+OGIiEgpyPphP+7+XrTAzAbnIR4RESkB2fbK+scsy0REpAmo9czBzA4ADgS6mNnJkUGdCa5aEhGRJqiuaqWRwIlAOTA7Ur4ZuDBfQYmISHHVmhzc/T7gPjM71N2fK1BMIiJSZNm2OXxsZk+Y2esAZjbWzL6Rx7hERKSIsk0OvwSuBnYDuPsC4Ix8BSUiIsWVbXLo4O4vpJTtqe9Mzew7ZrbAzOab2WNm1jcsNzP7qZktDodPqGtaIiLS+LJNDmvNbChh53tmdiqwsgHz/YG7j3X38cCDwLVh+XHA8PDvIuDmBsxDRETqKdub4C4BbgUOMLPlBHdIf7a+M3X3TZG3Hanp8XUOcKe7O/C8mZWbWR93b0giEhGRHGX7PIf3gFlm1pHgbGMbQZvDB/WdsZldD5wNbAQSz6fuByyNjLYsLIslBzO7iODsgoEDB9Y3DBERSaPWaiUz62xmV5vZz8zsKIKkcA6wmDr6VjKzuWb2epq/OQDufo27DwB+D1yaa+Dufqu7V7l7VY8ePXL9uIiI1KKuM4ffAuuB5whuersGMOBT7j6/tg+6+6wsY/g98DDwTWA5MCAyrH9YJiIiBVRXchji7gcBmNmvCKp3Brr7jobM1MyGu/s74ds5wJvh6/uBS83sbmAysFHtDSIihVdXctideOHue81sWUMTQ+gGMxsJ7CNot7g4LH8YOJ6g2mobcF4jzEtERHJUV3IYZ2aJK4sMaB++N8DdvXN9Zurup2Qod4Iro0REpIjq6lupZaECERGR0pHtTXAiItKMKDmIiEiMkoOIiMQoOYiISIySg4iIxCg5iIhIjJKDiIjEKDmIiEiMkoOIiMQoOYiISIySQwlxT/4vIlIsSg4iIhKj5FBCzJL/i4gUi5KDiIjEKDmIiEiMkkMJUYO0iJQKJQcREYlRcighapAWkVKh5CAiIjFKDiIiEqPkUELUIC0ipULJQUREYpQcSogapEWkVCg5iIhIjJKDiIjEKDmUEDVIi0ipUHIQEZEYJYcSogZpESkVSg4iIhKj5CAiIjFKDiVEDdIiUiqUHEqQkoOIFFtRk4OZXWlmbmbdw/dmZj81s8VmtsDMJhQzvkJTg7SIlIqiJQczGwAcDXwYKT4OGB7+XQTcXITQRESavWKeOfwYuAqIVqLMAe70wPNAuZn1KUp0IiLNWFGSg5nNAZa7+6spg/oBSyPvl4Vl6aZxkZnNM7N5a9asyVOkhaUGaREpFa3yNWEzmwv0TjPoGuDrBFVK9ebutwK3AlRVVTWp3anTpBZHRPZDeUsO7j4rXbmZHQQMBl61oOW1P/CymU0ClgMDIqP3D8uaheoGadQiLSLFVfBqJXd/zd17unulu1cSVB1NcPdVwP3A2eFVS1OAje6+stAxiog0d3k7c6inh4HjgcXANuC84oYjItI8FT05hGcPidcOXFK8aIqrukFabQ4iUmS6Q7oE6WolESk2JYcSojukRaRUKDmIiEiMkoOIiMQoOZQQ3SEtIqVCyaEEKTeISLEpOZSQmjukRUSKS8lBRERilBxERCRGyaGE1NwhLSJSXEoOpUjZQUSKTMmhhFTfGa0WaREpMiUHERGJUXIQEZEYJYcSUn1ntNocRKTIlBxKkJ7nICLFpuRQQvQMaREpFUoOIiISo+QgIiIxSg4iIhKj5FCC1CAtIsWm5CAiIjFKDiIiEqPkICIiMUoOIiISo+RQglzt0SJSZEoOIiISo+QgIiIxrYodgNTP/118KMN7lhU7DBFponTmsJ/q0r415R3aFDsMEWmilBxKkNqjRaTYlBxERCRGyUFERGKKkhzM7DozW25m88O/4yPDrjazxWb2lpkdU4z4RESau2JerfRjd/9htMDMRgNnAAcCfYG5ZjbC3fcWI0ARkeaq1C5lnQPc7e47gffNbDEwCXguHzN78u01fPfBRfmYdL38ZO47ALz0wfo6x9WDREUkn4qZHC41s7OBecCV7r4e6Ac8HxlnWVgWY2YXARcBDBw4sF4BlLVtxfBexb1XoGUL481VmxnUrQOjenfm0YWrOPbA3jy6cBUDKtqzdN12AA4f1o0la7exatMOThzbh2G6x0FE8ihvycHM5gK90wy6BrgZ+A7BVZvfAW4Czs9l+u5+K3ArQFVVVb2u/pw4qCsTB02sz0dFRJq0vCUHd5+VzXhm9kvgwfDtcmBAZHD/sExERAqoWFcr9Ym8/RTwevj6fuAMM2trZoOB4cALhY5PRKS5K1abw41mNp6gWmkJ8AUAd19oZvcAi4A9wCW6UklEpPCKkhzc/axahl0PXF/AcEREJIXukBYRkRglBxERiVFyEBGRGCUHERGJMW8CT7M3szXAB/X8eHdgbSOG01hKNS4o3dgUV24UV26aYlyD3L1HugFNIjk0hJnNc/eqYseRqlTjgtKNTXHlRnHlprnFpWolERGJUXIQEZEYJYew874SVKpxQenGprhyo7hy06ziavZtDiIiEqczBxERiVFyEBGRmGadHMzsWDN7y8wWm9nXCjzvAWb2dzNbZGYLzezysPw6M1tuZvPDv+Mjn7k6jPUtMzsmj7EtMbPXwvnPC8sqzOxxM3sn/N81LDcz+2kY1wIzm5CnmEZG1sl8M9tkZlcUY32Z2W/M7CMzez1SlvP6MbNzwvHfMbNz8hTXD8zszXDe95pZeVheaWbbI+vtF5HPTAy3/+Iw9gY9lTZDXDlvt8b+vWaI6w+RmJaY2fywvJDrK9O+obDfMXdvln9AS+BdYAjQBngVGF3A+fcBJoSvOwFvA6OB64Avpxl/dBhjW2BwGHvLPMW2BOieUnYj8LXw9deA74evjwceIXis9RTgXwXadquAQcVYX8A0YALwen3XD1ABvBf+7xq+7pqHuI4GWo
Wvvx+JqzI6Xsp0XghjtTD24/IQV07bLR+/13RxpQy/Cbi2COsr076hoN+x5nzmMAlY7O7vufsu4G5gTqFm7u4r3f3l8PVm4A0yPC87NAe42913uvv7wGKCZSiUOcAd4es7gJMi5Xd64Hmg3JIf5pQPM4F33b22u+Lztr7c/SlgXZr55bJ+jgEed/d1Hjw//XHg2MaOy90fc/c94dvnCZ6umFEYW2d3f96DPcydkWVptLhqkWm7Nfrvtba4wqP/04C7aptGntZXpn1DQb9jzTk59AOWRt4vo/adc96YWSVwMPCvsOjS8PTwN4lTRwobrwOPmdlLZnZRWNbL3VeGr1cBvYoQV8IZJP9oi72+IPf1U4z1dj7BEWbCYDN7xcyeNLOpYVm/MJZCxJXLdiv0+poKrHb3dyJlBV9fKfuGgn7HmnNyKAlmVgb8CbjC3TcBNwNDgfHASoJT20I7wt0nAMcBl5jZtOjA8AipKNdAm1kb4JPA/4VFpbC+khRz/WRiZtcQPF3x92HRSmCgux8M/Afwv2bWuYAhldx2S/EZkg9ACr6+0uwbqhXiO9ack8NyYEDkff+wrGDMrDXBxv+9u/8ZwN1Xu/ted98H/JKaqpCCxevuy8P/HwH3hjGsTlQXhf8/KnRcoeOAl919dRhj0ddXKNf1U7D4zOxc4ETgs+FOhbDa5uPw9UsE9fkjwhiiVU95iase262Q66sVcDLwh0i8BV1f6fYNFPg71pyTw4vAcDMbHB6NngHcX6iZh3WavwbecPcfRcqj9fWfAhJXUtwPnGFmbc1sMDCcoCGssePqaGadEq8JGjRfD+efuNrhHOC+SFxnh1dMTAE2Rk598yHpiK7Y6ysi1/XzV+BoM+saVqkcHZY1KjM7FrgK+KS7b4uU9zCzluHrIQTr570wtk1mNiX8jp4dWZbGjCvX7VbI3+ss4E13r64uKuT6yrRvoNDfsYa0qu/vfwSt/G8THAVcU+B5H0FwWrgAmB/+HQ/8FngtLL8f6BP5zDVhrG/RwCsiaolrCMGVIK8CCxPrBegGPAG8A8wFKsJyA34exvUaUJXHddYR+BjoEikr+PoiSE4rgd0E9bgX1Gf9ELQBLA7/zstTXIsJ6p0T37FfhOOeEm7f+cDLwOzIdKoIdtbvAj8j7EmhkePKebs19u81XVxh+e3AxSnjFnJ9Zdo3FPQ7pu4zREQkpjlXK4mISAZKDiIiEqPkICIiMUoOIiISo+QgIiIxSg4iITPba8k9v9ba86eZXWxmZzfCfJeYWfeGTkekMelSVpGQmW1x97IizHcJwbXpaws9b5FMdOYgUofwyP5GC/rsf8HMhoXl15nZl8PXl1nQ//4CM7s7LKsws7+EZc+b2diwvJuZPWZBX/2/IriJKTGvz4XzmG9mt5hZy/DvdjN7PYzhS0VYDdLMKDmI1GifUq10emTYRnc/iOAO2J+k+ezXgIPdfSxwcVj2LeCVsOzrBN05A3wT+Ke7H0jQd9VAADMbBZwOHO7u44G9wGcJOqfr5+5jwhhua8RlFkmrVbEDECkh28Odcjp3Rf7/OM3wBcDvzewvwF/CsiMIul3A3f8WnjF0JnjIzMlh+UNmtj4cfyYwEXgx6F6H9gSdqz0ADDGz/wYeAh6r/yKKZEdnDiLZ8QyvE04g6N9mAsHOvT4HXgbc4e7jw7+R7n6dBw9qGQf8g+Cs5Ff1mLZITpQcRLJzeuT/c9EBZtYCGODufwe+CnQByoCnCaqFMLMjgbUe9Mv/FHBmWH4cwSMcIehU7VQz6xkOqzCzQeGVTC3c/U/ANwgSkEheqVpJpEZ7Cx8oH3rU3ROXs3Y1swXAToJuw6NaAr8zsy4ER/8/dfcNZnYd8Jvwc9uo6W75W8BdZrYQeBb4EMDdF5nZNwiewteCoLfQS4DtwG1hGcDVjbfIIunpUlaROuhSU2mOVK0kIiIxOnMQEZEYnTmIiEiMkoOIiMQoOYiISIySg4iIxCg5iIhIzP8H5SuDHq9kvMcAAAAASUVORK5CYII=\n",
317 |       "text/plain": [
318 |        "<Figure size 432x288 with 1 Axes>"
319 |       ]
320 |      },
321 |      "metadata": {
322 |       "needs_background": "light"
323 |      },
324 |      "output_type": "display_data"
325 |     }
326 |    ],
327 |    "source": [
328 |     "actor_lr = 1e-3\n",
329 |     "critic_lr = 1e-3\n",
330 |     "hidden_dim = 128\n",
331 |     "state_dim = 4\n",
332 |     "action_dim = 2\n",
333 |     "action_bound = 1\n",
334 |     "sigma = 0.1\n",
335 |     "tau = 0.005\n",
336 |     "gamma = 0.98\n",
337 |     "num_episodes = 2000\n",
338 |     "n_train = 20\n",
339 |     "batch_size = 256\n",
340 |     "minimal_episodes = 200\n",
341 |     "buffer_size = 10000\n",
342 |     "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\n",
343 |     "    \"cpu\")\n",
344 |     "\n",
345 |     "random.seed(0)\n",
346 |     "np.random.seed(0)\n",
347 |     "torch.manual_seed(0)\n",
348 |     "env = WorldEnv()\n",
349 |     "replay_buffer = ReplayBuffer_Trajectory(buffer_size)\n",
350 |     "agent = DDPG(state_dim, hidden_dim, action_dim, action_bound, actor_lr,\n",
351 |     "             critic_lr, sigma, tau, gamma, device)\n",
352 |     "\n",
353 |     "return_list = []\n",
354 |     "for i in range(10):\n",
355 |     "    with tqdm(total=int(num_episodes / 10), desc='Iteration %d' % i) as pbar:\n",
356 |     "        for i_episode in range(int(num_episodes / 10)):\n",
357 |     "            episode_return = 0\n",
358 |     "            state = env.reset()\n",
359 |     "            traj = Trajectory(state)\n",
360 |     "            done = False\n",
361 |     "            while not done:\n",
362 |     "                action = agent.take_action(state)\n",
363 |     "                state, reward, done = env.step(action)\n",
364 |     "                episode_return += reward\n",
365 |     "                traj.store_step(action, state, reward, done)\n",
366 |     "            replay_buffer.add_trajectory(traj)\n",
367 |     "            return_list.append(episode_return)\n",
368 |     "            if replay_buffer.size() >= minimal_episodes:\n",
369 |     "                for _ in range(n_train):\n",
370 |     "                    transition_dict = replay_buffer.sample(batch_size, True)\n",
371 |     "                    agent.update(transition_dict)\n",
372 |     "            if (i_episode + 1) % 10 == 0:\n",
373 |     "                pbar.set_postfix({\n",
374 |     "                    'episode':\n",
375 |     "                    '%d' % (num_episodes / 10 * i + i_episode + 1),\n",
376 |     "                    'return':\n",
377 |     "                    '%.3f' % np.mean(return_list[-10:])\n",
378 |     "                })\n",
379 |     "            pbar.update(1)\n",
380 |     "\n",
381 |     "episodes_list = list(range(len(return_list)))\n",
382 |     "plt.plot(episodes_list, return_list)\n",
383 |     "plt.xlabel('Episodes')\n",
384 |     "plt.ylabel('Returns')\n",
385 |     "plt.title('DDPG with HER on {}'.format('GridWorld'))\n",
386 |     "plt.show()\n",
387 |     "\n",
388 |     "# Iteration 0: 100%|██████████| 200/200 [00:03<00:00, 58.91it/s, episode=200,\n",
389 |     "# return=-50.000]\n",
390 |     "# Iteration 1: 100%|██████████| 200/200 [01:17<00:00,  2.56it/s, episode=400,\n",
391 |     "# return=-4.200]\n",
392 |     "# Iteration 2: 100%|██████████| 200/200 [01:18<00:00,  2.56it/s, episode=600,\n",
393 |     "# return=-4.700]\n",
394 |     "# Iteration 3: 100%|██████████| 200/200 [01:18<00:00,  2.56it/s, episode=800,\n",
395 |     "# return=-4.300]\n",
396 |     "# Iteration 4: 100%|██████████| 200/200 [01:17<00:00,  2.57it/s, episode=1000,\n",
397 |     "# return=-3.800]\n",
398 |     "# Iteration 5: 100%|██████████| 200/200 [01:17<00:00,  2.57it/s, episode=1200,\n",
399 |     "# return=-4.800]\n",
400 |     "# Iteration 6: 100%|██████████| 200/200 [01:18<00:00,  2.54it/s, episode=1400,\n",
401 |     "# return=-4.500]\n",
402 |     "# Iteration 7: 100%|██████████| 200/200 [01:19<00:00,  2.52it/s, episode=1600,\n",
403 |     "# return=-4.400]\n",
404 |     "# Iteration 8: 100%|██████████| 200/200 [01:18<00:00,  2.55it/s, episode=1800,\n",
405 |     "# return=-4.200]\n",
406 |     "# Iteration 9: 100%|██████████| 200/200 [01:18<00:00,  2.55it/s, episode=2000,\n",
407 |     "# return=-4.300]"
408 |    ]
409 |   },
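
With HER the average return rises from -50 (the goal is never reached within the 50-step limit) to roughly -4, meaning the agent usually reaches a freshly sampled goal within a few steps. The trained policy can be checked directly with a short greedy evaluation loop, a sketch that reuses env, agent and device from the cell above and adds no exploration noise:

# Greedy evaluation of the trained policy; reward == 0 at the end means success.
n_eval, successes = 20, 0
for _ in range(n_eval):
    state = env.reset()
    done = False
    while not done:
        with torch.no_grad():
            s = torch.tensor([state], dtype=torch.float).to(device)
            action = agent.actor(s).cpu().numpy()[0]   # deterministic action
        state, reward, done = env.step(action)
    successes += int(reward == 0)
print('success rate: %.2f' % (successes / n_eval))
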
410 |   {
411 |    "cell_type": "code",
412 |    "execution_count": null,
413 |    "metadata": {
414 |     "id": "Cc0b1OlFGUTV"
415 |    },
416 |    "outputs": [],
417 |    "source": [
418 |     "random.seed(0)\n",
419 |     "np.random.seed(0)\n",
420 |     "torch.manual_seed(0)\n",
421 |     "env = WorldEnv()\n",
422 |     "replay_buffer = ReplayBuffer_Trajectory(buffer_size)\n",
423 |     "agent = DDPG(state_dim, hidden_dim, action_dim, action_bound, actor_lr,\n",
424 |     "             critic_lr, sigma, tau, gamma, device)\n",
425 |     "\n",
426 |     "return_list = []\n",
427 |     "for i in range(10):\n",
428 |     "    with tqdm(total=int(num_episodes / 10), desc='Iteration %d' % i) as pbar:\n",
429 |     "        for i_episode in range(int(num_episodes / 10)):\n",
430 |     "            episode_return = 0\n",
431 |     "            state = env.reset()\n",
432 |     "            traj = Trajectory(state)\n",
433 |     "            done = False\n",
434 |     "            while not done:\n",
435 |     "                action = agent.take_action(state)\n",
436 |     "                state, reward, done = env.step(action)\n",
437 |     "                episode_return += reward\n",
438 |     "                traj.store_step(action, state, reward, done)\n",
439 |     "            replay_buffer.add_trajectory(traj)\n",
440 |     "            return_list.append(episode_return)\n",
441 |     "            if replay_buffer.size() >= minimal_episodes:\n",
442 |     "                for _ in range(n_train):\n",
443 |     "                    # 和使用HER训练的唯一区别\n",
444 |     "                    transition_dict = replay_buffer.sample(batch_size, False)\n",
445 |     "                    agent.update(transition_dict)\n",
446 |     "            if (i_episode + 1) % 10 == 0:\n",
447 |     "                pbar.set_postfix({\n",
448 |     "                    'episode':\n",
449 |     "                    '%d' % (num_episodes / 10 * i + i_episode + 1),\n",
450 |     "                    'return':\n",
451 |     "                    '%.3f' % np.mean(return_list[-10:])\n",
452 |     "                })\n",
453 |     "            pbar.update(1)\n",
454 |     "\n",
455 |     "episodes_list = list(range(len(return_list)))\n",
456 |     "plt.plot(episodes_list, return_list)\n",
457 |     "plt.xlabel('Episodes')\n",
458 |     "plt.ylabel('Returns')\n",
459 |     "plt.title('DDPG without HER on {}'.format('GridWorld'))\n",
460 |     "plt.show()\n",
461 |     "\n",
462 |     "# Iteration 0: 100%|██████████| 200/200 [00:03<00:00, 62.82it/s, episode=200,\n",
463 |     "# return=-50.000]\n",
464 |     "# Iteration 1: 100%|██████████| 200/200 [00:39<00:00,  5.01it/s, episode=400,\n",
465 |     "# return=-50.000]\n",
466 |     "# Iteration 2: 100%|██████████| 200/200 [00:41<00:00,  4.83it/s, episode=600,\n",
467 |     "# return=-50.000]\n",
468 |     "# Iteration 3: 100%|██████████| 200/200 [00:41<00:00,  4.82it/s, episode=800,\n",
469 |     "# return=-50.000]\n",
470 |     "# Iteration 4: 100%|██████████| 200/200 [00:41<00:00,  4.81it/s, episode=1000,\n",
471 |     "# return=-50.000]\n",
472 |     "# Iteration 5: 100%|██████████| 200/200 [00:41<00:00,  4.79it/s, episode=1200,\n",
473 |     "# return=-50.000]\n",
474 |     "# Iteration 6: 100%|██████████| 200/200 [00:42<00:00,  4.76it/s, episode=1400,\n",
475 |     "# return=-45.500]\n",
476 |     "# Iteration 7: 100%|██████████| 200/200 [00:41<00:00,  4.80it/s, episode=1600,\n",
477 |     "# return=-42.600]\n",
478 |     "# Iteration 8: 100%|██████████| 200/200 [00:40<00:00,  4.92it/s, episode=1800,\n",
479 |     "# return=-4.800]\n",
480 |     "# Iteration 9: 100%|██████████| 200/200 [00:40<00:00,  4.99it/s, episode=2000,\n",
481 |     "# return=-4.800]"
482 |    ]
483 |   }
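
Without relabeling, almost every reward is -1 and the return only starts to improve after roughly 1700 episodes, compared with about 300 episodes when HER is used. Because the per-episode returns are noisy, a simple moving average makes the curves easier to compare; a small helper assuming only numpy and matplotlib from the imports above:

# Moving-average smoothing for the noisy per-episode returns.
def moving_average(values, window=9):
    values = np.asarray(values, dtype=float)
    kernel = np.ones(window) / window
    return np.convolve(values, kernel, mode='valid')

smoothed = moving_average(return_list)
plt.plot(range(len(smoothed)), smoothed)
plt.xlabel('Episodes')
plt.ylabel('Smoothed Returns')
plt.title('DDPG without HER on GridWorld (moving average)')
plt.show()
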
484 |  ],
485 |  "metadata": {
486 |   "accelerator": "GPU",
487 |   "colab": {
488 |    "collapsed_sections": [],
489 |    "name": "第19章-目标导向的强化学习.ipynb",
490 |    "provenance": []
491 |   },
492 |   "kernelspec": {
493 |    "display_name": "Python 3",
494 |    "language": "python",
495 |    "name": "python3"
496 |   },
497 |   "language_info": {
498 |    "codemirror_mode": {
499 |     "name": "ipython",
500 |     "version": 3
501 |    },
502 |    "file_extension": ".py",
503 |    "mimetype": "text/x-python",
504 |    "name": "python",
505 |    "nbconvert_exporter": "python",
506 |    "pygments_lexer": "ipython3",
507 |    "version": "3.7.6"
508 |   }
509 |  },
510 |  "nbformat": 4,
511 |  "nbformat_minor": 1
512 | }
513 | 


--------------------------------------------------------------------------------
/第20章-多智能体强化学习入门.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "colab": {
  8 |      "base_uri": "https://localhost:8080/"
  9 |     },
 10 |     "executionInfo": {
 11 |      "elapsed": 10107,
 12 |      "status": "ok",
 13 |      "timestamp": 1650012696153,
 14 |      "user": {
 15 |       "displayName": "Sam Lu",
 16 |       "userId": "15789059763790170725"
 17 |      },
 18 |      "user_tz": -480
 19 |     },
 20 |     "id": "-_L_dhppItIk",
 21 |     "outputId": "6c1eecf0-fd72-4d13-ad05-192463636129"
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "Cloning into 'ma-gym'...\n",
 29 |       "remote: Enumerating objects: 1072, done.\u001b[K\n",
 30 |       "remote: Counting objects: 100% (141/141), done.\u001b[K\n",
 31 |       "remote: Compressing objects: 100% (131/131), done.\u001b[K\n",
 32 |       "remote: Total 1072 (delta 61), reused 31 (delta 6), pack-reused 931\u001b[K\n",
 33 |       "Receiving objects: 100% (1072/1072), 3.74 MiB | 4.47 MiB/s, done.\n",
 34 |       "Resolving deltas: 100% (524/524), done.\n"
 35 |      ]
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "import torch\n",
 40 |     "import torch.nn.functional as F\n",
 41 |     "import numpy as np\n",
 42 |     "import rl_utils\n",
 43 |     "from tqdm import tqdm\n",
 44 |     "import matplotlib.pyplot as plt\n",
 45 |     "\n",
 46 |     "! git clone https://github.com/boyu-ai/ma-gym.git\n",
 47 |     "import sys\n",
 48 |     "sys.path.append(\"./ma-gym\")\n",
 49 |     "from ma_gym.envs.combat.combat import Combat"
 50 |    ]
 51 |   },
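
ma-gym exposes the per-agent observation and action spaces of Combat as lists indexed by agent, which is exactly what the later cell uses to size the PPO networks. A quick inspection of those sizes, a sketch that instantiates a small environment:

# Inspect the per-agent spaces of a 15x15 Combat instance with 2 agents per team.
demo_env = Combat(grid_shape=(15, 15), n_agents=2, n_opponents=2)
print(len(demo_env.observation_space), demo_env.observation_space[0].shape)
print(len(demo_env.action_space), demo_env.action_space[0].n)
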
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "id": "HdZSfYc7ItIn"
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "class PolicyNet(torch.nn.Module):\n",
 61 |     "    def __init__(self, state_dim, hidden_dim, action_dim):\n",
 62 |     "        super(PolicyNet, self).__init__()\n",
 63 |     "        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)\n",
 64 |     "        self.fc2 = torch.nn.Linear(hidden_dim, hidden_dim)\n",
 65 |     "        self.fc3 = torch.nn.Linear(hidden_dim, action_dim)\n",
 66 |     "\n",
 67 |     "    def forward(self, x):\n",
 68 |     "        x = F.relu(self.fc2(F.relu(self.fc1(x))))\n",
 69 |     "        return F.softmax(self.fc3(x), dim=1)\n",
 70 |     "\n",
 71 |     "\n",
 72 |     "class ValueNet(torch.nn.Module):\n",
 73 |     "    def __init__(self, state_dim, hidden_dim):\n",
 74 |     "        super(ValueNet, self).__init__()\n",
 75 |     "        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)\n",
 76 |     "        self.fc2 = torch.nn.Linear(hidden_dim, hidden_dim)\n",
 77 |     "        self.fc3 = torch.nn.Linear(hidden_dim, 1)\n",
 78 |     "\n",
 79 |     "    def forward(self, x):\n",
 80 |     "        x = F.relu(self.fc2(F.relu(self.fc1(x))))\n",
 81 |     "        return self.fc3(x)\n",
 82 |     "\n",
 83 |     "\n",
 84 |     "class PPO:\n",
 85 |     "    ''' PPO算法,采用截断方式 '''\n",
 86 |     "    def __init__(self, state_dim, hidden_dim, action_dim, actor_lr, critic_lr,\n",
 87 |     "                 lmbda, eps, gamma, device):\n",
 88 |     "        self.actor = PolicyNet(state_dim, hidden_dim, action_dim).to(device)\n",
 89 |     "        self.critic = ValueNet(state_dim, hidden_dim).to(device)\n",
 90 |     "        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),\n",
 91 |     "                                                lr=actor_lr)\n",
 92 |     "        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),\n",
 93 |     "                                                 lr=critic_lr)\n",
 94 |     "        self.gamma = gamma\n",
 95 |     "        self.lmbda = lmbda\n",
 96 |     "        self.eps = eps  # PPO中截断范围的参数\n",
 97 |     "        self.device = device\n",
 98 |     "\n",
 99 |     "    def take_action(self, state):\n",
100 |     "        state = torch.tensor([state], dtype=torch.float).to(self.device)\n",
101 |     "        probs = self.actor(state)\n",
102 |     "        action_dist = torch.distributions.Categorical(probs)\n",
103 |     "        action = action_dist.sample()\n",
104 |     "        return action.item()\n",
105 |     "\n",
106 |     "    def update(self, transition_dict):\n",
107 |     "        states = torch.tensor(transition_dict['states'],\n",
108 |     "                              dtype=torch.float).to(self.device)\n",
109 |     "        actions = torch.tensor(transition_dict['actions']).view(-1, 1).to(\n",
110 |     "            self.device)\n",
111 |     "        rewards = torch.tensor(transition_dict['rewards'],\n",
112 |     "                               dtype=torch.float).view(-1, 1).to(self.device)\n",
113 |     "        next_states = torch.tensor(transition_dict['next_states'],\n",
114 |     "                                   dtype=torch.float).to(self.device)\n",
115 |     "        dones = torch.tensor(transition_dict['dones'],\n",
116 |     "                             dtype=torch.float).view(-1, 1).to(self.device)\n",
117 |     "        td_target = rewards + self.gamma * self.critic(next_states) * (1 -\n",
118 |     "                                                                       dones)\n",
119 |     "        td_delta = td_target - self.critic(states)\n",
120 |     "        advantage = rl_utils.compute_advantage(self.gamma, self.lmbda,\n",
121 |     "                                               td_delta.cpu()).to(self.device)\n",
122 |     "        old_log_probs = torch.log(self.actor(states).gather(1,\n",
123 |     "                                                            actions)).detach()\n",
124 |     "\n",
125 |     "        log_probs = torch.log(self.actor(states).gather(1, actions))\n",
126 |     "        ratio = torch.exp(log_probs - old_log_probs)\n",
127 |     "        surr1 = ratio * advantage\n",
128 |     "        surr2 = torch.clamp(ratio, 1 - self.eps,\n",
129 |     "                            1 + self.eps) * advantage  # 截断\n",
130 |     "        actor_loss = torch.mean(-torch.min(surr1, surr2))  # PPO损失函数\n",
131 |     "        critic_loss = torch.mean(\n",
132 |     "            F.mse_loss(self.critic(states), td_target.detach()))\n",
133 |     "        self.actor_optimizer.zero_grad()\n",
134 |     "        self.critic_optimizer.zero_grad()\n",
135 |     "        actor_loss.backward()\n",
136 |     "        critic_loss.backward()\n",
137 |     "        self.actor_optimizer.step()\n",
138 |     "        self.critic_optimizer.step()"
139 |    ]
140 |   },
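
update turns the one-step TD errors into advantages with rl_utils.compute_advantage, the repo-wide helper defined in rl_utils.py. The computation it is expected to perform is generalized advantage estimation: an exponentially weighted backward sum of TD errors with decay gamma * lmbda. A minimal sketch of that calculation, shown only to make the step explicit:

# Sketch of generalized advantage estimation over per-step TD errors.
def compute_advantage_sketch(gamma, lmbda, td_delta):
    deltas = td_delta.detach().numpy().flatten()
    advantages, running = [], 0.0
    for delta in reversed(deltas):
        running = gamma * lmbda * running + delta
        advantages.append(running)
    advantages.reverse()
    return torch.tensor(advantages, dtype=torch.float).view(-1, 1)
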
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {
145 |     "colab": {
146 |      "base_uri": "https://localhost:8080/"
147 |     },
148 |     "executionInfo": {
149 |      "elapsed": 2805926,
150 |      "status": "ok",
151 |      "timestamp": 1649963248923,
152 |      "user": {
153 |       "displayName": "Sam Lu",
154 |       "userId": "15789059763790170725"
155 |      },
156 |      "user_tz": -480
157 |     },
158 |     "id": "t8FsMOFPItIp",
159 |     "outputId": "2f453795-508c-45ff-91e1-fb8b81eb5e9c"
160 |    },
161 |    "outputs": [
162 |     {
163 |      "name": "stderr",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "/usr/local/lib/python3.7/dist-packages/gym/logger.py:30: UserWarning: \u001b[33mWARN: Box bound precision lowered by casting to float32\u001b[0m\n",
167 |       "  warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n",
168 |       "Iteration 0: 100%|██████████| 10000/10000 [07:17<00:00, 22.85it/s, episode=10000, return=0.310]\n",
169 |       "Iteration 1: 100%|██████████| 10000/10000 [05:43<00:00, 29.08it/s, episode=20000, return=0.370]\n",
170 |       "Iteration 2: 100%|██████████| 10000/10000 [05:30<00:00, 30.26it/s, episode=30000, return=0.560]\n",
171 |       "Iteration 3: 100%|██████████| 10000/10000 [04:54<00:00, 33.96it/s, episode=40000, return=0.670]\n",
172 |       "Iteration 4: 100%|██████████| 10000/10000 [04:20<00:00, 38.46it/s, episode=50000, return=0.670]\n",
173 |       "Iteration 5: 100%|██████████| 10000/10000 [03:52<00:00, 43.09it/s, episode=60000, return=0.620]\n",
174 |       "Iteration 6: 100%|██████████| 10000/10000 [03:55<00:00, 42.53it/s, episode=70000, return=0.610]\n",
175 |       "Iteration 7: 100%|██████████| 10000/10000 [03:40<00:00, 45.26it/s, episode=80000, return=0.640]\n",
176 |       "Iteration 8: 100%|██████████| 10000/10000 [03:48<00:00, 43.81it/s, episode=90000, return=0.650]\n",
177 |       "Iteration 9: 100%|██████████| 10000/10000 [03:42<00:00, 44.91it/s, episode=100000, return=0.770]\n"
178 |      ]
179 |     }
180 |    ],
181 |    "source": [
182 |     "actor_lr = 3e-4\n",
183 |     "critic_lr = 1e-3\n",
184 |     "num_episodes = 100000\n",
185 |     "hidden_dim = 64\n",
186 |     "gamma = 0.99\n",
187 |     "lmbda = 0.97\n",
188 |     "eps = 0.2\n",
189 |     "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\n",
190 |     "    \"cpu\")\n",
191 |     "\n",
192 |     "team_size = 2\n",
193 |     "grid_size = (15, 15)\n",
194 |     "#创建Combat环境,格子世界的大小为15x15,己方智能体和敌方智能体数量都为2\n",
195 |     "env = Combat(grid_shape=grid_size, n_agents=team_size, n_opponents=team_size)\n",
196 |     "\n",
197 |     "state_dim = env.observation_space[0].shape[0]\n",
198 |     "action_dim = env.action_space[0].n\n",
199 |     "#两个智能体共享同一个策略\n",
200 |     "agent = PPO(state_dim, hidden_dim, action_dim, actor_lr, critic_lr, lmbda, eps,\n",
201 |     "            gamma, device)\n",
202 |     "\n",
203 |     "win_list = []\n",
204 |     "for i in range(10):\n",
205 |     "    with tqdm(total=int(num_episodes / 10), desc='Iteration %d' % i) as pbar:\n",
206 |     "        for i_episode in range(int(num_episodes / 10)):\n",
207 |     "            transition_dict_1 = {\n",
208 |     "                'states': [],\n",
209 |     "                'actions': [],\n",
210 |     "                'next_states': [],\n",
211 |     "                'rewards': [],\n",
212 |     "                'dones': []\n",
213 |     "            }\n",
214 |     "            transition_dict_2 = {\n",
215 |     "                'states': [],\n",
216 |     "                'actions': [],\n",
217 |     "                'next_states': [],\n",
218 |     "                'rewards': [],\n",
219 |     "                'dones': []\n",
220 |     "            }\n",
221 |     "            s = env.reset()\n",
222 |     "            terminal = False\n",
223 |     "            while not terminal:\n",
224 |     "                a_1 = agent.take_action(s[0])\n",
225 |     "                a_2 = agent.take_action(s[1])\n",
226 |     "                next_s, r, done, info = env.step([a_1, a_2])\n",
227 |     "                transition_dict_1['states'].append(s[0])\n",
228 |     "                transition_dict_1['actions'].append(a_1)\n",
229 |     "                transition_dict_1['next_states'].append(next_s[0])\n",
230 |     "                transition_dict_1['rewards'].append(\n",
231 |     "                    r[0] + 100 if info['win'] else r[0] - 0.1)\n",
232 |     "                transition_dict_1['dones'].append(False)\n",
233 |     "                transition_dict_2['states'].append(s[1])\n",
234 |     "                transition_dict_2['actions'].append(a_2)\n",
235 |     "                transition_dict_2['next_states'].append(next_s[1])\n",
236 |     "                transition_dict_2['rewards'].append(\n",
237 |     "                    r[1] + 100 if info['win'] else r[1] - 0.1)\n",
238 |     "                transition_dict_2['dones'].append(False)\n",
239 |     "                s = next_s\n",
240 |     "                terminal = all(done)\n",
241 |     "            win_list.append(1 if info[\"win\"] else 0)\n",
242 |     "            agent.update(transition_dict_1)\n",
243 |     "            agent.update(transition_dict_2)\n",
244 |     "            if (i_episode + 1) % 100 == 0:\n",
245 |     "                pbar.set_postfix({\n",
246 |     "                    'episode':\n",
247 |     "                    '%d' % (num_episodes / 10 * i + i_episode + 1),\n",
248 |     "                    'return':\n",
249 |     "                    '%.3f' % np.mean(win_list[-100:])\n",
250 |     "                })\n",
251 |     "            pbar.update(1)\n",
252 |     "\n",
253 |     "# /usr/local/lib/python3.7/dist-packages/gym/logger.py:30: UserWarning:[33mWARN:\n",
254 |     "# Box bound precision lowered by casting to float32[0m\n",
255 |     "#   warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))\n",
256 |     "\n",
257 |     "# Iteration 0: 100%|██████████| 10000/10000 [05:22<00:00, 31.02it/s, episode=10000,\n",
258 |     "# return=0.220]\n",
259 |     "# Iteration 1: 100%|██████████| 10000/10000 [04:03<00:00, 41.07it/s, episode=20000,\n",
260 |     "# return=0.400]\n",
261 |     "# Iteration 2: 100%|██████████| 10000/10000 [03:37<00:00, 45.96it/s, episode=30000,\n",
262 |     "# return=0.670]\n",
263 |     "# Iteration 3: 100%|██████████| 10000/10000 [03:13<00:00, 51.55it/s, episode=40000,\n",
264 |     "# return=0.590]\n",
265 |     "# Iteration 4: 100%|██████████| 10000/10000 [02:58<00:00, 56.07it/s, episode=50000,\n",
266 |     "# return=0.750]\n",
267 |     "# Iteration 5: 100%|██████████| 10000/10000 [02:58<00:00, 56.09it/s, episode=60000,\n",
268 |     "# return=0.660]\n",
269 |     "# Iteration 6: 100%|██████████| 10000/10000 [02:57<00:00, 56.42it/s, episode=70000,\n",
270 |     "# return=0.660]\n",
271 |     "# Iteration 7: 100%|██████████| 10000/10000 [03:04<00:00, 54.20it/s, episode=80000,\n",
272 |     "# return=0.720]\n",
273 |     "# Iteration 8: 100%|██████████| 10000/10000 [02:59<00:00, 55.84it/s, episode=90000,\n",
274 |     "# return=0.530]\n",
275 |     "# Iteration 9: 100%|██████████| 10000/10000 [03:03<00:00, 54.55it/s, episode=100000,\n",
276 |     "# return=0.710]"
277 |    ]
278 |   },
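
Every entry of win_list is a single 0/1 outcome, so the 'return' shown in the progress bar is already a 100-episode average of the win rate. The same smoothing can be plotted over the whole run; a sketch using numpy and matplotlib from the imports above and the win_list produced by the training loop:

# 100-episode moving average of the win rate over the whole training run.
window = 100
win_rate = np.convolve(np.asarray(win_list, dtype=float),
                       np.ones(window) / window, mode='valid')
plt.plot(range(len(win_rate)), win_rate)
plt.xlabel('Episodes')
plt.ylabel('Win rate (100-episode average)')
plt.title('PPO with a shared policy on Combat')
plt.show()
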
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": null,
282 |    "metadata": {
283 |     "colab": {
284 |      "base_uri": "https://localhost:8080/",
285 |      "height": 295
286 |     },
287 |     "executionInfo": {
288 |      "elapsed": 20,
289 |      "status": "ok",
290 |      "timestamp": 1649963248923,
291 |      "user": {
292 |       "displayName": "Sam Lu",
293 |       "userId": "15789059763790170725"
294 |      },
295 |      "user_tz": -480
296 |     },
297 |     "id": "OT2mwoZdItIq",
298 |     "outputId": "6ea70d1d-bb28-456e-ffca-fe4f8106e0b8"
299 |    },
300 |    "outputs": [
301 |     {
302 |      "data": {
303 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd5hU1dnAf+/MNsrSQZTi0gQBxYLYo0YsWGM0sURNjEaTaBK/mIKJMUZjYkyxfXzGEjWx9wqKiqDGQlMEARGU3pHetsyc749b9s6de2fu7O7szu6+v+fZZ++c286du3ve85bzvmKMQVEURVEAYk3dAUVRFKVwUKGgKIqiuKhQUBRFUVxUKCiKoiguKhQURVEUFxUKiqIoiosKBUVp5ojIFBG5rKn7obQMVCgoBYWILBGR0fb290QkISLbRWSriMwSkdPsfceKSNLet01EFojIJZ7rlIrIn0VkmYjsEpGFIvJLEZFGeo4SEbnBvu8O+7keEJGKxrh/VETkIRH5Y1P3QykcVCgohc4Hxpj2QCfgX8BTItLZ3rfK3tcB+DVwn4gMtfc9DRwPnAKUAxcBlwN3NFK/nwHOAC4AOgIjgJl2nxSlYFGhoDQLjDFJ4AGgDTDAt88YY14ANgFDReR44ETgbGPMp8aYGmPMh8CFwJUiMjDoHiKyr22K2Swic0XkDM++h0RknIiMtzWTqSIyIOQ6o4ETgDONMdPt+28xxowzxvzLPmYvEXlJRDaKyCIR+YHn/BtE5GkRecS+1xwR2UdErhWRdSKyXERO9N12gIhMszWqF0Wki+d6T4vIGhHZIiLviMgwu/1y4DvAr2yN6+Uo70Jp2ahQUJoFIlIEXAZsBxb69sVE5CwsbWIO1oA81Riz3HucMWYqsIKA2bqIFAMvA68DPYCfAI+KyGDPYecBfwA6A4uAm0O6OxqY5r+/jyfsvuwFnAP8SUS+7tl/OvCwfa+PgYlY/6+9gBuBe3zXuxj4PrAnUAPc6dn3KjDIfq6PgEcBjDH32tu3GmPaG2NOz9BfpZWgQkEpdA4Tkc3AGuB84CxjzBZ73172vg3A74GLjDELgG7A6pDrrbb3p90HaA/cYoypMsa8Bbxi39PheWPMNGNMDdZgekDIPbpmuD8i0gc4Evi1MWa3MWYWcD/WwO7wrjFmon2vp4Hudt+qsQRKhYh08hz/sK0V7QB+B3xbROIAxpgHjDHbjDGVwA3ACBHpGNY/pXVT1NQdUJQsfGiMOSpk3ypjTO+A9g1YM+Mg9rT3+9kLWG6bqRyWYs3MHdZ4tndiCZEgvgL2Cdnn3GujMWab714jPZ/XerZ3ARuMMQnPZ+z7b7a3vVrJUqAY6CYiG7A0mm9hCRbn+boBW1AUH6opKC2RN4FD7Rm5i4gcCvQB3go4ZxXQR0S8/xN9gZV1vP8oEQkSWM69uohIeQPcy8H7rH2BaizhdwFwJpZJqyNQYR/jRGFpmmQlBRUKSovDGPMmMAl4VkSGiUhcRA4DHgHuNsYsDDhtKtbs/1ciUiwix2LZ9Z+o4/3fAJ4XkYNFpEhEykXkhyLyfdvX8D7wZxEpE5H9gUvt/tWVC0VkqIi0xfI5PGNrFuVAJZb20hb4k++8tUD/etxXaWGoUFBaKmcDk4HXsJzTj2CFtP4k6GBjTBWWEBiDNcP+P+BiY8xndbz/OcAE4EksM82nWOahN+3952PN2lcBzwO/t4VJXXkYeAjLxFUG/NRu/w+WOWklMA/40Hfev7AitjaLyAv1uL/SQhAtsqMoiqI4qKagKIqiuKhQUBRFUVxUKCiKoiguKhQURVEUl2a3eK1bt26moqKiqbuhKIrSrJg5c+YGY0z3bMc1O6FQUVHBjBkzmrobiqIozQoRWRrlODUfKYqiKC4qFBRFURQXFQqKoiiKiwoFRVEUxUWFgqIoiuKiQkFRFEVxUaGgKIqiuKhQUBRFySPGGJ6esZz/fLCEmkQy6/FNTbNbvKYoitKc+OCLr/jlM7MBqEkYvn9UvybuUWZUU1AURckj2ypr3O1NO6uasCfRUKGgKIqSR2Ii2Q8qIFQoKIqi5JHmJRJUKCiKkoFtu6v53oPTWLV5V97vVZNI8qNHZvLpyi15v1euPPLhUirGjuc/HyzJemxlTYIf/GcGC9duA6CZKQoqFBRFCeeV2auZsmA9d7y5MO/3+nLDDl79dA3/8+SsvN8rV6574VMArn9xbtZjZy3bzBvz1vKb5+cAaj5KQUROFpEFIrJIRMYG7O8rIpNF5GMRmS0ip+SzP4qi5IYznBlMg17342WbWL5xZ+C+ut5p3dbdTFu8se6damCSBl77dHVW+9G6bbuZ+uVXjdOpCOQtJFVE4sA44ARgBTBdRF4yxszzHHYd8JQx5m4RGQpMACry1SdFUXIjX5Pcs/7vfUriMT6/eUztvezfxtRNLJz+v/9l7dZKltxyagP0sO44vZ+5dBMzl27ivEP6ZDz+rHHvs3Lzribvt0M+NYVRwCJjzJfGmCrgCeBM3zEG6GBvdwRW5bE/iqLUEf84nUyaOg/eu6oSAFT5FnLVVwCt3VoJ1F2o5ItsYagrbX9NofQ7n0KhF7Dc83mF3eblBuBCEVmBpSX8JI/9URQlR8Sev/uHqzF3vMtRf5kceM6pd77LfjdMDL3myixOa/+9KsaO54cPz8zWVZeaZOoVVmzaScXY8bw4a2Xka9QH/9heFI82zNYkDbe8+hkVY8eTtJ9h9orNVIwdz/tfbGjobobS1I7m84GHjDG9gVOAh0UkrU8icrmIzBCRGevXr2/0TipKq8WevfsHugVrt4UO7nNXbWXb7prAfQC7qy1NoSRtsHRuln7Oa3PXROktAJU1qRrI3FVbAXj5k6YxRBTHoqlA1Ykk97/7pbWdtJ7h/S8sX8PbCxpv3MunUFgJeI1pve02L5cCTwEYYz4AyoBu/gsZY+41xow0xozs3j1r3WlFURqARNLwOzvqxnE0z1iykUG/nVCv61Y7ZiPfWOmYT+piRHlm5gp3u8onFBL2rDsecXDOxuTP1uV0fFRNoaomSczu4/KNO/nN83OaJFdSPoXCdGCQiPQTkRLgPOAl3zHLgOMBRGRfLKGgqoCiFABfrt9eO+u2R+pz/vkB1Yn62b6dQdo/RifqYVP/xdOfuNt+oeCYk6IOztm45KHpOR1fFFEYVSWSxG3HyjVPz+axqct4b1HjRyXlTSgYY2qAq4CJwHysKKO5InKjiJxhH3YN8AMR+QR4HPieKRRvi6I0El+u386KTcHhmf59xhjeW7SB5Rt38uX67ZGuv3zjTpZ+tSOlbeHabazdujulbdXmXXzhuWZpUbz2vpB2jVyZt2orG7ZXukLFH7/vCIv6DgGVNYmUz85se9OOKhas2Zbz9RLJ+vXno2WbIh03d+VWV5vxC7bGXBad1yypxpgJWA5kb9v1nu15wJH57IOiFDpf//vbAIEhif59E+as4crHPnL3RwljPPrWyWnHnnDbO2ltR9zyVkqbf+Z+zF+nZ
L2Xl2TSuOYQgFPufJdu7Uu47dwDgHShkExVSupMmKbw/hdfcdLt7+Qc+nmfbeePin9Nx+drownvSx6aTocya0h2TGwNvT4kCk3taFYUJQdWbg7WKPKBd4Zcl9n7rupEWtuG7VXU2JqCf/JbH/ORF7+jubqedvmoGllD4GgKQX3esqu6UfqgQkFp9kxesI6KseNDTTDNhQG/ye7AlQh2hK27q6kYO56KseP52RMf59yPirHjmTR/LUnPIP3CrODInYv+NTX0Ojur0oUCeEJG/T4F13xU23bbG5+nnZ/tfZ9213/Z74aJVIwdT/9rx7vrIurCXyd+xlMzVqS1r99WScXY8UyYszqlvbImwQX3hX8nAIs37KBi7HhueGkuFWPHp+xzhYJPsG3ZWc2IP7zOv99fUoenyA0VCkqz56np1nKY2SsKL5FaLkSxXUdZ4LV6c62v4MWQwTwbT05fHqk/7y4Mj5/3L05zcGz8aeYjN/qo9r53TErPuRTlfTshsUlDvRzj4yZ/Edg+d5V17yemL09pX7Epe+LAD+2UFg8FDPDOd1KdttbCum7PjmVZr19fVCgozR5nMGnKtGPJpOEvr33G2q27ueftL5i/eiu7qhLc+PI8dlaFx+z7eXXOal77NDwmP0pytbpEXm7dnW6aeH3u2twv5CHNWWrzo0ctn4i3n1t2VXPjy1YGnOUbd/HszBU8OX1Z4PkmWNEIJZmDWerlT1YxaX7253bWWrzz+Xq+2l7pttc3m6wrFHwC1dGK+nRuW6/rR0HLcSrNHneQaEKpMGPpJu6e8gWfrtzCuws38NeJC/ifE/bhgfcW06ltMT89flCk6zgDZpgzNMozSh2+iH+8nm6mue3N9LZcyGbL9/bz1tc+Y44nZfY1nhBTP+4kwD49m0aTS/TQTx63zG3ZnNFe09hHyzZzwtA9ANi8s352f+fZ/Oajr3ZYqTK6l5fW6/pRUE1BafbU/ss3nVSosUNn5tmrZ2uSxh0UvQuQFq3bzvbKaJrDum2709qiPWHwIBi0EGrm0k3MX72VHRH7lAtfrNvOJnswSwYMzF5NIcz/EIT/fYdpJA5+oRAkJIwxzF6x2f0c1F8v67fVagde4ZdNK1kWkhnW3zfH5OVcznGelxXnf8hWoaA0ewpBU3BwZnRhjP7H23zvgWmRrjXq5klpbVG0gDAb+t8CtIGz736fMXe8G8kWnis/evQjjv+HFVLrz0cEqc8S5n8Iwv++/esS/PiFQJAGc/+7iznjf99zP2cLQ/3zq5+5216hlM1S9eKsVRmPcb4nJ82Fc6xzD+/akXyhQkFpMTSpTAj4R/cnk3PCOmcsjbaYKYgogi/MXOI4R4P4akdl6L76sNEWkkF98moKuaVzSPUh7a7OoimY7EJhxtLUOgzzV2+N3BuvUKjvQjfne3C67O27CBTH8/9XrkJBaQHkZ4HP9soaKsaOtwql1KEH/gE8aIboD2nMRjRNIXiQLM6Q5sEf219X/jxhfmB70BoEQbj6iY/53oPT3LULUXAudfnDM5k0f23WSm13T0mNIBr5xzddR7HDjsrUzyKS1SzlUJVI8sS0ZfS/dnykNRFORbYg/BqVV1iWFcXr5C/KFRUKSrPH+T9q6LKHSzZYaR3unLSoXtcJmvU5BMXhZyLKEwaZaiDzLLMyy2w7Kve8E2x2SQQM+iLW+ocpC9bnZj7ybN/7zpd8kGPVssqaZFqNgx2+CDGByOsbqmqS3PTKPJKGyP6iMPxJ+7zvsrQR/AmgQkFpARhfNEoUqmqS3P7m52kzRodnZq7gE9vxWGQPpu8uXM+UBakZMj9bs5WnZywP1AL83Xnuo/RFUFH7bIzhtjc+5663stdKzqYpBDlR1/jyIAWFqNYHxxHvxfvomdY7eNldneAtT5bSz9fmnssI4N3PU+/nd7TPWLqJp2emrkEIoyqRdAV+mEDOhpNGvKQodUh20n4DlBY1znCtIalKs8f5N8xFKPzngyXc/uZC4iL8JCBc1Jt105m9XfQvy0HsDVc8+fZ3AXj40lEp53v74izG+vWz4WaDbGzaWR24kCuIMLu2M/BESSfx4ZcNW+s46J7xOtjHH5+WunZhUx1DQH/17Gy+7SmTud1X/2HZxp38cXywKcxPdU3SzdsU1eTkp01JnKpdyYwmvrLi/DuZQTUFpZmyZVc1G7ZXsm7bbnf1qjcFRFVN0i0Mv2VntevwTCYNSzbscM/xrxyFdKenP/Xx9soa1m1NDxf1IqQKhrAMo1Edk2HZPYNSPWwMiYByhFtdZtd9u2ReNLV8486M9vSg50wmoWeH3FboNqSBcPGG2neyox6pMKoSSVcTyhRyevDenUP3tSuxBvxM32FjaQoqFJRmyaF/epORf3yTUTdPYqYTzeMZMa59bg5H3zqZHZU1jLjxdQ666Q0A7nxrIcf+bYo7MJYEzFa3+maNfjvvWePeY9SfUsNFM02+l361MzTDaFRrw/n3fRjYftRfJqeFZP7siWDHqwis3rKLU+/8b7SbeijKMKvfUVnD0bdO5tfPzg6tHRDkSE4ak/Pq6/ZlxbmdkIHj/jaFtz+3yrfUZ51GVSLpvkdvsR8/h/XvErqvjS0UwsyZkG5ayhcqFJRmSVAYond8cWz//kydU+yyhk4tgSB1fasvG2VRLPWYhevSs2b6hzwRcSNFVm8J1yrqG8IIZCx96SdqGmcvD186KqMT35ndPvfRSg7r35Wy4hjfPCi1HHvQoq5E0uQcTdO+NLMJ5ecn7JPT9b6w32VdfQEQ3WR0/qi+ofvalVqW/Ezhtf6/w3yhQkFpNmzbXc3BN73hJhTzs35bJfvdMDElxtw/GDl5iJwqXIFCwedk/e+iDdwVYM/3ztD9aQkSSasYjrPtp2LseD5etilUKFzyYLQFbgC/f2kul/07ezWwoniMtRkEVBg9yjObeLwDalVNkv17deKIAd1Cj3FIGpPzgsNsA2OPHNNAFDfA7DuqUIiJUF4a7MZtW5LdXxC1glt9UaGgNBvmrNzCVzuqQsM4J81fx7bdNTz43mK3zW+2cOLRnbj8IJU8KGb/7wH33LqrdoYeFFLpFF0PS30wbvKiUKEwOYdC7eNnr+bN+dnrBvfq1Caj1hJGPJZeT8ExqXUvL015hs27qiiKC2cdaGkKHdtY5p6g50wkTc5hxNmc5LnWYS5ugIE2U2oL7+PFREKd621Lssf8xFQoKEoq0xZbETFh//gfLrYG4ZpE7QzUm6DsxVkrXXPSJ8utcNOg2P2oC6kmzq3NZpop1ULY9RJJUy+zRa4kk4bVW3JPZxETYcP2VOf15V/rz5jhPenctjhlwP987XaSxhCPCd85tC+VNQn+/voCHptqRQ3ts0d791hLKETvhzEmReA7nHVgL44b3B3I7PsIIlO0T1TCHPsAZx/U23UQxwS3BrOfKJpCI8kEDUlVmg+3v2mZcMKEgiMAvLP2v7xWm6PmZ0/Moo0vrC9oUIhq57/uhU/d7UyLv8KulzTQv3s7NmzPT4qJtH4Yw/I6FiIKqvoVEyFp0p/PCWctjsfYXZ3krrdqF/95
NbNc5eGUz9cHhsq2KYmz0Q4kytXu3hDmo4kZUox7o9BEJHS2H0UoNHSYcBiqKSjNjmwmB2tmbh3jX2HqH8CK4jGqE0k3myfUrSxkpjQRC0JCQNdu3Z2XMMORIaGPSZM9S2cQYd+GiGU6CdN2gt6Td9BOJE3gd71Hh2C/wM7KYG2sbXHcdXbnGstfEpc6lRqNSkzEDZWOSbhfIIr5qLFQoaA0O7LZjb2rZ0t8moB/Za0xhmufm8OBN73hrk9IBKy+zUZdFi19tmYb7y7c0OBmgTAbdzJpUqqyRSVo0DTGGvCMCf++gkw53kExkTQEndqzY5vA6wWtigZrlu0IhVyFbFEsVq/KbNkQqTX7xERCJzTtskRVNSYqFJRmRzZNodrjU/Dni/FPapPG8PInq9zzILpPwUum+PJsHDWoO98/sl+dz/fj7f5Pvj7Q3a5OJENn9ROv/hqH9guOow+bSMfEnu2HyMOg99TGYyZJGBNoWtsrpORkWC6iNiVF7rvLVVMwhAub+vAze5W8SG0Sw5hI6ISmtCie4m9pSlQoKAXFB198xdl3v091IsmcFVs46//eSxsMsvkGvatCp2SJ4vn5U5+4foUTbnubddt251S+0aE+WUZL4jGG9Cyv8/kOjqbjzW00oncndzssWR3A4J7l7NUpeIYetJbAmgGLbT4Kfvag9+T16YSZjyq6tQu83tjngtOE1EdTuP3Nzzn3nuCFgX7alcTpFfId+enV2TqurDheu35Gws138ZgwuGeHjNfUkFSlVfLLZz5h5tJNrNmym5vGz+PjZZv5eHlq/YF4LDzeG+zoo4j3M6bWHLVi0y6e/2hlnSKCcsny6ae0OJZzuGFQKG2Va/6q7f8eHcr4zSlDMl5r3AUHpZ3n8MuTBjOge/AgHYs55qPg7yse4PT1O1SDzv3J1wfyo2MHZOyzl93VCVco5Lrqd+6qrSllQDPx4lVHRRb+Zx3Yi6uOG8g1Jw6uXVWZ4c+qOC6UZem7rmhWWiXeiaMTgjp9capQmPzZejq3Kwm9RnWO5gBvZI2hbquMK+thPiqNx1L8Csfa4ZWZ+PXJ6QN9dY3Vb6+mUxQXLv9a5gH2pGFWfeGgWfuVxw0MXXUcsx3NoUIh0HyUKsz9i/7Acrr++uQhkQvKfLWjyjUfRR04/b6mbBzYtxMDe7TPWuXNoTge4xcnDaZ9aZFrRjMZpEI8FnNNnd3aB/9th4WzNjQqFJSCxDvQ+AvI76pOZMxVU51I1qs0Z52EQn3MR0WxFFtzlNj5oJxNlQlrwPIKhSgDqzNoZatLHHRekFBwZvlBj3HEgK6UFMU4pMKKkMqkYUVdQ3DqfnvWagoRz8lFE/Fety51J3550mAgNcLIn2CwKCZuqc12IVpwY61oUaGg5B1jTIpfYFdVgmTSuCkngvDnLPKTqRbyqs27s9bKDUOoWx6cbP318/UhPdzt0qJYymzcP5CfPmKvtPODBkwnAso7SEeJ23dunaswFHudgneB4P69O7paTJD56KRhPfn8j2M4fl9LO8mUFTSqDX1En07udbwRT09dcbi7/dAlh7jbS245lTMOSP9OM+FoIJmE2KzrTwhsv/CwvVlyy6kpWsy+e6b6kOIxcf0hFV2DzXX5DJ31okJByTv3vfsl+17/Guu3VbKzqoZ9r3+Nix+YxtDrJ/LWZ8ELf3b6nMtRyhw6bNxRxbptdV8QVhdNIdfoI69DtKQo1XzkH/CDnKdFGYSCt/tRVvg6AilXB3tMrHxTl/1nRtq1IFhTcAbGuHvP8Ovnstr4gD6WQ907y/Z+b34TWK7pNaJoILkk9/OHMBfFxM25FZamXDUFpcXw4iwr5HPNlt3urPK/drK4t0Oig/xaRC5Cob54hcJh/buwX6+OWc/ZlaNZwS8UvM/nn92XBZRhDDILOYO6d3APGsyuHRPseM4kDKf99nje+eVxnDB0D7ctaGD1Cjf/fm9wgNdcdup+e/LBtV9Pu5YjFO48/8DQfk39zfEA3HbuAYz/6VF0KCvmw2uP57+/Pi5j+cpc7fNRfBW5xAr4zY3xmLBuqzWRCYsCayRFQYWC0niIpK8BqApZE+BUOXP43Qtz89YvLwa405MRdVd1MmNxFPe4DKawIBz7MUBJPM52z2pdv7+krCg99j4o3t1xtqaYjwKEQljIZ6blGT3Ky+jbta07I4fgQdArCPzmn8GesFtv/yu6tXWL7XjPaV9mCZEDPGG1fvawz2tbUsSwvSzh3bNjGb07t035jv1dzdXnFCQUysuK6Ny2tr5DrmnAvRTFxTVBOuGsfjI5qhsSFQpKo1KVqJtZ6NmA+sb5wmt6Wr5xZyRTQ64+Be8gU1wkbLNNB53aFvOaJ9EeBBdsD5rpOsIgmSIU0o8Lcz7nupI7aBD0ygG/4ErJGOrZF7drT1xzwj68eNWRbvv9F4/kh8cMoE+XaGsD/ARpWGF9y4ajcT37oyO44mv93Xav/ymXS/71WyN48Hu1fo6yojg3f2M/fnB0P0ZV5LaIsKFRoaDkjfXbKpm2eGPKH7O/iMj42atdB9qX67ezcnPuWTxzJcrM38FErA4WlpcnDO9gHRNxawRfdlT6yuaSeDRNwRmgvKGlQcIjzFbvCJXHLjs0U9ddgoSlV1D4116k+BsCjvvJ8YPc2T5YGs3YMUPqPAMvDdCw3HsG5mUKv48jxA/euzNXHGNFLgmpWpnkUCy0V6c2HDekB0cM6ApYWlHfrm357alDQxfgqU9BafZ8Y9x7fPueD1La/DPqXdUJtxbAVY993Cj9yjRL9NdnNkQzNezI0XzkHZjjIm400teH7MGVx6WGS3q1CkdoBD2DM9P3ugYCndQhEUlOZbCBdrqFiw7bO+2YY/ax1lCM3rdHoLD0NvkHWe8nr1yKYt8fvW+PrMf4CXr280f1AYJn9f4Mul5O2782Wsl7rldTiPJ3cmDfTimOZCdhY3uPv8Wv3Tnhu40lFQonNZ/S4gia9QdF6ayxc/zPX7M1bV8+yDQI+aOenMRv2VifY7ST1+kqAiMrurDkllMBGLpXB3550hDOvvt9Zi7dlGLuue60oVx32lAmL0gvquP4a5JJw3cO7cvNZ+0XeO+SouDnOfOAXpx5gFUcx+mLn+G9Orr7glJGe7+rTN9bLINGEcT937VMLRVjx2c91sEvFLzPFHTPspI423z+nC7tSvjod6mhpl7NN5GjUHj+x0emfHaEQnmZRyj4hPbDlx7KkN+9pj4FpWl5d+F6KsaOZ96q+g/U3j/loBq0v3txLv9+f0mjrdjMFKb57/eXpHxOGkOfkBDB1OPC9wWViPQOAmGaixNFFOTkDPquzr33Q577aAUJu8hNGLkmjQsj6HV5xzN/H7yCwLsvV/t+VLxO9s5tU1cJBybrC/hehu2Vno/IqcEwvFfHFKGQa5grQP9u7dP65//7dDSu4RGi4BoC1RSUQJyqYjOWbmRowD9GLhhPqGSYQ/bxacus2VsDVCL713dHUhyPcfEDwXWOMxeh993fwAWj+rJ6yy7GTf4i671vPXt/huxZzhn/+57bNuFnR7NxRxUn3va
O21ZeVhu1EtYfZ0Ya5AMIG0ifmbkiY5nLO847gH2zJF6rD2EDP6QKEe++fNSUcHjy8sNYu62S/XqnDqiZzEf3XzyS7uWlVNYk0xaZgWXqeeqKwxmyZzn73/C655q5C4Xbzh3B/NXb6Nq+duLgN7sVxWM8+6PDGdi9/kkTo6BCQWlwXp+7JsV05AxuiaQJXeT12ZrgQjR14ZB+XejgGXT9ZJqZ+lesJo0hFrNs/lGEwrcP6ZPW1q19Kd3al3LhYX155EOrLGV7j6YQZj5xxFOQUMg0AHmT/PlxzEMNQVBaDMkgFLx4+5/JIVxfDu3fNfj+AX1zNLJu5aWM6BMeBgswKiDNeF30nfKy4rRrBTnWD947OCIpH6hQUBqcyx+emfI54dEUcl3526ltcUoahZF7d2bG0k0ZzsjuuDxucHc6ty2JFOZaO+7V38TxP6P3Yd6qrezZqQ1HDezmtoeNnSaT+ShMkJjg2n57V+0AACAASURBVMc3nD40ZTbaEPijic85uDdXjx5U28cMq4i9/c8UOurn6tGD2LKrmgffW5JbZ314+/LUFYcz6bO1nHdIX+55+wuG11EzFoF7LzqYxRt21KtvAD88ZgD/fDv7JCQfqFBQAnFm9w1h7XXsrjWJ3IXC94/sxz/eqE2Id+lR/Vi+aSdrt4Y7drPZqNuWFPH3b4/IKBSG9+rApyu3us49/yV7dihjzdbUKmbZ7tu1fSnP2Y5Gry06TIi5PoUAH0ioUMCqUeCfCX+vAYv4+Pvn8LdvjUj5nB6SWrudaj6KrilcPXofgHoLBe93PqpfF3e2fsvZ+9f5miLCicN61qtfDmPHDGkyoZBXR7OInCwiC0RkkYiMDTnm2yIyT0Tmishj+eyPUgcawPmb8MTP76rKbYGU33QSt3P4gxUZEkQ2226UYu1jhu8J1ApH/zW/HhAimYu/NCUdRIZZPwSHkGbSFJJJk1Vb+kaOCeGC75XZ/+P0oL9dj8Gb2C+eYj5q/HiXRoppqBfd2pfQrYG1uyjk7W2ISBwYB4wBhgLni8hQ3zGDgGuBI40xw4Cr89UfJTcaMvitdqUt7I6Yj97Bv/q2KC5u38aePITnfnxE2jnOgDk2JMdP0Mzbz7dHWr4B515eofDcj4/guMHpQiGXxUspi7yyOJqjRh+BXQ8iS/QRWLmC6ku2KKb12y1t7pC9u7Do5jHudwp+81Hj1yeui1O4sZn2m9FMs3M7NSb5FNGjgEXGmC+NMVXAE8CZvmN+AIwzxmwCMMakB18rzR5HKFz4r6nszFAHIQh/JEY8FnNnqEVxCQwjdE4J+7ePVK/AHoide3nHkJJ4LGtJ0FwIu5azyKljm3SneWhGbGMJk2yrgOuTp8ehTUnmwdwRXMN6dUjLweTVjjIlrssX+QqDbUhiMcm5Il9DkE+fQi9guefzCsC/fn4fABF5D4gDNxhjXvNfSEQuBy4H6Nu3b146q+QPb9qFTR6ncRT8pp4ij/moKB5Lm0WfP6qPO+CFaTtBSeLS7mtrE0Hmo3hM3FoBJfEYJw7bg1dmrw6UQn/8xvCs9wqbtf71W/vzzYN6MbBHekH3sEHN+a4bY82Hv7Smn3MO7k2bkjin759uqmpq81EzkAlNRlMvXisCBgHHAucD94lIWiyYMeZeY8xIY8zI7t2zlypUGo4XP17JnyfMj1zgIyhM0bvad9PO8OI4QRTHAnwK9nZRTNJSQztpGCA8gViUamSOHd9xpnq7ERNxNZgD+nbigkPDJypHeqKMwggTCuVlxZw4rGfgYruwc5y0Cw2pyYThL63ppyge48wDegXOdr3fZz5DUsNoCE2ppZLPP52VgDdou7fd5mUF8JIxptoYsxj4HEtIKE2MM6DOWLqJe975MnIW0Gw5gFZsyi3hnX9GXBwXV0DFY5JmCvL+s3ujY757eG0enygFU4rjwin79eTf3x8FpA7CIrX9shLmWdtBw0yUGWlWx3iArShM4BlXiGW/8S3f3I/v1yMq6QyP4/j+i0fmdK5XU8imcQTxm1OG8PMT9sn5PCU7+TQfTQcGiUg/LGFwHnCB75gXsDSEB0WkG5Y56cs89qnZM3/1Vgb2aJ/VLr5u624Mtfnm60umhcZLNuygS/sSOpQVu7lcwsg1hts/HsZjsRRNwT/rDzKb/OjYAfz65CH8+4OlQDSfgojwf985uLYf3mghj1BIZlgoZh0bvu/oQd14d+GGrLP6oAE+LA+O05co5qPzRtXPFNuxTXFojqRseL+zDgE+k2xc/rXcaiwr0cmbpmCMqQGuAiYC84GnjDFzReRGETnDPmwi8JWIzAMmA780xnyVrz41d5Zv3MmYO97lj6/My3rsqD9N4tA/TWqwe2cq1Xjs36bwjXFWWgd/gZgg6mPP9foU4jFJ8zl4hciBfS1LpH/FqFcoePMSZVrFKimagrjP4E2tHTQOR7FSRDVlnLr/nu522OtwhEGhR9d4BV15SKH6xmDM8IZZV9CSyOvbMMZMACb42q73bBvg5/aPkgXHHj9zWeYVvQ1D6qhjsiwv+HK9pQFsj1BXYI8OZazesjvrcUFY6xTs6KNYLM0U5B0MjxjQjU+uP5GObVNnok6W0Ll/OIlltqAFePqKw9nnulcD75tiPsJb1zh6NlA/zjWizOo//cNJlHkEYJiQdi7VFFErueCNKmuqvvq/U8VCv5FmhDPANFYFJi9JY3hz3lr2+e2rGU1EUTSFru2DF50F4Y/9L47HUjWFDEIBSBMIzjXAKvLujZHPVIfXO26J1PbK61MIPi+DUHCOifBf2L60KCVqKsw56zqaC1smFIQm4/9OFQv9RpoRzv9RAyQSzYpf8CSMYdyURVQlksxfHZ5O259QLoiu7aKv0nSeuVenNlw7Zgj9u7XzJIqTNHt+lPhz70CQqdqWl9Q6AR4B7bln0OK1TJevz7g4sEd7/v6tEVx4WKpfIJGsdcIXMoXev6hMuuaYpu5Cg6NCoRnhDDre8NBX56xm+pKNKcc9OX2Zu72jsoY7Jy1MqygG8MLHK/l05Rb389MzlrMgJFtp0hh3MN+QoaBMUEiqn7os3R/eqwNXHDOAWCw1+shPlBmo1zmdqbaCF+9lBXHvk/RGH/mc0fbB4de0f9dV8zv74N4pKbgBZq/YYvelsAfdliIUBnRPX0PS3FGh0Ixw/s+9g8iPHv2Ib/0zteTlr5+d427/deIC/vHG59biKh9XPzmL0+76r/v5l8/M5qTb30k7zrln+1LLZJFpAVoiglDo0i73aBPvLNypeeCYe44cWJseOcpYU+IrhRkFr7Dp2bEs5V045h/vlZxBL4pPoT7mwPMP6cteHdMjzAp90C0E85ESjAqFZkStySL6KLJ1tzWAV3s0hRWbdlJVU/t5+cadrN2a2fGbSBoWrttubydJJg3LvtqZdly26wB0Dklklwmv3d0xUTnpHx697DCG9LQKkEQZDL1+iKhOTue771FemuJ7MCZYsDhCLLO/wb5GpB4E07drW96/Nj0/TmNVsasrhS60WjOaOrsZURefgtcpC7BlVzVH/WVyii366Fsnh57ncP
+7i5lrl+asThjGTV7E39/4PM2m+rsX52btU6c24UJh8B7lLFhba8Jy1PPDAoqleHMCuam+I5mPctcUxDeAe81H4pqPJO34zGNfujmwoSj06KNCF1qtGdUUmhHeiJcw/Pscc44ziC3faM3uZyzJLax1yue1uQprkkmmLrb8GLmuUAZoUxL+Z/fwZaNSPg/v1ZH3x36diw7bO+3YdgFpFqJpCl6ncUSh4P/sMR9l8m1kElJ+QdOQFLhMiBRxpTQNqik0QzJNLP02/drcPcJ3H5jmrnXo0aEstATmhu2VPDljeUqbsw4B4E8TPnMXfVV7zFAVY8dH6n9QfQCHoIF+r05tAo8NzKmT42KxXAcnfzrrjm2KA7OyRtEUogj5ulLo5plC719rRoVCM8IZOjKtLk6YEKEg8Pbn6932TJkpl21M9xX4WWdHINUkcyuaA5nTTEQZLCb89OjQdBm5OjD993vsB4cGT919lx3QvT1/OGMYY/brSWV1+ncQpR9BgQN15bEfHMoF903N6f5NiQqFwkWFQjPCGeCDxpBk0irB+Oa81JIUbty6b5CozrCewDv7z0ZVIvcRLVOW0iiD2dC9OjDUV0e3tmxmboON//gjBmTLaupJsndEBQArN9smtJQFbtbvTP4fN8Q4SkezcMSAbhzevysffGlliSn0QVd9CoWLWvaaEc6kPGhm6WgIVz72Ueo5IQ7YjEIhh4E+EaIp/OaU4KpnUH9NIQi/Qz2IX540mMF7lKe0NcSMOmiAu/HMYXRsU0y7DBlAG1JT8F4PCt+nUOhCqzWjmkIzwpkNuxqDZzQJMyk5i8lWbU51CFfXhI9E1TmYhDbuCF6zcOYBvWhbUsR1L3yati/TgrF8jhVXHjeQK48bmNKW++AU4McIkHFnHdibsw7snflKrqO5YaRC6qrrwh50Cz06qjWjmkIzwhn3nd9e00TYOO4Iixt9mVUzDfy5mI9uCsnYGhOhd+dgB3GmegaNvRI36tjklP30LpSrvYaT5iI3DrCzsvYKcaTniverK/SZuJqPChfVFJoRfg3BawIK0xTCLEGZzEc1DZBcKR4Tjh3cgwcvOYRLHpyesi+fSchyHWuiCqHysmLeuuYYegUIuroOcJcd1Z9jB/dgH59Jq66kRlUV9qBb6EKrNaNCoRnh1xC8g7c/6sghbNaf0XwUIaldNtyi7Xt2SNsXpRxmrjRG4tj+IXlunLE4Vy0nFpMGEwiQqqkUvPmowPvXmlHzUTOiNvrI+p3wqAEmSWDSu7CspZU14XUPcnE0h+HY2YNmrMXxGK9dfTR/OGNY4Ll3nX9gve/fmDRFKvNsFLp5RjWFwkWFQjMi6fMpeP0CCWOY5suWCvDV9uCMpksC8hY5zAi4Tq5kKgtZHI8xpGcHvntERWDq6tM9tX+bE009DpuU7QKUVB5UJhQuWYWCiLQVkd+JyH3250Eiclr+u6ak40QfWZ9qEqnRR97FSw6ZBv8wnpi+PPtBWXDMA0GaglcQhIWnXnz43nTLoRiPQ1OMNe3LLCvsNQVUSL4hTID5pNBTe7dmovgUHgRmAofbn1cCTwOv5KtTSjC1LgSDMYbtlbXhoFU5RAw1Bq6m4BSgkfQUEWD5F3bZjzH5F8e67TeeOZwbzxwe+X75SBURleJ4rM4F7PNFQ5gAldZJFPPRAGPMrUA1gDFmJ00zIWv1OGsOkgYen7ac0f+orX1wzF/TM502JY7ZKB4QrunVDrwCoj528BF2eKe/6ExrpdA1BQf/YkKl6YmiKVSJSBts24WIDADCS28pecOb++jlT1al7Cu0maFjNgpa2OVdvJZa26Du9/vTWfvxvSMq6BlQcKY1UlNgfw9BTLz6ay3ifU39zfGB+a+aK1GEwg3Aa0AfEXkUOBK4JJ+dUtIZP3s1D763GIDNO6vdHDeFjuNbKCmKsdv+x/EuXkupbVAP72NZcZz9e3eq8/ktAa8JrSHWmuSbwT1bhpawR4fmL9i8ZJ2bGWNeB74JfA94HBhpjCksW0Ur4MrHPmLG0txqIDQmp+zXM7C9OB7j6tGDeP7HR7ptXkdzQ5mPlFROH7FnU3dBaaZEiT6aZIz5yhgz3hjzijFmg4hMaozOKU3LkBxmcqfsFz4IXT16H/b1LGKLh0QfFfoq3ObC708fSmlReCI+RclEqFAQkTIR6QJ0E5HOItLF/qkAejVWB5WmI5ewwVxWqHqve4mdfhpUU2godLWwUh8yaQpXYIWiDrF/Oz8vAv+b/64pTU3QxL1NcTwwLXZdJ/nfPqRP7TVUU2gQ9GtU6kOoUDDG3GGM6Qf8whjT3xjTz/4ZYYxRodDA/Pb5OZHLWTYWQTNOkeAsp1Fmp13bZV6MpqkPGgZdGKbUh6zRR8aYu0RkODAUKPO0/yefHWttPDp1WaPdqzguKSGsb/78aylrHhzCxuiSAHu1Vyi8dNWRafsBXr36aFZv3h3aLzUf1Q8n+EjNR0p9iOJo/j1wl/1zHHArcEae+9Vq8YYVGmO4efw8Fq3b3qD36OBb4DWwRzndy0vTjgubcQbVd/auMQgLDe1RXuYuMvPiZE3VsaxhUIVLqQ9R1imcA4wAPjbGXCIiewCP5LdbrZeqRNKNHFm5eRf3vbuYVz9dU+/rdmpbzOadVj6JMrtgjAhcfbyVrycoS0SQOUdIDSN1iInwhzOG0aFN7tnYn/nhETz/8cpAYaPkjmoKSn2I8h+8yxiTFJEaEekArAP6ZDtJqRvVCUOp/VbWb6u023JbLfnPCw/mh4/MdD+PquhCu9I4kxesB6C02Bp8j92nOz8bPSj0OmEzzkBNQcQtZJ8rI/p0CtQglNxwMqOqTFDqQ5Sp2QwR6QTchxV99BHwQV571YrxJrY76//eByCR4+pUZ9B38KePcBaPpV41/R5nHpAeeXzhYXtTWpzuU2jOTuLiuFDRtW1Td6PBUE1BqQ8ZNQWxjMp/NsZsBv4pIq8BHYwxsxuld62QoGynuaYsKPM5gmMiKf6BIltKeE1GQeaj7xzal3MP6cOg374KwKKbxxCPCdOX1K6sdsxSZcXN1/Tz2U1jmroLDUp9ckgpSkahYIwxIjIB2M/+vKQxOtWaCTIVOb6AqPg1Bf+A7ySk89Z1DhI7IpJSOtOprdy2JF1TaM4raJuzluNFo4+UhiDKnOIjETkk7z1RAKisZ12Eiq5t0zSFpDEpUU1Bg6Cz/9Kj+mW9RxuPUHBqQKuTuHDQdQpKfYjyn3wo8IGIfCEis0Vkjoio+ShP1DcP/tf26Z5mPkjTFGLpmoLD8fv2yHoPr6bgrHdozppCS6OFKD5KExEl+uikvPdCcalLBbWY1FZlSxqTZj5IGpOyWO07h+7N9CWbUoRF0mN6+PGxA1LSAf/zwoNSwmLbFtf+2Th1opuzT6GloOYjpSGIsqJ5aWN0pLXjlKusiqgpXHBoXx6zV0G/cOWRzFq+metfnEvSpM8UE8awvbIGgKeuONzVRlKFgvWhOC786uTU3EYnD9+Tk4fXZkH1mo+ca6imUDioSFDqQ16ndyJysogsEJFFIjI2w3Fni4gRkZH57E8h4
/wjV0fUFLz5h2Iirp8gmTRpNuWkwRUKHdoU0beLFX7pNRU5pT7jEUJXnMVrZ4zYy23zO7eVxsd57epTUOpD7stPIyIicWAccAKwApguIi8ZY+b5jisHfgZMzVdfmgPOpD0RsQC9NypIBIrtwbw6YdJyCBlj2L7bEgrlZcX06tSGWdefQMc2tekunPsWRTRIz77hRNoWx3nJLgsalCRPaVxqzUdN2w+leZPP/+RRwCJjzJfGmCrgCeDMgONuAv4ChGdKawU4/9AX/Wsaj09b5s7cw/CXsSwuskaC6kQy0KfQxc5QWl5mzQM6tS1JmVE6t/PWT85Eh7JiiuIx93qa9rpwUJ+CUh+iJMT7pogsFJEtIrJVRLaJyNYI1+4FLPd8XoGvOI+IHAT0McZkzBktIpeLyAwRmbF+/foIt27eXPvcnKwL1rz5h2IirpCoSSbT0hwkk/DgJYdw1/kHpiXDqz0mN03BYcJPj+aeiw7O6Rwlv+jiNaU+RDEf3QqcboyZ35A3FpEY8A+s2s8ZMcbcC9wLMHLkyMKvSJ4D05dsDFycli21RUoZS6n9XFVj0oWCMezRoYzTPT4AP46jOYpPwUufLm3p06XlpIhoCahPQakPUYTC2joKhJWkJs7rbbc5lAPDgSn2H3FP4CUROcMYM6MO92uWfOufwWmksvkWvDZ878rj6kQywNGcXY665iM1AzVbvnvE3qzdtpsB3do3dVeUZkwUoTBDRJ4EXgAqnUZjzHNZzpsODBKRfljC4DzgAs/5W4BuzmcRmYJV5a3VCIRMZFuvkNF85Ds2l9RJLSXlQ2vEHzqsKHUhilDoAOwETvS0GSCjUDDG1IjIVcBEIA48YIyZKyI3AjOMMS/Vsc+tgr+/viDjfu9isbhHKFTXpEuAKJqCg2oKitK6ibJ47ZK6XtwYMwGY4Gu7PuTYY+t6n5bIuws3pLWdO7IP89dsZfaKLZR7HMbi9SkkghzNOQgFDS1VlFZNqFAQkV8ZY24VkbsISKJpjPlpXnvWyjEBeUvjcWFgj/bMXrElJewwFhM3H1F5WRHiMyA54ahRUPORorRuMmkKjnNZbfxNQJDFR4CrjhtI13YlHN6/q9seExjSs5zrTxvK6SP2SjEXjerXhetPGxr5vmo+UpTWTSahMEBERgGPGmNqGqtDisWKTbvS2kSgf/f2/PbU1EHeKaLzfTvt9dqt1jrA7uWlPHXF4TndVzUFRWndZBIKvYHbgSEiMgd4D3gfeN8Ys7ExOqdEw7+CtXPbEtqVxLnu1H1zvpZqCorSugkVCsaYXwCISAkwEjgCuAS4V0Q2G2Oi2ySUQDZsr8x+kAe/r8DBP46XFMWYe+PJdeqTagqK0rqJEpLaBisstaP9swqYk89OtQamLd7It+8JXrgWhj+qqCgm1CTT6yfUB10Nqyitm0zRR/cCw4BtWBlM3wf+YYzZFHaOEp23P1+X8zn+4bqsOM72ypo0YaEoilJXMgWl9wVKgTVYK5JXAJsbo1MtiWdmruCapz5Ja1+9JfeksP5ZvBNq6q2qpiiKUh8y+RROFmsUGoblT7gGGC4iG4EPjDG/b6Q+Nmt+8bQlEP7+7REp7burE6HnHDWwG/9dlL54zc8jlx7Ky7NX0a199HUIYbzyk6OYtVxlvqK0djIuXzUWn2KtSn4VKwJpAFZRHKUehDmNAU7ZLzV/zWhPhTQvfbu25crjBjaIH2B4r45ceNje9b6OoijNm1ChICI/FZEnRGQZ8DZwGvAZ8E2gSyP1r8Vg/KvRfOP4IRWda3f59jmnqu9AUZR8k0lTqACeBg41xgwwxlxkjLnbGPOJMSZaIWHFpdKX9dQ/vj9y2aFZr5FJu1AURWkIMvkUft6YHWnp7K5OUFYcdz/7TT6ZwkrVjawoSmOhKTEbid3VmTWFeAah4OQyUvORoij5RoVCI+GPNvIP8FEK36tMUBQl36hQaCR2+YVCDufmUCNHURSlXqhQyDNOLqF0TSG6WHBkgpqPFEXJNyoU8kxx3BrJ/ZpCwlMNzZ+Z1AlPPefg3gAM7G4VYte8RIqi5JsoCfGUelASj7G7Okmlz9GcyFAic2CPcpbccioAf/vWCO6e8gWgPgVFUfKPCoU8sW7bbkbdPMn9nElT8NKtfWlam1uaU6WCoih5RoVCnvhy/Y6Uz36fQo1HKDhbz//4CHp1bhN6TV28pihKvlGhkCf8w7ejKRhjuHXiAuav3pp2zoF9O6e1Wec0dO8URVGCUaGQJ/xOYWfx2o6qhOsjiMpFh+/NvFVbueJr/Rusf4qiKEFo9FEj8frcNcxavjlFg2hfGk0mdygrZtx3DqJzu/qnyFYURcmECoU84V+gPHXxRr4x7r0U30KRHa6alkFVURSliVChkCfClhR4s6UWx/XrVxSlsNBRqZE54pa33O312yoBzYKqKErhoEKhAFDrkaIohYIKhTyR0DJEiqI0Q1Qo5IlMaSwc+ndv1wg9URRFiY4KhTwRRShcckRF/juiKIqSAyoU8kQigqOg1FOeU1EUpRBQoZAnkhE0hdIi/foVRSksdFTKA9WJJJc8ND3rcWWqKSiKUmCoUMgD23bXRDpONQVFUQoNHZXyQNS0FaVFqikoilJYqFDIA0GRR53aFqe1lRXr168oSmGho1IeqPYJhVP335M7zjsw7biimH79iqIUFnkdlUTkZBFZICKLRGRswP6fi8g8EZktIpNEZO989qexqK5JXc4cFyEekCGvXamajxRFKSzyJhREJA6MA8YAQ4HzRWSo77CPgZHGmP2BZ4Bb89WfxqQmmSoUimJC3JdL+8Yzh9E1oB6zoihKU5LPymujgEXGmC8BROQJ4ExgnnOAMWay5/gPgQvz2J8G49KHpnNwRWd+fOzAtH0/e+LjNJ9CPCZu7QSHPTu2CU2vrSiK0lTk03zUC1ju+bzCbgvjUuDVoB0icrmIzBCRGevXr2/ALtaNSZ+t49bXFgTue3HWKl6ZvTqlLR4TYgESIKhNURSlKSkIT6eIXAiMBP4atN8Yc68xZqQxZmT37t0bt3M5MPXLrwLb4zGhyF+KjfTqbIqiKE1NPs1HK4E+ns+97bYURGQ08FvgGGNMZR77k3fOvffDwPbzR/UNbFdNQVGUQiOfmsJ0YJCI9BOREuA84CXvASJyIHAPcIYxZl0e+9JkLLx5DMN7dUxzNBtj1KegKErBkTehYIypAa4CJgLzgaeMMXNF5EYROcM+7K9Ae+BpEZklIi+FXK7Z4tRh9puPDKopKIpSeOTTfIQxZgIwwdd2vWd7dD7vX0gEJb9TkaAoSqFREI7m1kDbknShoJqCoiiFhgqFRqJtSapSZgzqU1AUpeBQodBIBCW/E5UKiqIUGCoU8siBfTu52+kCoHbV8zUn7NNIPVIURclMXh3NLZGgWgm7qhJpaSwAnv/xkVmvt+SWUxukX4qiKA2BCoUc8ZdK+GT5Zs4c917TdEZRFKWBUfNRjviT3U1fsjHyuZOuOYaRe3cGLEezoihKoaFCIUeSntG8sibBH8fPj3zugO7t6dmxDNDII0VRChM1H+WIV1N4
f1FwArxM/P70YXRuW8Lx++7RkN1SFEVpEFQo5EjCoynsrErkfH738lJu+sbwhuySoihKg6HmoxxJJr1CoaYJe6IoitLwqFDIEa/5aFd1qqZw7ODufPS7Exq7S4qiKA2GCoUc8ZqPdvnMRzFJL7upKIrSnFChkCPJpGfbF1YaE6Ekrl+poijNFx3BcsSrKSR9iw3isfS6CYqiKM0JFQo54nU0VyeSKfviMUmrsKYoitKcUKEQkQVrtrFxR1WKo3nt1tSS0iKimU8VRWnWqFCIyEm3v8OYO95JMR89Pm1ZyjGlHn/CmQfs1Wh9UxRFaSh08VoOrN1amWI+8tO9QykAn/9xjPoWFEVplqhQiIA3XXYiQya7wXuUA1BSpAqYoijNExUKEahOhK9NcLjt3BF844BejdUlRVGUvKBT2gDGz17NlAXr3M9PzVjubn//oemB55x1YG91MiuK0uxRTSGAKx/7CKitinbdC5+6+zbtrE47/venD22cjimKouQZ1RQysPSrHeyozJ707pIj+zVCbxRFUfKPagoZOOavUzjYrpSmKIrSGlBNIQszl25q6i4oiqI0GioUFEVRFBc1H9WDZ390RFpSPEVRlOaMCgUPL3y8kiF7lkc+Xv0NiqK0NFQoeLj6yVmRjrvpG8N5edaqPPdGURSl8Wm1QmHTjiricaFDWTEAyzfujHzuRYftzUWH7Z2vrimKojQZrdbRfOBNbzDq5jcBKy320bdObuIeKYqiND2tVdVGZAAACa9JREFUVigA7K62iuQs3rC9iXuiKIpSGLQ681EyaTjvvg/dz4vWbU9JeKcoitKaaXWawpqtu5m2eKP7+e4pX6SV1VQURWmttDqh4HcoT5izmiemL09p69zWcj53KCvilP16NlrfFEVRmprWJxQ27Ur5vKs6kaI5AJQWxeleXsqVxw2kslq1CEVRWg+tTyhECD0Vgem/Hc0Vxwxg/fZKwFq9rCiK0tLJq1AQkZNFZIGILBKRsQH7S0XkSXv/VBGpyGd/AJZvyi4UDupbu1J5eK+OAPTt0jZvfVIURSkU8hZ9JCJxYBxwArACmC4iLxlj5nkOuxTYZIwZKCLnAX8Bzs1Xn8DSFA7o04lfnDiY/3ywhNfnrU3Z/8KVR7q1lgGuP20olxxRQffyUmZcN5oajVRSFKUFk09NYRSwyBjzpTGmCngCONN3zJnAv+3tZ4DjJU81LZ+avpwT/vE2nyzfwoDu7TlqUDf6BMz+9+/VkTYlcfdzWXGcQbaQ6Na+lJ4dy/LRPUVRlIIgn+sUegHesJ4VwKFhxxhjakRkC9AV2OA9SEQuBy4H6Nu3b50606ltMYP2aM8+e5Rz/qg+AFw9ehBrtuwmFhNG79uD9dsqicW0zrKiKK2XZrF4zRhzL3AvwMiRI+tkvzlxWE9OHJYaXlpeVsy47xxU/w4qiqK0EPJpPloJ9PF87m23BR4jIkVAR+CrPPZJURRFyUA+hcJ0YJCI9BOREuA84CXfMS8B37W3zwHeMkar1iiKojQVeTMf2T6Cq4CJQBx4wBgzV0RuBGYYY14C/gU8LCKLgI1YgkNRFEVpIvLqUzDGTAAm+Nqu92zvBr6Vzz4oiqIo0Wl1K5oVRVGUcFQoKIqiKC4qFBRFURQXFQqKoiiKizS3CFARWQ8srePp3fCtlm4F6DO3DvSZWwf1eea9jTHdsx3U7IRCfRCRGcaYkU3dj8ZEn7l1oM/cOmiMZ1bzkaIoiuKiQkFRFEVxaW1C4d6m7kAToM/cOtBnbh3k/ZlblU9BURRFyUxr0xQURVGUDKhQUBRFUVxajVAQkZNFZIGILBKRsU3dn1wQkT4iMllE5onIXBH5md3eRUTeEJGF9u/OdruIyJ32s84WkYM81/quffxCEfmup/1gEZljn3Nnvsqi5oqIxEXkYxF5xf7cT0Sm2v180k7LjoiU2p8X2fsrPNe41m5fICInedoL7m9CRDqJyDMi8pmIzBeRw1v6exaR/7H/rj8VkcdFpKylvWcReUBE1onIp562vL/XsHtkxBjT4n+wUnd/AfQHSoBPgKFN3a8c+r8ncJC9XQ58DgwFbgXG2u1jgb/Y26cArwICHAZMtdu7AF/avzvb253tfdPsY8U+d0xTP7fdr58DjwGv2J+fAs6zt/8J/Mje/jHwT3v7POBJe3uo/b5LgX7230G8UP8msGqWX2ZvlwCdWvJ7xirJuxho43m/32tp7xn4GnAQ8KmnLe/vNeweGfva1P8EjfRCDgcmej5fC1zb1P2qx/O8CJwALAD2tNv2BBbY2/cA53uOX2DvPx+4x9N+j922J/CZpz3luCZ8zt7AJODrwCv2H/wGoMj/XrHqdhxubxfZx4n/XTvHFeLfBFblwcXYASD+99cS3zO1ddq72O/tFeCklviegQpShULe32vYPTL9tBbzkfOH57DCbmt22OrygcBUYA9jzGp71xpgD3s77Hkzta8IaG9qbgd+BSTtz12BzcaYGvuzt5/us9n7t9jH5/pdNCX9gPXAg7bJ7H4RaUcLfs/GmJXA34BlwGqs9zaTlv2eHRrjvYbdI5TWIhRaBCLSHngWuNoYs9W7z1hTgRYTXywipwHrjDEzm7ovjUgRlonhbmPMgcAOLJXfpQW+587AmVgCcS+gHXByk3aqCWiM9xr1Hq1FKKwE+ng+97bbmg0iUowlEB41xjxnN68VkT3t/XsC6+z2sOfN1N47oL0pORI4Q0SWAE9gmZDuADqJiFMx0NtP99ns/R2Br8j9u2hKVgArjDFT7c/PYAmJlvyeRwOLjTHrjTHVwHNY774lv2eHxnivYfcIpbUIhenAIDuioQTLQfVSE/cpMnYkwb+A+caYf3h2vQQ4EQjfxfI1OO0X21EMhwFbbBVyInCiiHS2Z2gnYtlbVwNbReQw+14Xe67VJBhjrjXG9DbGVGC9r7eMMd8BJgPn2If5n9n5Ls6xjzd2+3l21Eo/YBCWU67g/iaMMWuA5SIy2G46HphHC37PWGajw0Skrd0n55lb7Hv20BjvNewe4TSlk6mRnTynYEXtfAH8tqn7k2Pfj8JS+2YDs+yfU7BsqZOAhcCbQBf7eAHG2c86Bxjpudb3gUX2zyWe9pHAp/Y5/4vP2dnEz38stdFH/bH+2RcBTwOldnuZ/XmRvb+/5/zf2s+1AE+0TSH+TQAHADPsd/0CVpRJi37PwB+Az+x+PYwVQdSi3jPwOJbPpBpLI7y0Md5r2D0y/WiaC0VRFMWltZiPFEVRlAioUFAURVFcVCgoiqIoLioUFEVRFBcVCoqiKIqLCgWlVSIiCRGZ5fnJmD1TRH4oIhc3wH2XiEi3+l5HUfKFhqQqrRIR2W6Mad8E912CFXe+obHvrShRUE1BUTzYM/lb7dz000RkoN1+g4j8wt7+qVi1LWaLyBN2WxcRecFu+1BE9rfbu4rI62LVC7gfa2GSc68L7XvMEpF7xKodEReRh8SqLTBHRP6nCb4GpRWjQkFprbTxmY/O9ezbYozZD2tl6O0B544FDjTG7A/80G77A/Cx3fYb4D92+++B/xpjhgHPA30
BRGRf4FzgSGPMAUAC+A7WiuZexpjhdh8ebMBnVpSsFGU/RFFaJLvswTiIxz2/bwvYPxt4VERewEpFAVYqkrMBjDFv2RpCB6ziKt+028eLyCb7+OOBg4HpdpGsNljJyl4G+ovIXcB44PW6P6Ki5I5qCoqSjgnZdjgVKzfNQViDel0mVwL82xhzgP0z2BhzgzFmEzACmIKlhdxfh2srSp1RoaAo6Zzr+f2Bd4eIxIA+xpjJwK+xUje3B97FMv8gIscCG4xV8+Id4AK7fQxWgjuwkpSdIyI97H1dRGRvOzIpZox5FrgOS/AoSqOh5iOltdJGRGZ5Pr9mjHHCUjuLyGygEqu0oZc48IiIdMSa7d9pjNksIjcAD9jn7aQ2XfEfgMdFZC7wPlaqaIwx80TkOuB1W9BUA1cCu7AqrzkTtmsb7pEVJTsakqooHjRkVGntqPlIURRFcVFNQVEURXFRTUFRFEVxUaGgKIqiuKhQUBRFUVxUKCiKoiguKhQURVEUl/8HWZEthZf0+MwAAAAASUVORK5CYII=\n",
304 |       "text/plain": [
305 |        "<Figure size 432x288 with 1 Axes>"
306 |       ]
307 |      },
308 |      "metadata": {
309 |       "needs_background": "light"
310 |      },
311 |      "output_type": "display_data"
312 |     }
313 |    ],
314 |    "source": [
315 |     "win_array = np.array(win_list)\n",
316 |     "#每100条轨迹取一次平均\n",
317 |     "win_array = np.mean(win_array.reshape(-1, 100), axis=1)\n",
318 |     "\n",
319 |     "episodes_list = np.arange(win_array.shape[0]) * 100\n",
320 |     "plt.plot(episodes_list, win_array)\n",
321 |     "plt.xlabel('Episodes')\n",
322 |     "plt.ylabel('Win rate')\n",
323 |     "plt.title('IPPO on Combat')\n",
324 |     "plt.show()"
325 |    ]
326 |   }
327 |  ],
328 |  "metadata": {
329 |   "colab": {
330 |    "collapsed_sections": [],
331 |    "name": "第20章-多智能体强化学习入门.ipynb",
332 |    "provenance": []
333 |   },
334 |   "kernelspec": {
335 |    "display_name": "Python 3",
336 |    "language": "python",
337 |    "name": "python3"
338 |   },
339 |   "language_info": {
340 |    "codemirror_mode": {
341 |     "name": "ipython",
342 |     "version": 3
343 |    },
344 |    "file_extension": ".py",
345 |    "mimetype": "text/x-python",
346 |    "name": "python",
347 |    "nbconvert_exporter": "python",
348 |    "pygments_lexer": "ipython3",
349 |    "version": "3.7.6"
350 |   }
351 |  },
352 |  "nbformat": 4,
353 |  "nbformat_minor": 1
354 | }
355 | 
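A note on the plotting cell above: win_array.reshape(-1, 100) assumes the number of recorded episodes is an exact multiple of 100 and raises a ValueError otherwise. A minimal sketch of a sliding-window alternative, assuming win_list holds one 0/1 win flag per episode (sliding_win_rate is a hypothetical helper, not part of the notebook):

    import numpy as np

    def sliding_win_rate(win_list, window=100):
        # Hypothetical helper, not from the notebook:
        # moving average of per-episode win flags, valid for any episode count >= window.
        kernel = np.ones(window) / window
        return np.convolve(np.asarray(win_list, dtype=float), kernel, mode='valid')

    # rates = sliding_win_rate(win_list)
    # plt.plot(np.arange(len(rates)) + 100, rates)  # x-axis roughly aligned with episode index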


--------------------------------------------------------------------------------
/第3章-马尔可夫决策过程.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "colab": {
  8 |      "base_uri": "https://localhost:8080/"
  9 |     },
 10 |     "executionInfo": {
 11 |      "elapsed": 5,
 12 |      "status": "ok",
 13 |      "timestamp": 1649954662434,
 14 |      "user": {
 15 |       "displayName": "Sam Lu",
 16 |       "userId": "15789059763790170725"
 17 |      },
 18 |      "user_tz": -480
 19 |     },
 20 |     "id": "5OzU9RtB9fWZ",
 21 |     "outputId": "146722e2-c641-46dc-a690-01e8eed9160c"
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "根据本序列计算得到回报为:-2.5。\n"
 29 |      ]
 30 |     }
 31 |    ],
 32 |    "source": [
 33 |     "import numpy as np\n",
 34 |     "np.random.seed(0)\n",
 35 |     "# 定义状态转移概率矩阵P\n",
 36 |     "P = [\n",
 37 |     "    [0.9, 0.1, 0.0, 0.0, 0.0, 0.0],\n",
 38 |     "    [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],\n",
 39 |     "    [0.0, 0.0, 0.0, 0.6, 0.0, 0.4],\n",
 40 |     "    [0.0, 0.0, 0.0, 0.0, 0.3, 0.7],\n",
 41 |     "    [0.0, 0.2, 0.3, 0.5, 0.0, 0.0],\n",
 42 |     "    [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],\n",
 43 |     "]\n",
 44 |     "P = np.array(P)\n",
 45 |     "\n",
 46 |     "rewards = [-1, -2, -2, 10, 1, 0]  # 定义奖励函数\n",
 47 |     "gamma = 0.5  # 定义折扣因子\n",
 48 |     "\n",
 49 |     "\n",
 50 |     "# 给定一条序列,计算从某个索引(起始状态)开始到序列最后(终止状态)得到的回报\n",
 51 |     "def compute_return(start_index, chain, gamma):\n",
 52 |     "    G = 0\n",
 53 |     "    for i in reversed(range(start_index, len(chain))):\n",
 54 |     "        G = gamma * G + rewards[chain[i] - 1]\n",
 55 |     "    return G\n",
 56 |     "\n",
 57 |     "\n",
 58 |     "# 一个状态序列,s1-s2-s3-s6\n",
 59 |     "chain = [1, 2, 3, 6]\n",
 60 |     "start_index = 0\n",
 61 |     "G = compute_return(start_index, chain, gamma)\n",
 62 |     "print(\"根据本序列计算得到回报为:%s。\" % G)\n",
 63 |     "\n",
 64 |     "# 根据本序列计算得到回报为:-2.5。"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 2,
 70 |    "metadata": {
 71 |     "colab": {
 72 |      "base_uri": "https://localhost:8080/"
 73 |     },
 74 |     "executionInfo": {
 75 |      "elapsed": 3,
 76 |      "status": "ok",
 77 |      "timestamp": 1649954662902,
 78 |      "user": {
 79 |       "displayName": "Sam Lu",
 80 |       "userId": "15789059763790170725"
 81 |      },
 82 |      "user_tz": -480
 83 |     },
 84 |     "id": "8sywqMFs9fWd",
 85 |     "outputId": "d5c626fd-70c9-44f7-a4c3-3b112e4654c4"
 86 |    },
 87 |    "outputs": [
 88 |     {
 89 |      "name": "stdout",
 90 |      "output_type": "stream",
 91 |      "text": [
 92 |       "MRP中每个状态价值分别为\n",
 93 |       " [[-2.01950168]\n",
 94 |       " [-2.21451846]\n",
 95 |       " [ 1.16142785]\n",
 96 |       " [10.53809283]\n",
 97 |       " [ 3.58728554]\n",
 98 |       " [ 0.        ]]\n"
 99 |      ]
100 |     }
101 |    ],
102 |    "source": [
103 |     "def compute(P, rewards, gamma, states_num):\n",
104 |     "    ''' 利用贝尔曼方程的矩阵形式计算解析解,states_num是MRP的状态数 '''\n",
105 |     "    rewards = np.array(rewards).reshape((-1, 1))  #将rewards写成列向量形式\n",
106 |     "    value = np.dot(np.linalg.inv(np.eye(states_num, states_num) - gamma * P),\n",
107 |     "                   rewards)\n",
108 |     "    return value\n",
109 |     "\n",
110 |     "\n",
111 |     "V = compute(P, rewards, gamma, 6)\n",
112 |     "print(\"MRP中每个状态价值分别为\\n\", V)\n",
113 |     "\n",
114 |     "# MRP中每个状态价值分别为\n",
115 |     "#  [[-2.01950168]\n",
116 |     "#  [-2.21451846]\n",
117 |     "#  [ 1.16142785]\n",
118 |     "#  [10.53809283]\n",
119 |     "#  [ 3.58728554]\n",
120 |     "#  [ 0.        ]]"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 3,
126 |    "metadata": {
127 |     "executionInfo": {
128 |      "elapsed": 340,
129 |      "status": "ok",
130 |      "timestamp": 1649954667427,
131 |      "user": {
132 |       "displayName": "Sam Lu",
133 |       "userId": "15789059763790170725"
134 |      },
135 |      "user_tz": -480
136 |     },
137 |     "id": "5ILxWaLR9fWd"
138 |    },
139 |    "outputs": [],
140 |    "source": [
141 |     "S = [\"s1\", \"s2\", \"s3\", \"s4\", \"s5\"]  # 状态集合\n",
142 |     "A = [\"保持s1\", \"前往s1\", \"前往s2\", \"前往s3\", \"前往s4\", \"前往s5\", \"概率前往\"]  # 动作集合\n",
143 |     "# 状态转移函数\n",
144 |     "P = {\n",
145 |     "    \"s1-保持s1-s1\": 1.0,\n",
146 |     "    \"s1-前往s2-s2\": 1.0,\n",
147 |     "    \"s2-前往s1-s1\": 1.0,\n",
148 |     "    \"s2-前往s3-s3\": 1.0,\n",
149 |     "    \"s3-前往s4-s4\": 1.0,\n",
150 |     "    \"s3-前往s5-s5\": 1.0,\n",
151 |     "    \"s4-前往s5-s5\": 1.0,\n",
152 |     "    \"s4-概率前往-s2\": 0.2,\n",
153 |     "    \"s4-概率前往-s3\": 0.4,\n",
154 |     "    \"s4-概率前往-s4\": 0.4,\n",
155 |     "}\n",
156 |     "# 奖励函数\n",
157 |     "R = {\n",
158 |     "    \"s1-保持s1\": -1,\n",
159 |     "    \"s1-前往s2\": 0,\n",
160 |     "    \"s2-前往s1\": -1,\n",
161 |     "    \"s2-前往s3\": -2,\n",
162 |     "    \"s3-前往s4\": -2,\n",
163 |     "    \"s3-前往s5\": 0,\n",
164 |     "    \"s4-前往s5\": 10,\n",
165 |     "    \"s4-概率前往\": 1,\n",
166 |     "}\n",
167 |     "gamma = 0.5  # 折扣因子\n",
168 |     "MDP = (S, A, P, R, gamma)\n",
169 |     "\n",
170 |     "# 策略1,随机策略\n",
171 |     "Pi_1 = {\n",
172 |     "    \"s1-保持s1\": 0.5,\n",
173 |     "    \"s1-前往s2\": 0.5,\n",
174 |     "    \"s2-前往s1\": 0.5,\n",
175 |     "    \"s2-前往s3\": 0.5,\n",
176 |     "    \"s3-前往s4\": 0.5,\n",
177 |     "    \"s3-前往s5\": 0.5,\n",
178 |     "    \"s4-前往s5\": 0.5,\n",
179 |     "    \"s4-概率前往\": 0.5,\n",
180 |     "}\n",
181 |     "# 策略2\n",
182 |     "Pi_2 = {\n",
183 |     "    \"s1-保持s1\": 0.6,\n",
184 |     "    \"s1-前往s2\": 0.4,\n",
185 |     "    \"s2-前往s1\": 0.3,\n",
186 |     "    \"s2-前往s3\": 0.7,\n",
187 |     "    \"s3-前往s4\": 0.5,\n",
188 |     "    \"s3-前往s5\": 0.5,\n",
189 |     "    \"s4-前往s5\": 0.1,\n",
190 |     "    \"s4-概率前往\": 0.9,\n",
191 |     "}\n",
192 |     "\n",
193 |     "\n",
194 |     "# 把输入的两个字符串通过“-”连接,便于使用上述定义的P、R变量\n",
195 |     "def join(str1, str2):\n",
196 |     "    return str1 + '-' + str2"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": 4,
202 |    "metadata": {
203 |     "colab": {
204 |      "base_uri": "https://localhost:8080/"
205 |     },
206 |     "executionInfo": {
207 |      "elapsed": 3,
208 |      "status": "ok",
209 |      "timestamp": 1649954670178,
210 |      "user": {
211 |       "displayName": "Sam Lu",
212 |       "userId": "15789059763790170725"
213 |      },
214 |      "user_tz": -480
215 |     },
216 |     "id": "juDFPGkP9fWe",
217 |     "outputId": "20903c97-0f0e-4fb2-93e1-5b6b650fda1a"
218 |    },
219 |    "outputs": [
220 |     {
221 |      "name": "stdout",
222 |      "output_type": "stream",
223 |      "text": [
224 |       "MDP中每个状态价值分别为\n",
225 |       " [[-1.22555411]\n",
226 |       " [-1.67666232]\n",
227 |       " [ 0.51890482]\n",
228 |       " [ 6.0756193 ]\n",
229 |       " [ 0.        ]]\n"
230 |      ]
231 |     }
232 |    ],
233 |    "source": [
234 |     "gamma = 0.5\n",
235 |     "# 转化后的MRP的状态转移矩阵\n",
236 |     "P_from_mdp_to_mrp = [\n",
237 |     "    [0.5, 0.5, 0.0, 0.0, 0.0],\n",
238 |     "    [0.5, 0.0, 0.5, 0.0, 0.0],\n",
239 |     "    [0.0, 0.0, 0.0, 0.5, 0.5],\n",
240 |     "    [0.0, 0.1, 0.2, 0.2, 0.5],\n",
241 |     "    [0.0, 0.0, 0.0, 0.0, 1.0],\n",
242 |     "]\n",
243 |     "P_from_mdp_to_mrp = np.array(P_from_mdp_to_mrp)\n",
244 |     "R_from_mdp_to_mrp = [-0.5, -1.5, -1.0, 5.5, 0]\n",
245 |     "\n",
246 |     "V = compute(P_from_mdp_to_mrp, R_from_mdp_to_mrp, gamma, 5)\n",
247 |     "print(\"MDP中每个状态价值分别为\\n\", V)\n",
248 |     "\n",
249 |     "# MDP中每个状态价值分别为\n",
250 |     "#  [[-1.22555411]\n",
251 |     "#  [-1.67666232]\n",
252 |     "#  [ 0.51890482]\n",
253 |     "#  [ 6.0756193 ]\n",
254 |     "#  [ 0.        ]]"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 6,
260 |    "metadata": {
261 |     "colab": {
262 |      "base_uri": "https://localhost:8080/"
263 |     },
264 |     "executionInfo": {
265 |      "elapsed": 317,
266 |      "status": "ok",
267 |      "timestamp": 1649954714601,
268 |      "user": {
269 |       "displayName": "Sam Lu",
270 |       "userId": "15789059763790170725"
271 |      },
272 |      "user_tz": -480
273 |     },
274 |     "id": "3gKVFNen9scC",
275 |     "outputId": "3d5b5f1b-d5d2-4a26-fde2-76843910507b"
276 |    },
277 |    "outputs": [
278 |     {
279 |      "name": "stdout",
280 |      "output_type": "stream",
281 |      "text": [
282 |       "第一条序列\n",
283 |       " [('s1', '前往s2', 0, 's2'), ('s2', '前往s3', -2, 's3'), ('s3', '前往s5', 0, 's5')]\n",
284 |       "第二条序列\n",
285 |       " [('s4', '概率前往', 1, 's4'), ('s4', '前往s5', 10, 's5')]\n",
286 |       "第五条序列\n",
287 |       " [('s2', '前往s3', -2, 's3'), ('s3', '前往s4', -2, 's4'), ('s4', '前往s5', 10, 's5')]\n"
288 |      ]
289 |     }
290 |    ],
291 |    "source": [
292 |     "def sample(MDP, Pi, timestep_max, number):\n",
293 |     "    ''' 采样函数,策略Pi,限制最长时间步timestep_max,总共采样序列数number '''\n",
294 |     "    S, A, P, R, gamma = MDP\n",
295 |     "    episodes = []\n",
296 |     "    for _ in range(number):\n",
297 |     "        episode = []\n",
298 |     "        timestep = 0\n",
299 |     "        s = S[np.random.randint(4)]  # 随机选择一个除s5以外的状态s作为起点\n",
300 |     "        # 当前状态为终止状态或者时间步太长时,一次采样结束\n",
301 |     "        while s != \"s5\" and timestep <= timestep_max:\n",
302 |     "            timestep += 1\n",
303 |     "            rand, temp = np.random.rand(), 0\n",
304 |     "            # 在状态s下根据策略选择动作\n",
305 |     "            for a_opt in A:\n",
306 |     "                temp += Pi.get(join(s, a_opt), 0)\n",
307 |     "                if temp > rand:\n",
308 |     "                    a = a_opt\n",
309 |     "                    r = R.get(join(s, a), 0)\n",
310 |     "                    break\n",
311 |     "            rand, temp = np.random.rand(), 0\n",
312 |     "            # 根据状态转移概率得到下一个状态s_next\n",
313 |     "            for s_opt in S:\n",
314 |     "                temp += P.get(join(join(s, a), s_opt), 0)\n",
315 |     "                if temp > rand:\n",
316 |     "                    s_next = s_opt\n",
317 |     "                    break\n",
318 |     "            episode.append((s, a, r, s_next))  # 把(s,a,r,s_next)元组放入序列中\n",
319 |     "            s = s_next  # s_next变成当前状态,开始接下来的循环\n",
320 |     "        episodes.append(episode)\n",
321 |     "    return episodes\n",
322 |     "\n",
323 |     "\n",
324 |     "# 采样5次,每个序列最长不超过1000步\n",
325 |     "episodes = sample(MDP, Pi_1, 20, 5)\n",
326 |     "print('第一条序列\\n', episodes[0])\n",
327 |     "print('第二条序列\\n', episodes[1])\n",
328 |     "print('第五条序列\\n', episodes[4])\n",
329 |     "\n",
330 |     "# 第一条序列\n",
331 |     "#  [('s1', '前往s2', 0, 's2'), ('s2', '前往s3', -2, 's3'), ('s3', '前往s5', 0, 's5')]\n",
332 |     "# 第二条序列\n",
333 |     "#  [('s4', '概率前往', 1, 's4'), ('s4', '前往s5', 10, 's5')]\n",
334 |     "# 第五条序列\n",
335 |     "#  [('s2', '前往s3', -2, 's3'), ('s3', '前往s4', -2, 's4'), ('s4', '前往s5', 10, 's5')]"
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "code",
340 |    "execution_count": 7,
341 |    "metadata": {
342 |     "colab": {
343 |      "base_uri": "https://localhost:8080/"
344 |     },
345 |     "executionInfo": {
346 |      "elapsed": 292,
347 |      "status": "ok",
348 |      "timestamp": 1649954717890,
349 |      "user": {
350 |       "displayName": "Sam Lu",
351 |       "userId": "15789059763790170725"
352 |      },
353 |      "user_tz": -480
354 |     },
355 |     "id": "uZR44aSO9fWf",
356 |     "outputId": "7354c75f-1bc7-44b2-accc-85019278720d"
357 |    },
358 |    "outputs": [
359 |     {
360 |      "name": "stdout",
361 |      "output_type": "stream",
362 |      "text": [
363 |       "使用蒙特卡洛方法计算MDP的状态价值为\n",
364 |       " {'s1': -1.228923788722258, 's2': -1.6955696284402704, 's3': 0.4823809701532294, 's4': 5.967514743019431, 's5': 0}\n"
365 |      ]
366 |     }
367 |    ],
368 |    "source": [
369 |     "# 对所有采样序列计算所有状态的价值\n",
370 |     "def MC(episodes, V, N, gamma):\n",
371 |     "    for episode in episodes:\n",
372 |     "        G = 0\n",
373 |     "        for i in range(len(episode) - 1, -1, -1):  #一个序列从后往前计算\n",
374 |     "            (s, a, r, s_next) = episode[i]\n",
375 |     "            G = r + gamma * G\n",
376 |     "            N[s] = N[s] + 1\n",
377 |     "            V[s] = V[s] + (G - V[s]) / N[s]\n",
378 |     "\n",
379 |     "\n",
380 |     "timestep_max = 20\n",
381 |     "# 采样1000次,可以自行修改\n",
382 |     "episodes = sample(MDP, Pi_1, timestep_max, 1000)\n",
383 |     "gamma = 0.5\n",
384 |     "V = {\"s1\": 0, \"s2\": 0, \"s3\": 0, \"s4\": 0, \"s5\": 0}\n",
385 |     "N = {\"s1\": 0, \"s2\": 0, \"s3\": 0, \"s4\": 0, \"s5\": 0}\n",
386 |     "MC(episodes, V, N, gamma)\n",
387 |     "print(\"使用蒙特卡洛方法计算MDP的状态价值为\\n\", V)\n",
388 |     "\n",
389 |     "# 使用蒙特卡洛方法计算MDP的状态价值为\n",
390 |     "#  {'s1': -1.228923788722258, 's2': -1.6955696284402704, 's3': 0.4823809701532294,\n",
391 |     "# 's4': 5.967514743019431, 's5': 0}"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": 8,
397 |    "metadata": {
398 |     "colab": {
399 |      "base_uri": "https://localhost:8080/"
400 |     },
401 |     "executionInfo": {
402 |      "elapsed": 303,
403 |      "status": "ok",
404 |      "timestamp": 1649954723228,
405 |      "user": {
406 |       "displayName": "Sam Lu",
407 |       "userId": "15789059763790170725"
408 |      },
409 |      "user_tz": -480
410 |     },
411 |     "id": "COkP4ZDh9fWg",
412 |     "outputId": "943a07c9-f8db-4646-841b-d2960785eb17"
413 |    },
414 |    "outputs": [
415 |     {
416 |      "name": "stdout",
417 |      "output_type": "stream",
418 |      "text": [
419 |       "0.112567796310472 0.23199480615618912\n"
420 |      ]
421 |     }
422 |    ],
423 |    "source": [
424 |     "def occupancy(episodes, s, a, timestep_max, gamma):\n",
425 |     "    ''' 计算状态动作对(s,a)出现的频率,以此来估算策略的占用度量 '''\n",
426 |     "    rho = 0\n",
427 |     "    total_times = np.zeros(timestep_max)  # 记录每个时间步t各被经历过几次\n",
428 |     "    occur_times = np.zeros(timestep_max)  # 记录(s_t,a_t)=(s,a)的次数\n",
429 |     "    for episode in episodes:\n",
430 |     "        for i in range(len(episode)):\n",
431 |     "            (s_opt, a_opt, r, s_next) = episode[i]\n",
432 |     "            total_times[i] += 1\n",
433 |     "            if s == s_opt and a == a_opt:\n",
434 |     "                occur_times[i] += 1\n",
435 |     "    for i in reversed(range(timestep_max)):\n",
436 |     "        if total_times[i]:\n",
437 |     "            rho += gamma**i * occur_times[i] / total_times[i]\n",
438 |     "    return (1 - gamma) * rho\n",
439 |     "\n",
440 |     "\n",
441 |     "gamma = 0.5\n",
442 |     "timestep_max = 1000\n",
443 |     "\n",
444 |     "episodes_1 = sample(MDP, Pi_1, timestep_max, 1000)\n",
445 |     "episodes_2 = sample(MDP, Pi_2, timestep_max, 1000)\n",
446 |     "rho_1 = occupancy(episodes_1, \"s4\", \"概率前往\", timestep_max, gamma)\n",
447 |     "rho_2 = occupancy(episodes_2, \"s4\", \"概率前往\", timestep_max, gamma)\n",
448 |     "print(rho_1, rho_2)\n",
449 |     "\n",
450 |     "# 0.112567796310472 0.23199480615618912"
451 |    ]
452 |   }
453 |  ],
454 |  "metadata": {
455 |   "colab": {
456 |    "collapsed_sections": [],
457 |    "name": "第3章-马尔可夫决策过程.ipynb",
458 |    "provenance": []
459 |   },
460 |   "kernelspec": {
461 |    "display_name": "Python 3",
462 |    "language": "python",
463 |    "name": "python3"
464 |   },
465 |   "language_info": {
466 |    "codemirror_mode": {
467 |     "name": "ipython",
468 |     "version": 3
469 |    },
470 |    "file_extension": ".py",
471 |    "mimetype": "text/x-python",
472 |    "name": "python",
473 |    "nbconvert_exporter": "python",
474 |    "pygments_lexer": "ipython3",
475 |    "version": "3.7.6"
476 |   }
477 |  },
478 |  "nbformat": 4,
479 |  "nbformat_minor": 1
480 | }
481 | 
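The cell that computes the MDP state values above hard-codes P_from_mdp_to_mrp and R_from_mdp_to_mrp. Both follow from marginalizing the MDP over the policy: P'(s'|s) = sum_a pi(a|s) P(s'|s,a) and r'(s) = sum_a pi(a|s) r(s,a). A minimal sketch of that conversion, reusing S, A, P, R, Pi_1 and join defined earlier in the notebook (mdp_to_mrp is a hypothetical helper, not part of the original code):

    import numpy as np

    def mdp_to_mrp(S, A, P, R, Pi):
        # Hypothetical helper (not in the original notebook):
        # marginalize the transition probabilities and rewards over the policy Pi.
        n = len(S)
        P_mrp = np.zeros((n, n))
        R_mrp = np.zeros(n)
        for i, s in enumerate(S):
            for a in A:
                pi_sa = Pi.get(join(s, a), 0)
                if pi_sa == 0:
                    continue
                R_mrp[i] += pi_sa * R.get(join(s, a), 0)
                for j, s_next in enumerate(S):
                    P_mrp[i, j] += pi_sa * P.get(join(join(s, a), s_next), 0)
        return P_mrp, R_mrp

    # P_mrp, R_mrp = mdp_to_mrp(S, A, P, R, Pi_1)
    # These match the hand-written matrices except for the terminal state s5, which has
    # no actions here and so keeps an all-zero row; the notebook sets P(s5 -> s5) = 1
    # by hand before calling compute().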


--------------------------------------------------------------------------------
/第4章-动态规划算法.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "executionInfo": {
  8 |      "elapsed": 4,
  9 |      "status": "ok",
 10 |      "timestamp": 1649954819944,
 11 |      "user": {
 12 |       "displayName": "Sam Lu",
 13 |       "userId": "15789059763790170725"
 14 |      },
 15 |      "user_tz": -480
 16 |     },
 17 |     "id": "oXP3ykOT95VF"
 18 |    },
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "import copy\n",
 22 |     "\n",
 23 |     "\n",
 24 |     "class CliffWalkingEnv:\n",
 25 |     "    \"\"\" 悬崖漫步环境\"\"\"\n",
 26 |     "    def __init__(self, ncol=12, nrow=4):\n",
 27 |     "        self.ncol = ncol  # 定义网格世界的列\n",
 28 |     "        self.nrow = nrow  # 定义网格世界的行\n",
 29 |     "        # 转移矩阵P[state][action] = [(p, next_state, reward, done)]包含下一个状态和奖励\n",
 30 |     "        self.P = self.createP()\n",
 31 |     "\n",
 32 |     "    def createP(self):\n",
 33 |     "        # 初始化\n",
 34 |     "        P = [[[] for j in range(4)] for i in range(self.nrow * self.ncol)]\n",
 35 |     "        # 4种动作, change[0]:上,change[1]:下, change[2]:左, change[3]:右。坐标系原点(0,0)\n",
 36 |     "        # 定义在左上角\n",
 37 |     "        change = [[0, -1], [0, 1], [-1, 0], [1, 0]]\n",
 38 |     "        for i in range(self.nrow):\n",
 39 |     "            for j in range(self.ncol):\n",
 40 |     "                for a in range(4):\n",
 41 |     "                    # 位置在悬崖或者目标状态,因为无法继续交互,任何动作奖励都为0\n",
 42 |     "                    if i == self.nrow - 1 and j > 0:\n",
 43 |     "                        P[i * self.ncol + j][a] = [(1, i * self.ncol + j, 0,\n",
 44 |     "                                                    True)]\n",
 45 |     "                        continue\n",
 46 |     "                    # 其他位置\n",
 47 |     "                    next_x = min(self.ncol - 1, max(0, j + change[a][0]))\n",
 48 |     "                    next_y = min(self.nrow - 1, max(0, i + change[a][1]))\n",
 49 |     "                    next_state = next_y * self.ncol + next_x\n",
 50 |     "                    reward = -1\n",
 51 |     "                    done = False\n",
 52 |     "                    # 下一个位置在悬崖或者终点\n",
 53 |     "                    if next_y == self.nrow - 1 and next_x > 0:\n",
 54 |     "                        done = True\n",
 55 |     "                        if next_x != self.ncol - 1:  # 下一个位置在悬崖\n",
 56 |     "                            reward = -100\n",
 57 |     "                    P[i * self.ncol + j][a] = [(1, next_state, reward, done)]\n",
 58 |     "        return P"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 2,
 64 |    "metadata": {
 65 |     "executionInfo": {
 66 |      "elapsed": 2012,
 67 |      "status": "ok",
 68 |      "timestamp": 1649954830279,
 69 |      "user": {
 70 |       "displayName": "Sam Lu",
 71 |       "userId": "15789059763790170725"
 72 |      },
 73 |      "user_tz": -480
 74 |     },
 75 |     "id": "N95QAxfi95VJ"
 76 |    },
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "class PolicyIteration:\n",
 80 |     "    \"\"\" 策略迭代算法 \"\"\"\n",
 81 |     "    def __init__(self, env, theta, gamma):\n",
 82 |     "        self.env = env\n",
 83 |     "        self.v = [0] * self.env.ncol * self.env.nrow  # 初始化价值为0\n",
 84 |     "        self.pi = [[0.25, 0.25, 0.25, 0.25]\n",
 85 |     "                   for i in range(self.env.ncol * self.env.nrow)]  # 初始化为均匀随机策略\n",
 86 |     "        self.theta = theta  # 策略评估收敛阈值\n",
 87 |     "        self.gamma = gamma  # 折扣因子\n",
 88 |     "\n",
 89 |     "    def policy_evaluation(self):  # 策略评估\n",
 90 |     "        cnt = 1  # 计数器\n",
 91 |     "        while 1:\n",
 92 |     "            max_diff = 0\n",
 93 |     "            new_v = [0] * self.env.ncol * self.env.nrow\n",
 94 |     "            for s in range(self.env.ncol * self.env.nrow):\n",
 95 |     "                qsa_list = []  # 开始计算状态s下的所有Q(s,a)价值\n",
 96 |     "                for a in range(4):\n",
 97 |     "                    qsa = 0\n",
 98 |     "                    for res in self.env.P[s][a]:\n",
 99 |     "                        p, next_state, r, done = res\n",
100 |     "                        qsa += p * (r + self.gamma * self.v[next_state] *\n",
101 |     "                                    (1 - done))\n",
102 |     "                        # 本章环境比较特殊,奖励和下一个状态有关,所以需要和状态转移概率相乘\n",
103 |     "                    qsa_list.append(self.pi[s][a] * qsa)\n",
104 |     "                new_v[s] = sum(qsa_list)  # 状态价值函数和动作价值函数之间的关系\n",
105 |     "                max_diff = max(max_diff, abs(new_v[s] - self.v[s]))\n",
106 |     "            self.v = new_v\n",
107 |     "            if max_diff < self.theta: break  # 满足收敛条件,退出评估迭代\n",
108 |     "            cnt += 1\n",
109 |     "        print(\"策略评估进行%d轮后完成\" % cnt)\n",
110 |     "\n",
111 |     "    def policy_improvement(self):  # 策略提升\n",
112 |     "        for s in range(self.env.nrow * self.env.ncol):\n",
113 |     "            qsa_list = []\n",
114 |     "            for a in range(4):\n",
115 |     "                qsa = 0\n",
116 |     "                for res in self.env.P[s][a]:\n",
117 |     "                    p, next_state, r, done = res\n",
118 |     "                    qsa += p * (r + self.gamma * self.v[next_state] *\n",
119 |     "                                (1 - done))\n",
120 |     "                qsa_list.append(qsa)\n",
121 |     "            maxq = max(qsa_list)\n",
122 |     "            cntq = qsa_list.count(maxq)  # 计算有几个动作得到了最大的Q值\n",
123 |     "            # 让这些动作均分概率\n",
124 |     "            self.pi[s] = [1 / cntq if q == maxq else 0 for q in qsa_list]\n",
125 |     "        print(\"策略提升完成\")\n",
126 |     "        return self.pi\n",
127 |     "\n",
128 |     "    def policy_iteration(self):  # 策略迭代\n",
129 |     "        while 1:\n",
130 |     "            self.policy_evaluation()\n",
131 |     "            old_pi = copy.deepcopy(self.pi)  # 将列表进行深拷贝,方便接下来进行比较\n",
132 |     "            new_pi = self.policy_improvement()\n",
133 |     "            if old_pi == new_pi: break"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 3,
139 |    "metadata": {
140 |     "colab": {
141 |      "base_uri": "https://localhost:8080/"
142 |     },
143 |     "executionInfo": {
144 |      "elapsed": 4,
145 |      "status": "ok",
146 |      "timestamp": 1649954831712,
147 |      "user": {
148 |       "displayName": "Sam Lu",
149 |       "userId": "15789059763790170725"
150 |      },
151 |      "user_tz": -480
152 |     },
153 |     "id": "yZCGJazo95VK",
154 |     "outputId": "c84ef7a4-7c75-42f6-92ec-bf0c3e4ea167"
155 |    },
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "策略评估进行60轮后完成\n",
162 |       "策略提升完成\n",
163 |       "策略评估进行72轮后完成\n",
164 |       "策略提升完成\n",
165 |       "策略评估进行44轮后完成\n",
166 |       "策略提升完成\n",
167 |       "策略评估进行12轮后完成\n",
168 |       "策略提升完成\n",
169 |       "策略评估进行1轮后完成\n",
170 |       "策略提升完成\n",
171 |       "状态价值:\n",
172 |       "-7.712 -7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 \n",
173 |       "-7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900 \n",
174 |       "-7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900 -1.000 \n",
175 |       "-7.458  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000 \n",
176 |       "策略:\n",
177 |       "ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo \n",
178 |       "ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo \n",
179 |       "ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ovoo \n",
180 |       "^ooo **** **** **** **** **** **** **** **** **** **** EEEE \n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "def print_agent(agent, action_meaning, disaster=[], end=[]):\n",
186 |     "    print(\"状态价值:\")\n",
187 |     "    for i in range(agent.env.nrow):\n",
188 |     "        for j in range(agent.env.ncol):\n",
189 |     "            # 为了输出美观,保持输出6个字符\n",
190 |     "            print('%6.6s' % ('%.3f' % agent.v[i * agent.env.ncol + j]),\n",
191 |     "                  end=' ')\n",
192 |     "        print()\n",
193 |     "\n",
194 |     "    print(\"策略:\")\n",
195 |     "    for i in range(agent.env.nrow):\n",
196 |     "        for j in range(agent.env.ncol):\n",
197 |     "            # 一些特殊的状态,例如悬崖漫步中的悬崖\n",
198 |     "            if (i * agent.env.ncol + j) in disaster:\n",
199 |     "                print('****', end=' ')\n",
200 |     "            elif (i * agent.env.ncol + j) in end:  # 目标状态\n",
201 |     "                print('EEEE', end=' ')\n",
202 |     "            else:\n",
203 |     "                a = agent.pi[i * agent.env.ncol + j]\n",
204 |     "                pi_str = ''\n",
205 |     "                for k in range(len(action_meaning)):\n",
206 |     "                    pi_str += action_meaning[k] if a[k] > 0 else 'o'\n",
207 |     "                print(pi_str, end=' ')\n",
208 |     "        print()\n",
209 |     "\n",
210 |     "\n",
211 |     "env = CliffWalkingEnv()\n",
212 |     "action_meaning = ['^', 'v', '<', '>']\n",
213 |     "theta = 0.001\n",
214 |     "gamma = 0.9\n",
215 |     "agent = PolicyIteration(env, theta, gamma)\n",
216 |     "agent.policy_iteration()\n",
217 |     "print_agent(agent, action_meaning, list(range(37, 47)), [47])\n",
218 |     "\n",
219 |     "# 策略评估进行60轮后完成\n",
220 |     "# 策略提升完成\n",
221 |     "# 策略评估进行72轮后完成\n",
222 |     "# 策略提升完成\n",
223 |     "# 策略评估进行44轮后完成\n",
224 |     "# 策略提升完成\n",
225 |     "# 策略评估进行12轮后完成\n",
226 |     "# 策略提升完成\n",
227 |     "# 策略评估进行1轮后完成\n",
228 |     "# 策略提升完成\n",
229 |     "# 状态价值:\n",
230 |     "# -7.712 -7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710\n",
231 |     "# -7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900\n",
232 |     "# -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900 -1.000\n",
233 |     "# -7.458  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000\n",
234 |     "# 策略:\n",
235 |     "# ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo\n",
236 |     "# ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo\n",
237 |     "# ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ovoo\n",
238 |     "# ^ooo **** **** **** **** **** **** **** **** **** **** EEEE"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 4,
244 |    "metadata": {
245 |     "executionInfo": {
246 |      "elapsed": 3,
247 |      "status": "ok",
248 |      "timestamp": 1649954834592,
249 |      "user": {
250 |       "displayName": "Sam Lu",
251 |       "userId": "15789059763790170725"
252 |      },
253 |      "user_tz": -480
254 |     },
255 |     "id": "qs8Xd7LI95VL"
256 |    },
257 |    "outputs": [],
258 |    "source": [
259 |     "class ValueIteration:\n",
260 |     "    \"\"\" 价值迭代算法 \"\"\"\n",
261 |     "    def __init__(self, env, theta, gamma):\n",
262 |     "        self.env = env\n",
263 |     "        self.v = [0] * self.env.ncol * self.env.nrow  # 初始化价值为0\n",
264 |     "        self.theta = theta  # 价值收敛阈值\n",
265 |     "        self.gamma = gamma\n",
266 |     "        # 价值迭代结束后得到的策略\n",
267 |     "        self.pi = [None for i in range(self.env.ncol * self.env.nrow)]\n",
268 |     "\n",
269 |     "    def value_iteration(self):\n",
270 |     "        cnt = 0\n",
271 |     "        while 1:\n",
272 |     "            max_diff = 0\n",
273 |     "            new_v = [0] * self.env.ncol * self.env.nrow\n",
274 |     "            for s in range(self.env.ncol * self.env.nrow):\n",
275 |     "                qsa_list = []  # 开始计算状态s下的所有Q(s,a)价值\n",
276 |     "                for a in range(4):\n",
277 |     "                    qsa = 0\n",
278 |     "                    for res in self.env.P[s][a]:\n",
279 |     "                        p, next_state, r, done = res\n",
280 |     "                        qsa += p * (r + self.gamma * self.v[next_state] *\n",
281 |     "                                    (1 - done))\n",
282 |     "                    qsa_list.append(qsa)  # 这一行和下一行代码是价值迭代和策略迭代的主要区别\n",
283 |     "                new_v[s] = max(qsa_list)\n",
284 |     "                max_diff = max(max_diff, abs(new_v[s] - self.v[s]))\n",
285 |     "            self.v = new_v\n",
286 |     "            if max_diff < self.theta: break  # 满足收敛条件,退出评估迭代\n",
287 |     "            cnt += 1\n",
288 |     "        print(\"价值迭代一共进行%d轮\" % cnt)\n",
289 |     "        self.get_policy()\n",
290 |     "\n",
291 |     "    def get_policy(self):  # 根据价值函数导出一个贪婪策略\n",
292 |     "        for s in range(self.env.nrow * self.env.ncol):\n",
293 |     "            qsa_list = []\n",
294 |     "            for a in range(4):\n",
295 |     "                qsa = 0\n",
296 |     "                for res in self.env.P[s][a]:\n",
297 |     "                    p, next_state, r, done = res\n",
298 |     "                    qsa += p * (r + self.gamma * self.v[next_state] * (1 - done))\n",
299 |     "                qsa_list.append(qsa)\n",
300 |     "            maxq = max(qsa_list)\n",
301 |     "            cntq = qsa_list.count(maxq)  # 计算有几个动作得到了最大的Q值\n",
302 |     "            # 让这些动作均分概率\n",
303 |     "            self.pi[s] = [1 / cntq if q == maxq else 0 for q in qsa_list]\n",
304 |     "\n",
305 |     "\n",
306 |     "# env = CliffWalkingEnv()\n",
307 |     "# action_meaning = ['^', 'v', '<', '>']\n",
308 |     "# theta = 0.001\n",
309 |     "# gamma = 0.9\n",
310 |     "# agent = ValueIteration(env, theta, gamma)\n",
311 |     "# agent.value_iteration()\n",
312 |     "# print_agent(agent, action_meaning, list(range(37, 47)), [47])\n",
313 |     "\n",
314 |     "# 价值迭代一共进行14轮\n",
315 |     "# 状态价值:\n",
316 |     "# -7.712 -7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710\n",
317 |     "# -7.458 -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900\n",
318 |     "# -7.176 -6.862 -6.513 -6.126 -5.695 -5.217 -4.686 -4.095 -3.439 -2.710 -1.900 -1.000\n",
319 |     "# -7.458  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.000\n",
320 |     "# 策略:\n",
321 |     "# ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo\n",
322 |     "# ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovo> ovoo\n",
323 |     "# ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ooo> ovoo\n",
324 |     "# ^ooo **** **** **** **** **** **** **** **** **** **** EEEE"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 5,
330 |    "metadata": {
331 |     "colab": {
332 |      "base_uri": "https://localhost:8080/"
333 |     },
334 |     "executionInfo": {
335 |      "elapsed": 3,
336 |      "status": "ok",
337 |      "timestamp": 1649954837446,
338 |      "user": {
339 |       "displayName": "Sam Lu",
340 |       "userId": "15789059763790170725"
341 |      },
342 |      "user_tz": -480
343 |     },
344 |     "id": "2ZFlb2dB95VM",
345 |     "outputId": "0d4f2c7a-3589-4bb1-f95a-e9e4696d6201"
346 |    },
347 |    "outputs": [
348 |     {
349 |      "name": "stdout",
350 |      "output_type": "stream",
351 |      "text": [
352 |       "\n",
353 |       "\u001b[41mS\u001b[0mFFF\n",
354 |       "FHFH\n",
355 |       "FFFH\n",
356 |       "HFFG\n",
357 |       "冰洞的索引: {11, 12, 5, 7}\n",
358 |       "目标的索引: {15}\n",
359 |       "[(0.3333333333333333, 10, 0.0, False), (0.3333333333333333, 13, 0.0, False), (0.3333333333333333, 14, 0.0, False)]\n",
360 |       "[(0.3333333333333333, 13, 0.0, False), (0.3333333333333333, 14, 0.0, False), (0.3333333333333333, 15, 1.0, True)]\n",
361 |       "[(0.3333333333333333, 14, 0.0, False), (0.3333333333333333, 15, 1.0, True), (0.3333333333333333, 10, 0.0, False)]\n",
362 |       "[(0.3333333333333333, 15, 1.0, True), (0.3333333333333333, 10, 0.0, False), (0.3333333333333333, 13, 0.0, False)]\n"
363 |      ]
364 |     }
365 |    ],
366 |    "source": [
367 |     "import gym\n",
368 |     "env = gym.make(\"FrozenLake-v0\")  # 创建环境\n",
369 |     "env = env.unwrapped  # 解封装才能访问状态转移矩阵P\n",
370 |     "env.render()  # 环境渲染,通常是弹窗显示或打印出可视化的环境\n",
371 |     "\n",
372 |     "holes = set()\n",
373 |     "ends = set()\n",
374 |     "for s in env.P:\n",
375 |     "    for a in env.P[s]:\n",
376 |     "        for s_ in env.P[s][a]:\n",
377 |     "            if s_[2] == 1.0:  # 获得奖励为1,代表是目标\n",
378 |     "                ends.add(s_[1])\n",
379 |     "            if s_[3] == True:\n",
380 |     "                holes.add(s_[1])\n",
381 |     "holes = holes - ends\n",
382 |     "print(\"冰洞的索引:\", holes)\n",
383 |     "print(\"目标的索引:\", ends)\n",
384 |     "\n",
385 |     "for a in env.P[14]:  # 查看目标左边一格的状态转移信息\n",
386 |     "    print(env.P[14][a])\n",
387 |     "\n",
388 |     "# SFFF\n",
389 |     "# FHFH\n",
390 |     "# FFFH\n",
391 |     "# HFFG\n",
392 |     "# 冰洞的索引: {11, 12, 5, 7}\n",
393 |     "# 目标的索引: {15}\n",
394 |     "# [(0.3333333333333333, 10, 0.0, False), (0.3333333333333333, 13, 0.0, False),\n",
395 |     "#  (0.3333333333333333, 14, 0.0, False)]\n",
396 |     "# [(0.3333333333333333, 13, 0.0, False), (0.3333333333333333, 14, 0.0, False),\n",
397 |     "#  (0.3333333333333333, 15, 1.0, True)]\n",
398 |     "# [(0.3333333333333333, 14, 0.0, False), (0.3333333333333333, 15, 1.0, True),\n",
399 |     "#  (0.3333333333333333, 10, 0.0, False)]\n",
400 |     "# [(0.3333333333333333, 15, 1.0, True), (0.3333333333333333, 10, 0.0, False),\n",
401 |     "#  (0.3333333333333333, 13, 0.0, False)]"
402 |    ]
403 |   },
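  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (Supplementary sketch, not part of the original chapter.) Each entry of\n",
    "# env.P[s][a] is a tuple (p, next_state, reward, done): the transition\n",
    "# probability, the resulting state, the reward, and whether the episode ends,\n",
    "# matching the indices s_[0]..s_[3] used above. Unpacking one action of state 14\n",
    "# makes the four fields explicit; the variable names here are illustrative only.\n",
    "for p, next_state, reward, done in env.P[14][1]:\n",
    "    print(f\"p={p:.3f}, next_state={next_state}, reward={reward}, done={done}\")"
   ]
  },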
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": 6,
407 |    "metadata": {
408 |     "colab": {
409 |      "base_uri": "https://localhost:8080/"
410 |     },
411 |     "executionInfo": {
412 |      "elapsed": 7,
413 |      "status": "ok",
414 |      "timestamp": 1649954839116,
415 |      "user": {
416 |       "displayName": "Sam Lu",
417 |       "userId": "15789059763790170725"
418 |      },
419 |      "user_tz": -480
420 |     },
421 |     "id": "4gf_IaeZ95VM",
422 |     "outputId": "20ba5b95-4481-4b40-8015-54ea7fa0c2cb"
423 |    },
424 |    "outputs": [
425 |     {
426 |      "name": "stdout",
427 |      "output_type": "stream",
428 |      "text": [
429 |       "策略评估进行25轮后完成\n",
430 |       "策略提升完成\n",
431 |       "策略评估进行58轮后完成\n",
432 |       "策略提升完成\n",
433 |       "状态价值:\n",
434 |       " 0.069  0.061  0.074  0.056 \n",
435 |       " 0.092  0.000  0.112  0.000 \n",
436 |       " 0.145  0.247  0.300  0.000 \n",
437 |       " 0.000  0.380  0.639  0.000 \n",
438 |       "策略:\n",
439 |       "<ooo ooo^ <ooo ooo^ \n",
440 |       "<ooo **** <o>o **** \n",
441 |       "ooo^ ovoo <ooo **** \n",
442 |       "**** oo>o ovoo EEEE \n"
443 |      ]
444 |     }
445 |    ],
446 |    "source": [
447 |     "# 这个动作意义是Gym库针对冰湖环境事先规定好的\n",
448 |     "action_meaning = ['<', 'v', '>', '^']\n",
449 |     "theta = 1e-5\n",
450 |     "gamma = 0.9\n",
451 |     "agent = PolicyIteration(env, theta, gamma)\n",
452 |     "agent.policy_iteration()\n",
453 |     "print_agent(agent, action_meaning, [5, 7, 11, 12], [15])\n",
454 |     "\n",
455 |     "# 策略评估进行25轮后完成\n",
456 |     "# 策略提升完成\n",
457 |     "# 策略评估进行58轮后完成\n",
458 |     "# 策略提升完成\n",
459 |     "# 状态价值:\n",
460 |     "#  0.069  0.061  0.074  0.056\n",
461 |     "#  0.092  0.000  0.112  0.000\n",
462 |     "#  0.145  0.247  0.300  0.000\n",
463 |     "#  0.000  0.380  0.639  0.000\n",
464 |     "# 策略:\n",
465 |     "# <ooo ooo^ <ooo ooo^\n",
466 |     "# <ooo **** <o>o ****\n",
467 |     "# ooo^ ovoo <ooo ****\n",
468 |     "# **** oo>o ovoo EEEE"
469 |    ]
470 |   },
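  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (Supplementary sketch, not part of the original chapter.) A single Bellman-optimality\n",
    "# backup at state 14: the update that value iteration in the next cell repeats over all\n",
    "# states until the values stop changing. It assumes the policy-iteration agent above\n",
    "# stores its converged state values in agent.v, as in the PolicyIteration class defined\n",
    "# earlier in this chapter.\n",
    "qsa_list = []\n",
    "for a in range(4):\n",
    "    qsa = 0\n",
    "    for p, next_state, r, done in env.P[14][a]:\n",
    "        qsa += p * (r + gamma * agent.v[next_state] * (1 - done))\n",
    "    qsa_list.append(qsa)\n",
    "print('Q(14, a) for a in [<, v, >, ^]:', [round(q, 3) for q in qsa_list])\n",
    "print('one optimality backup gives V(14) =', round(max(qsa_list), 3))"
   ]
  },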
471 |   {
472 |    "cell_type": "code",
473 |    "execution_count": 7,
474 |    "metadata": {
475 |     "colab": {
476 |      "base_uri": "https://localhost:8080/"
477 |     },
478 |     "executionInfo": {
479 |      "elapsed": 6,
480 |      "status": "ok",
481 |      "timestamp": 1649954839117,
482 |      "user": {
483 |       "displayName": "Sam Lu",
484 |       "userId": "15789059763790170725"
485 |      },
486 |      "user_tz": -480
487 |     },
488 |     "id": "cqPm4jxd95VN",
489 |     "outputId": "ab6bdc64-5a4a-47f0-c6af-08bf4c2aeb0f"
490 |    },
491 |    "outputs": [
492 |     {
493 |      "name": "stdout",
494 |      "output_type": "stream",
495 |      "text": [
496 |       "价值迭代一共进行60轮\n",
497 |       "状态价值:\n",
498 |       " 0.069  0.061  0.074  0.056 \n",
499 |       " 0.092  0.000  0.112  0.000 \n",
500 |       " 0.145  0.247  0.300  0.000 \n",
501 |       " 0.000  0.380  0.639  0.000 \n",
502 |       "策略:\n",
503 |       "<ooo ooo^ <ooo ooo^ \n",
504 |       "<ooo **** <o>o **** \n",
505 |       "ooo^ ovoo <ooo **** \n",
506 |       "**** oo>o ovoo EEEE \n"
507 |      ]
508 |     }
509 |    ],
510 |    "source": [
511 |     "action_meaning = ['<', 'v', '>', '^']\n",
512 |     "theta = 1e-5\n",
513 |     "gamma = 0.9\n",
514 |     "agent = ValueIteration(env, theta, gamma)\n",
515 |     "agent.value_iteration()\n",
516 |     "print_agent(agent, action_meaning, [5, 7, 11, 12], [15])\n",
517 |     "\n",
518 |     "# 价值迭代一共进行60轮\n",
519 |     "# 状态价值:\n",
520 |     "#  0.069  0.061  0.074  0.056\n",
521 |     "#  0.092  0.000  0.112  0.000\n",
522 |     "#  0.145  0.247  0.300  0.000\n",
523 |     "#  0.000  0.380  0.639  0.000\n",
524 |     "# 策略:\n",
525 |     "# <ooo ooo^ <ooo ooo^\n",
526 |     "# <ooo **** <o>o ****\n",
527 |     "# ooo^ ovoo <ooo ****\n",
528 |     "# **** oo>o ovoo EEEE"
529 |    ]
530 |   }
531 |  ],
532 |  "metadata": {
533 |   "colab": {
534 |    "collapsed_sections": [],
535 |    "name": "第4章-动态规划算法.ipynb",
536 |    "provenance": [],
537 |    "toc_visible": true
538 |   },
539 |   "kernelspec": {
540 |    "display_name": "Python 3",
541 |    "language": "python",
542 |    "name": "python3"
543 |   },
544 |   "language_info": {
545 |    "codemirror_mode": {
546 |     "name": "ipython",
547 |     "version": 3
548 |    },
549 |    "file_extension": ".py",
550 |    "mimetype": "text/x-python",
551 |    "name": "python",
552 |    "nbconvert_exporter": "python",
553 |    "pygments_lexer": "ipython3",
554 |    "version": "3.7.6"
555 |   }
556 |  },
557 |  "nbformat": 4,
558 |  "nbformat_minor": 1
559 | }
560 | 


--------------------------------------------------------------------------------