├── .gitignore
├── LICENSE.md
├── README.md
├── ch01
│   ├── avg.py
│   ├── bandit.py
│   ├── bandit_avg.py
│   └── non_stationary.py
├── ch04
│   ├── dp.py
│   ├── dp_inplace.py
│   ├── gridworld_play.py
│   ├── policy_eval.py
│   ├── policy_iter.py
│   └── value_iter.py
├── ch05
│   ├── dice.py
│   ├── importance_sampling.py
│   ├── mc_control.py
│   ├── mc_control_offpolicy.py
│   └── mc_eval.py
├── ch06
│   ├── q_learning.py
│   ├── q_learning_simple.py
│   ├── sarsa.py
│   ├── sarsa_off_policy.py
│   └── td_eval.py
├── ch07
│   ├── dezero1.py
│   ├── dezero2.py
│   ├── dezero3.py
│   ├── dezero4.py
│   └── q_learning_nn.py
├── ch08
│   ├── dqn.py
│   ├── gym_play.py
│   └── replay_buffer.py
├── ch09
│   ├── actor_critic.py
│   ├── reinforce.py
│   └── simple_pg.py
├── common
│   ├── gridworld.py
│   ├── gridworld_render.py
│   └── utils.py
├── cover.jpeg
├── equations_and_figures_4.zip
├── notebooks
│   ├── 01_bandit.ipynb
│   ├── 04_dynamic_programming.ipynb
│   ├── 05_montecarlo.ipynb
│   ├── 06_temporal_difference.ipynb
│   ├── 07_neural_networks.ipynb
│   ├── 08_dqn.ipynb
│   └── 09_policy_gradient.ipynb
├── pytorch
│   ├── actor_critic.py
│   ├── dqn.py
│   ├── reinforce.py
│   └── simple_pg.py
└── series overview.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *~
3 |
4 | build/
5 | dist/
6 | dezero.egg-info/
7 | tmp/
8 |
9 | *.dot
10 | *.json
11 | src/.idea/*
12 | .idea/*
13 |
14 | # Byte-compiled / optimized / DLL files
15 | __pycache__/
16 | *.py[cod]
17 | *$py.class
18 |
19 | # C extensions
20 | *.so
21 |
22 | # Distribution / packaging
23 | .Python
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | pip-wheel-metadata/
37 | share/python-wheels/
38 | *.egg-info/
39 | .installed.cfg
40 | *.egg
41 | MANIFEST
42 |
43 | # PyInstaller
44 | # Usually these files are written by a python script from a template
45 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
46 | *.manifest
47 | *.spec
48 |
49 | # Installer logs
50 | pip-log.txt
51 | pip-delete-this-directory.txt
52 |
53 | # Unit test / coverage reports
54 | htmlcov/
55 | .tox/
56 | .nox/
57 | .coverage
58 | .coverage.*
59 | .cache
60 | nosetests.xml
61 | coverage.xml
62 | *.cover
63 | .hypothesis/
64 | .pytest_cache/
65 |
66 | # Translations
67 | *.mo
68 | *.pot
69 |
70 | # Django stuff:
71 | *.log
72 | local_settings.py
73 | db.sqlite3
74 | db.sqlite3-journal
75 |
76 | # Flask stuff:
77 | instance/
78 | .webassets-cache
79 |
80 | # Scrapy stuff:
81 | .scrapy
82 |
83 | # Sphinx documentation
84 | docs/_build/
85 |
86 | # PyBuilder
87 | target/
88 |
89 | # Jupyter Notebook
90 | .ipynb_checkpoints
91 |
92 | # IPython
93 | profile_default/
94 | ipython_config.py
95 |
96 | # pyenv
97 | .python-version
98 |
99 | # pipenv
100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
103 | # install all needed dependencies.
104 | #Pipfile.lock
105 |
106 | # celery beat schedule file
107 | celerybeat-schedule
108 |
109 | # SageMath parsed files
110 | *.sage.py
111 |
112 | # Environments
113 | .env
114 | .venv
115 | env/
116 | venv/
117 | ENV/
118 | env.bak/
119 | venv.bak/
120 |
121 | # Spyder project settings
122 | .spyderproject
123 | .spyproject
124 |
125 | # Rope project settings
126 | .ropeproject
127 |
128 | # mkdocs documentation
129 | /site
130 |
131 | # mypy
132 | .mypy_cache/
133 | .dmypy.json
134 | dmypy.json
135 |
136 | # Pyre type checker
137 | .pyre/
138 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2021 Koki Saitoh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 『밑바닥부터 시작하는 딥러닝 ❹』 (Deep Learning from Scratch ❹): This time, it's reinforcement learning!
2 |
3 |
4 |
5 | **From the core theory of reinforcement learning to worked problems and deep reinforcement learning, all in one book!**
6 |
7 | As the title suggests, the hallmark of this book is that everything is built 'from scratch'. Instead of relying on opaque external libraries, you learn by implementing reinforcement learning algorithms from the ground up. You first grasp the principles through figures, work through reinforcement learning problems with mathematics, and then revisit what you have learned by implementing it in code. The code is written to be as concise as possible while making the key ideas of reinforcement learning stand out clearly. The material raises the level step by step and exposes you to a variety of problems, so you will experience both the difficulty and the fun of reinforcement learning.
8 |
9 |
10 | [Preview](https://preview2.hanbit.co.kr/books/yyxd/#p=1) | [Known errata](https://docs.google.com/document/d/1fsPVXyPF0gpmN57VV6k0uxMfWXUbiQCwno8vCTYpMc8/edit) | [Figure and equation images from the book](https://github.com/WegraLee/deep-learning-from-scratch-4/blob/master/equations_and_figures_4.zip?raw=true)
11 |
12 | ---
13 |
14 | ## File organization
15 |
16 | |Folder |Description |
17 | |:-- |:-- |
18 | |ch01 |Source code used in Chapter 1 |
19 | |... |... |
20 | |ch09 |Source code used in Chapter 9 |
21 | |common |Source code shared by multiple chapters |
22 | |notebooks |Source code as Jupyter notebooks |
23 | |pytorch |Source code ported to PyTorch |
24 |
25 | ## Jupyter notebooks
26 | The code in this book is also provided as Jupyter notebooks. Click the links in the table below to run the notebooks on cloud services such as Google Colab and Kaggle.
27 |
28 | | Chapter | Colab | Kaggle | Studio Lab |
29 | | :--- | :--- | :--- | :--- |
30 | | Chapter 1: The bandit problem | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/01_bandit.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/01_bandit.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/01_bandit.ipynb) |
31 | | Chapter 4: Dynamic programming | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/04_dynamic_programming.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/04_dynamic_programming.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/04_dynamic_programming.ipynb) |
32 | | Chapter 5: Monte Carlo methods | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/05_montecarlo.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/05_montecarlo.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/05_montecarlo.ipynb) |
33 | | Chapter 6: TD methods | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/06_temporal_difference.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/06_temporal_difference.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/06_temporal_difference.ipynb) |
34 | | Chapter 7: Neural networks and Q-learning | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/07_neural_networks.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/07_neural_networks.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/07_neural_networks.ipynb) |
35 | | Chapter 8: DQN | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/08_dqn.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/08_dqn.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/08_dqn.ipynb) |
36 | | Chapter 9: Policy gradient methods | [](https://colab.research.google.com/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/09_policy_gradient.ipynb) | [](https://kaggle.com/kernels/welcome?src=https://github.com/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/09_policy_gradient.ipynb) | [](https://studiolab.sagemaker.aws/import/github/oreilly-japan/deep-learning-from-scratch-4/blob/master/notebooks/09_policy_gradient.ipynb) |
37 |
38 |
39 | ## Requirements
40 | To run the source code, the following software must be installed.
41 |
42 | * Python 3.x
43 | * NumPy
44 | * Matplotlib
45 | * OpenAI Gym
46 | * DeZero (or PyTorch)
47 |
48 | This book uses DeZero as its deep learning framework. DeZero is the framework built in volume 3 of this series (it can be installed with the 'pip install dezero' command).
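The dependencies can be installed with pip. A minimal sketch (PyPI package names are assumed; pin versions as needed):

```
$ pip install numpy matplotlib gym dezero
```

Note that the ch08 and ch09 scripts unpack `env.step()` into five values, which assumes a Gym release with the terminated/truncated API (0.26 or later).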
49 |
50 | PyTorch versions of the implementations are provided in the [pytorch directory](https://github.com/WegraLee/deep-learning-from-scratch-4/tree/master/pytorch).
51 |
52 | ## How to run
53 |
54 | The example code is organized into per-chapter directories. To run an example, invoke Python as follows.
55 |
56 | ```
57 | $ python ch01/avg.py
58 | $ python ch08/dqn.py
59 |
60 | $ cd ch09
61 | $ python actor_critic.py
62 | ```
63 |
64 | As shown, you can either change into a chapter directory and run a script there, or run it from the top-level directory by giving the ch0x path.
65 |
66 | ---
67 |
68 | ## Fan fiction - Undersea Deep Learning Adventures (a five-part series)
69 |
70 |
71 |
72 | "<밑바닥부터 시작하는 딥러닝>의 주인공 생선들은 딥러닝 기술로 바닷속 생태계를 어떻게 혁신하고 있을까요? 어공지능의 첨단을 이끌어가는 밑시딥 생선들과 신나는 모험을 떠나보세요."
73 |
74 | Set in an undersea world, this serial novel follows marine creatures as they develop deep learning techniques suited to their own traits and needs and use them to solve problems. Deep learning references are woven in throughout, so readers of the book series will find extra enjoyment in it.
75 |
76 | The protagonist and theme of each installment are as follows.
77 |
78 | 1. **In Search of Sight**: a scorpionfish (Ssom) develops **image processing** technology to see its surroundings clearly
79 | 1. **The Shark Princess**: a bullhead shark princess (Kkwaeng) wins the love of a dolphin prince with **natural language processing**
80 | 1. **The Creator of DeZero**: a leafy seadragon (Ipryong) builds a **deep learning framework** to spread the technology and foster collaboration
81 | 1. **Please, Let's Go!**: a flatfish (Gazua) pioneers new, food-rich seas with **deep reinforcement learning**
82 | 1. **Fishcasso and the Portrait of a Genius**: a ghost pipefish (Fishcasso) transforms the undersea art world with **image generation models**
83 |
84 | Go read the stories
85 |
86 | ---
87 |
88 | ## License
89 | 
90 | The source code in this repository is released under the [MIT License](http://www.opensource.org/licenses/MIT).
91 | You may use it freely, including for commercial purposes.
92 |
--------------------------------------------------------------------------------
/ch01/avg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # Naive implementation
4 | np.random.seed(0) # fix the seed
5 | rewards = []
6 | 
7 | for n in range(1, 11): # play 10 times
8 |     reward = np.random.rand() # reward (simulated with a random number)
9 | rewards.append(reward)
10 | Q = sum(rewards) / n
11 | print(Q)
12 |
13 | print('---')
14 |
15 | # Incremental implementation
16 | np.random.seed(0)
17 | Q = 0
18 |
19 | for n in range(1, 11):
20 | reward = np.random.rand()
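    # The next line is the incremental form of the sample mean (the book's Equation 1.5):
    #   Q_n = Q_{n-1} + (R_n - Q_{n-1}) / n
    # so the running average is maintained without storing past rewards.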
21 |     Q = Q + (reward - Q) / n # [Equation 1.5]
22 | print(Q)
23 |
--------------------------------------------------------------------------------
/ch01/bandit.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 |
4 |
5 | class Bandit:
6 |     def __init__(self, arms=10): # arms = number of slot machines
7 |         self.rates = np.random.rand(arms) # set each machine's win rate (at random)
8 |
9 | def play(self, arm):
10 | rate = self.rates[arm]
11 | if rate > np.random.rand():
12 | return 1
13 | else:
14 | return 0
15 |
16 |
17 | class Agent:
18 | def __init__(self, epsilon, action_size=10):
19 |         self.epsilon = epsilon # probability of acting at random (exploration rate)
20 | self.Qs = np.zeros(action_size)
21 | self.ns = np.zeros(action_size)
22 |
23 |     # estimate the value of each slot machine
24 | def update(self, action, reward):
25 | self.ns[action] += 1
26 | self.Qs[action] += (reward - self.Qs[action]) / self.ns[action]
27 |
28 |     # select an action (ε-greedy policy)
29 | def get_action(self):
30 | if np.random.rand() < self.epsilon:
31 |             return np.random.randint(0, len(self.Qs)) # choose a random action
32 |         return np.argmax(self.Qs) # choose the greedy action
33 |
34 |
35 | if __name__ == '__main__':
36 | steps = 1000
37 | epsilon = 0.1
38 |
39 | bandit = Bandit()
40 | agent = Agent(epsilon)
41 | total_reward = 0
42 |     total_rewards = [] # cumulative rewards
43 |     rates = [] # win rates
44 |
45 | for step in range(steps):
46 |         action = agent.get_action() # select an action
47 |         reward = bandit.play(action) # actually play and receive a reward
48 |         agent.update(action, reward) # learn from the action and reward
49 |         total_reward += reward
50 | 
51 |         total_rewards.append(total_reward) # record the cumulative reward so far
52 |         rates.append(total_reward / (step + 1)) # record the win rate so far
53 |
54 | print(total_reward)
55 |
56 |     # [Figure 1-12] Total reward per step
57 | plt.ylabel('Total reward')
58 | plt.xlabel('Steps')
59 | plt.plot(total_rewards)
60 | plt.show()
61 |
62 |     # [Figure 1-13] Win rate per step
63 | plt.ylabel('Rates')
64 | plt.xlabel('Steps')
65 | plt.plot(rates)
66 | plt.show()
67 |
--------------------------------------------------------------------------------
/ch01/bandit_avg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from bandit import Bandit, Agent
4 |
5 |
6 | runs = 200
7 | steps = 1000
8 | epsilon = 0.1
9 | all_rates = np.zeros((runs, steps)) # array of shape (200, 1000)
10 |
11 | for run in range(runs): # 200 runs
12 | bandit = Bandit()
13 | agent = Agent(epsilon)
14 | total_reward = 0
15 | rates = []
16 |
17 | for step in range(steps):
18 | action = agent.get_action()
19 | reward = bandit.play(action)
20 | agent.update(action, reward)
21 | total_reward += reward
22 | rates.append(total_reward / (step + 1))
23 |
24 |     all_rates[run] = rates # record this run's win rates
25 |
26 | avg_rates = np.average(all_rates, axis=0) # average over runs at each step
27 |
28 | # [Figure 1-16] Win rate per step (averaged over 200 runs)
29 | plt.ylabel('Rates')
30 | plt.xlabel('Steps')
31 | plt.plot(avg_rates)
32 | plt.show()
33 |
--------------------------------------------------------------------------------
/ch01/non_stationary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from bandit import Agent
4 |
5 |
6 | class NonStatBandit:
7 | def __init__(self, arms=10):
8 | self.arms = arms
9 | self.rates = np.random.rand(arms)
10 |
11 | def play(self, arm):
12 | rate = self.rates[arm]
13 |         self.rates += 0.1 * np.random.randn(self.arms) # add noise
14 | if rate > np.random.rand():
15 | return 1
16 | else:
17 | return 0
18 |
19 |
20 | class AlphaAgent:
21 | def __init__(self, epsilon, alpha, actions=10):
22 | self.epsilon = epsilon
23 | self.Qs = np.zeros(actions)
24 |         self.alpha = alpha # fixed step size α
25 |
26 | def update(self, action, reward):
27 |         # update with the fixed step size α
28 | self.Qs[action] += (reward - self.Qs[action]) * self.alpha
29 |
30 | def get_action(self):
31 | if np.random.rand() < self.epsilon:
32 | return np.random.randint(0, len(self.Qs))
33 | return np.argmax(self.Qs)
34 |
35 |
36 | runs = 200
37 | steps = 1000
38 | epsilon = 0.1
39 | alpha = 0.8
40 | agent_types = ['sample average', 'alpha const update']
41 | results = {}
42 |
43 | for agent_type in agent_types:
44 | all_rates = np.zeros((runs, steps)) # (200, 1000)
45 |
46 | for run in range(runs):
47 | if agent_type == 'sample average':
48 | agent = Agent(epsilon)
49 | else:
50 | agent = AlphaAgent(epsilon, alpha)
51 |
52 | bandit = NonStatBandit()
53 | total_reward = 0
54 | rates = []
55 |
56 | for step in range(steps):
57 | action = agent.get_action()
58 | reward = bandit.play(action)
59 | agent.update(action, reward)
60 | total_reward += reward
61 | rates.append(total_reward / (step + 1))
62 |
63 | all_rates[run] = rates
64 |
65 | avg_rates = np.average(all_rates, axis=0)
66 | results[agent_type] = avg_rates
67 |
68 | # [Figure 1-20] Comparing sample-average updates with fixed-α updates
69 | plt.figure()
70 | plt.ylabel('Average Rates')
71 | plt.xlabel('Steps')
72 | for key, avg_rates in results.items():
73 | plt.plot(avg_rates, label=key)
74 | plt.legend()
75 | plt.show()
76 |
--------------------------------------------------------------------------------
/ch04/dp.py:
--------------------------------------------------------------------------------
1 | V = {'L1': 0.0, 'L2': 0.0}
2 | new_V = V.copy()
3 |
4 | cnt = 0 # count the number of updates
5 | while True:
6 | new_V['L1'] = 0.5 * (-1 + 0.9 * V['L1']) + 0.5 * (1 + 0.9 * V['L2'])
7 | new_V['L2'] = 0.5 * (0 + 0.9 * V['L1']) + 0.5 * (-1 + 0.9 * V['L2'])
8 |
9 |     # largest change made in this update
10 | delta = abs(new_V['L1'] - V['L1'])
11 | delta = max(delta, abs(new_V['L2'] - V['L2']))
12 | V = new_V.copy()
13 |
14 | cnt += 1
15 |     if delta < 0.0001: # threshold = 0.0001
16 |         print(V)
17 |         print('update count:', cnt)
18 | break
19 |
--------------------------------------------------------------------------------
/ch04/dp_inplace.py:
--------------------------------------------------------------------------------
1 | V = {'L1': 0.0, 'L2': 0.0}
2 |
3 | cnt = 0
4 | while True:
5 | t = 0.5 * (-1 + 0.9 * V['L1']) + 0.5 * (1 + 0.9 * V['L2'])
6 | delta = abs(t - V['L1'])
7 | V['L1'] = t
8 |
9 | t = 0.5 * (0 + 0.9 * V['L1']) + 0.5 * (-1 + 0.9 * V['L2'])
10 | delta = max(delta, abs(t - V['L2']))
11 | V['L2'] = t
12 |
13 | cnt += 1
14 | if delta < 0.0001:
15 | print(V)
16 |         print('update count:', cnt)
17 | break
18 |
--------------------------------------------------------------------------------
/ch04/gridworld_play.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | import numpy as np
5 | from common.gridworld import GridWorld
6 |
7 | env = GridWorld()
8 | V = {}
9 | for state in env.states():
10 |     V[state] = np.random.randn() # dummy state-value function
11 | env.render_v(V)
12 |
--------------------------------------------------------------------------------
/ch04/policy_eval.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | from collections import defaultdict
5 | from common.gridworld import GridWorld
6 |
7 |
8 | def eval_onestep(pi, V, env, gamma=0.9):
9 |     for state in env.states(): # iterate over each state
10 |         if state == env.goal_state: # ❷ the value of the goal state is always 0
11 | V[state] = 0
12 | continue
13 |
14 | action_probs = pi[state]
15 | new_V = 0
16 |
17 |         # iterate over each action
18 | for action, action_prob in action_probs.items():
19 | next_state = env.next_state(state, action)
20 | r = env.reward(state, action, next_state)
21 |             # new value function
22 | new_V += action_prob * (r + gamma * V[next_state])
23 |
24 | V[state] = new_V
25 | return V
26 |
27 |
28 | def policy_eval(pi, V, env, gamma, threshold=0.001):
29 | while True:
30 |         old_V = V.copy() # value function before the update
31 | V = eval_onestep(pi, V, env, gamma)
32 |
33 |         # compute the largest update
34 | delta = 0
35 | for state in V.keys():
36 | t = abs(V[state] - old_V[state])
37 | if delta < t:
38 | delta = t
39 |
40 |         # compare against the threshold
41 | if delta < threshold:
42 | break
43 | return V
44 |
45 |
46 | if __name__ == '__main__':
47 | env = GridWorld()
48 |     gamma = 0.9 # discount rate
49 | 
50 |     pi = defaultdict(lambda: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}) # policy
51 |     V = defaultdict(lambda: 0) # value function
52 | 
53 |     V = policy_eval(pi, V, env, gamma) # policy evaluation
54 | 
55 |     # [Figure 4-13] Value function of the random policy
56 | env.render_v(V, pi)
57 |
58 |
--------------------------------------------------------------------------------
/ch04/policy_iter.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | from collections import defaultdict
5 | from common.gridworld import GridWorld
6 | from ch04.policy_eval import policy_eval
7 |
8 |
9 | def argmax(d):
10 | """d (dict)"""
11 | max_value = max(d.values())
12 | max_key = -1
13 | for key, value in d.items():
14 | if value == max_value:
15 | max_key = key
16 | return max_key
17 |
18 |
19 | def greedy_policy(V, env, gamma):
20 | pi = {}
21 |
22 | for state in env.states():
23 | action_values = {}
24 |
25 | for action in env.actions():
26 | next_state = env.next_state(state, action)
27 | r = env.reward(state, action, next_state)
28 | value = r + gamma * V[next_state]
29 | action_values[action] = value
30 |
31 | max_action = argmax(action_values)
32 | action_probs = {0: 0, 1: 0, 2: 0, 3: 0}
33 | action_probs[max_action] = 1.0
34 | pi[state] = action_probs
35 | return pi
36 |
37 |
38 | def policy_iter(env, gamma, threshold=0.001, is_render=True):
39 | pi = defaultdict(lambda: {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25})
40 | V = defaultdict(lambda: 0)
41 |
42 | while True:
43 |         V = policy_eval(pi, V, env, gamma, threshold) # evaluation
44 |         new_pi = greedy_policy(V, env, gamma) # improvement
45 |
46 | if is_render:
47 | env.render_v(V, pi)
48 |
49 |         if new_pi == pi: # check whether the policy has stopped changing
50 | break
51 | pi = new_pi
52 |
53 | return pi
54 |
55 |
56 | # Visualize the value function and policy ([Figure 4-16] in the book shows only the first and last plots)
57 | if __name__ == '__main__':
58 | env = GridWorld()
59 | gamma = 0.9
60 | pi = policy_iter(env, gamma)
61 |
--------------------------------------------------------------------------------
/ch04/value_iter.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | from collections import defaultdict
5 | from common.gridworld import GridWorld
6 | from ch04.policy_iter import greedy_policy
7 |
8 |
9 | def value_iter_onestep(V, env, gamma):
10 |     for state in env.states(): # iterate over all states in turn
11 |         if state == env.goal_state: # the value of the goal state is always 0
12 | V[state] = 0
13 | continue
14 |
15 | action_values = []
16 |         for action in env.actions(): # iterate over all actions in turn
17 | next_state = env.next_state(state, action)
18 | r = env.reward(state, action, next_state)
19 |             value = r + gamma * V[next_state] # new value function
20 | action_values.append(value)
21 |
22 |         V[state] = max(action_values) # take the maximum
23 | return V
24 |
25 |
26 | def value_iter(V, env, gamma, threshold=0.001, is_render=True):
27 | while True:
28 | if is_render:
29 | env.render_v(V)
30 |
31 |         old_V = V.copy() # value function before the update
32 | V = value_iter_onestep(V, env, gamma)
33 |
34 |         # find the largest update
35 | delta = 0
36 | for state in V.keys():
37 | t = abs(V[state] - old_V[state])
38 | if delta < t:
39 | delta = t
40 |
41 |         # compare against the threshold
42 | if delta < threshold:
43 | break
44 | return V
45 |
46 |
47 | # [Figure 4-24] and [Figure 4-25]
48 | if __name__ == '__main__':
49 | V = defaultdict(lambda: 0)
50 | env = GridWorld()
51 | gamma = 0.9
52 |
53 |     V = value_iter(V, env, gamma) # find the optimal value function
54 |
55 |     pi = greedy_policy(V, env, gamma) # derive the optimal policy
56 | env.render_v(V, pi)
57 |
--------------------------------------------------------------------------------
/ch05/dice.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def sample(dices=2):
5 | x = 0
6 | for _ in range(dices):
7 | x += np.random.choice([1, 2, 3, 4, 5, 6])
8 | return x
9 |
10 |
11 | trial = 1000
12 | V, n = 0, 0
13 |
14 | for _ in range(trial):
15 | s = sample()
16 | n += 1
17 | V += (s - V) / n
18 | print(V)
19 |
--------------------------------------------------------------------------------
/ch05/importance_sampling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | x = np.array([1, 2, 3]) # random variable
4 | pi = np.array([0.1, 0.1, 0.8]) # probability distribution
5 | 
6 | # =========== true expected value ==================
7 | e = np.sum(x * pi)
8 | print('True value (E_pi[x]):', e)
9 |
10 | # =========== Monte Carlo estimate ==================
11 | n = 100 # number of samples
12 | samples = []
13 | for _ in range(n):
14 |     s = np.random.choice(x, p=pi) # sample according to pi
15 |     samples.append(s)
16 | 
17 | mean = np.mean(samples) # sample mean
18 | var = np.var(samples) # sample variance
19 | print('Monte Carlo: {:.2f} (variance: {:.2f})'.format(np.mean(samples), np.var(samples)))
20 |
21 | # =========== importance sampling ===========
22 | b = np.array([0.2, 0.2, 0.6]) #b = np.array([1/3, 1/3, 1/3])
23 | samples = []
24 | for _ in range(n):
25 |     idx = np.arange(len(b)) # indices of b ([0, 1, 2])
26 |     i = np.random.choice(idx, p=b) # sample according to b
27 |     s = x[i]
28 |     rho = pi[i] / b[i] # importance weight
29 |     samples.append(rho * s) # store the weighted sample
30 | 
31 | mean = np.mean(samples)
32 | var = np.var(samples)
33 | print('Importance sampling: {:.2f} (variance: {:.2f})'.format(np.mean(samples), np.var(samples)))
34 |
--------------------------------------------------------------------------------
/ch05/mc_control.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | import numpy as np
3 | from collections import defaultdict
4 | from common.gridworld import GridWorld
5 | # from common.utils import greedy_probs
6 |
7 |
8 | def greedy_probs(Q, state, epsilon=0, action_size=4):
9 | qs = [Q[(state, action)] for action in range(action_size)]
10 | max_action = np.argmax(qs)
11 |
12 | base_prob = epsilon / action_size
13 | action_probs = {action: base_prob for action in range(action_size)} #{0: ε/4, 1: ε/4, 2: ε/4, 3: ε/4}
14 | action_probs[max_action] += (1 - epsilon)
15 | return action_probs
16 |
17 |
18 | class McAgent:
19 | def __init__(self):
20 | self.gamma = 0.9
21 |         self.epsilon = 0.1 # (first improvement) ε of the ε-greedy policy
22 |         self.alpha = 0.1 # (second improvement) fixed step size α for Q-function updates
23 | self.action_size = 4
24 |
25 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
26 | self.pi = defaultdict(lambda: random_actions)
27 | self.Q = defaultdict(lambda: 0)
28 | # self.cnts = defaultdict(lambda: 0)
29 | self.memory = []
30 |
31 | def get_action(self, state):
32 | action_probs = self.pi[state]
33 | actions = list(action_probs.keys())
34 | probs = list(action_probs.values())
35 | return np.random.choice(actions, p=probs)
36 |
37 | def add(self, state, action, reward):
38 | data = (state, action, reward)
39 | self.memory.append(data)
40 |
41 | def reset(self):
42 | self.memory.clear()
43 |
44 | def update(self):
45 | G = 0
46 | for data in reversed(self.memory):
47 | state, action, reward = data
48 | G = self.gamma * G + reward
49 | key = (state, action)
50 | # self.cnts[key] += 1
51 | # self.Q[key] += (G - self.Q[key]) / self.cnts[key]
52 | self.Q[key] += (G - self.Q[key]) * self.alpha
53 | self.pi[state] = greedy_probs(self.Q, state, self.epsilon)
54 |
55 |
56 | env = GridWorld()
57 | agent = McAgent()
58 |
59 | episodes = 10000
60 | for episode in range(episodes):
61 | state = env.reset()
62 | agent.reset()
63 |
64 | while True:
65 | action = agent.get_action(state)
66 | next_state, reward, done = env.step(action)
67 |
68 | agent.add(state, action, reward)
69 | if done:
70 | agent.update()
71 | break
72 |
73 | state = next_state
74 |
75 | # [Figure 5-17] and [Figure 5-18]
76 | env.render_q(agent.Q)
77 |
--------------------------------------------------------------------------------
/ch05/mc_control_offpolicy.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 | from common.utils import greedy_probs
6 |
7 |
8 | class McOffPolicyAgent:
9 | def __init__(self):
10 | self.gamma = 0.9
11 | self.epsilon = 0.1
12 | self.alpha = 0.2
13 | self.action_size = 4
14 |
15 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
16 | self.pi = defaultdict(lambda: random_actions)
17 | self.b = defaultdict(lambda: random_actions)
18 | self.Q = defaultdict(lambda: 0)
19 | self.memory = []
20 |
21 | def get_action(self, state):
22 | action_probs = self.b[state]
23 | actions = list(action_probs.keys())
24 | probs = list(action_probs.values())
25 | return np.random.choice(actions, p=probs)
26 |
27 | def add(self, state, action, reward):
28 | data = (state, action, reward)
29 | self.memory.append(data)
30 |
31 | def reset(self):
32 | self.memory.clear()
33 |
34 | def update(self):
35 | G = 0
36 | rho = 1
37 |
38 | for data in reversed(self.memory):
39 | state, action, reward = data
40 | key = (state, action)
41 |
42 | G = self.gamma * rho * G + reward
43 | self.Q[key] += (G - self.Q[key]) * self.alpha
44 | rho *= self.pi[state][action] / self.b[state][action]
45 |
46 | self.pi[state] = greedy_probs(self.Q, state, epsilon=0)
47 | self.b[state] = greedy_probs(self.Q, state, self.epsilon)
48 |
49 |
50 | env = GridWorld()
51 | agent = McOffPolicyAgent()
52 |
53 | episodes = 10000
54 | for episode in range(episodes):
55 | state = env.reset()
56 | agent.reset()
57 |
58 | while True:
59 | action = agent.get_action(state)
60 | next_state, reward, done = env.step(action)
61 |
62 | agent.add(state, action, reward)
63 | if done:
64 | agent.update()
65 | break
66 |
67 | state = next_state
68 |
69 | env.render_q(agent.Q)
--------------------------------------------------------------------------------
/ch05/mc_eval.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 |
6 |
7 | class RandomAgent:
8 | def __init__(self):
9 | self.gamma = 0.9
10 | self.action_size = 4
11 |
12 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
13 | self.pi = defaultdict(lambda: random_actions)
14 | self.V = defaultdict(lambda: 0)
15 | self.cnts = defaultdict(lambda: 0)
16 | self.memory = []
17 |
18 | def get_action(self, state):
19 | action_probs = self.pi[state]
20 | actions = list(action_probs.keys())
21 | probs = list(action_probs.values())
22 | return np.random.choice(actions, p=probs)
23 |
24 | def add(self, state, action, reward):
25 | data = (state, action, reward)
26 | self.memory.append(data)
27 |
28 | def reset(self):
29 | self.memory.clear()
30 |
31 | def eval(self):
32 | G = 0
33 |         for data in reversed(self.memory): # traverse the episode in reverse order
34 | state, action, reward = data
35 | G = self.gamma * G + reward
36 | self.cnts[state] += 1
37 | self.V[state] += (G - self.V[state]) / self.cnts[state]
38 |
39 |
40 | env = GridWorld()
41 | agent = RandomAgent()
42 |
43 | episodes = 1000
44 | for episode in range(episodes): # run 1000 episodes
45 | state = env.reset()
46 | agent.reset()
47 |
48 | while True:
49 |         action = agent.get_action(state) # select an action
50 |         next_state, reward, done = env.step(action) # take the action
51 | 
52 |         agent.add(state, action, reward) # store (state, action, reward)
53 |         if done: # when the goal is reached
54 |             agent.eval() # update the value function with the Monte Carlo method
55 |             break # start the next episode
56 |
57 | state = next_state
58 |
59 | # [Figure 5-12] Value function obtained with the Monte Carlo method
60 | env.render_v(agent.V)
61 |
--------------------------------------------------------------------------------
/ch06/q_learning.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 | from common.utils import greedy_probs
6 |
7 |
8 | class QLearningAgent:
9 | def __init__(self):
10 | self.gamma = 0.9
11 | self.alpha = 0.8
12 | self.epsilon = 0.1
13 | self.action_size = 4
14 |
15 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
16 | self.pi = defaultdict(lambda: random_actions)
17 |         self.b = defaultdict(lambda: random_actions) # behavior policy
18 | self.Q = defaultdict(lambda: 0)
19 |
20 | def get_action(self, state):
21 |         action_probs = self.b[state] # take probabilities from the behavior policy
22 | actions = list(action_probs.keys())
23 | probs = list(action_probs.values())
24 | return np.random.choice(actions, p=probs)
25 |
26 | def update(self, state, action, reward, next_state, done):
27 |         if done: # the goal was reached
28 |             next_q_max = 0
29 |         else: # otherwise, compute the maximum Q value of the next state
30 | next_qs = [self.Q[next_state, a] for a in range(self.action_size)]
31 | next_q_max = max(next_qs)
32 |
33 |         # update the Q function
34 | target = reward + self.gamma * next_q_max
35 | self.Q[state, action] += (target - self.Q[state, action]) * self.alpha
36 |
37 |         # update the behavior policy and the target policy
38 | self.pi[state] = greedy_probs(self.Q, state, epsilon=0)
39 | self.b[state] = greedy_probs(self.Q, state, self.epsilon)
40 |
41 |
42 | env = GridWorld()
43 | agent = QLearningAgent()
44 |
45 | episodes = 10000
46 | for episode in range(episodes):
47 | state = env.reset()
48 |
49 | while True:
50 | action = agent.get_action(state)
51 | next_state, reward, done = env.step(action)
52 |
53 | agent.update(state, action, reward, next_state, done)
54 | if done:
55 | break
56 | state = next_state
57 |
58 | # [Figure 6-15] Q function and policy obtained with Q-learning
59 | env.render_q(agent.Q)
60 |
--------------------------------------------------------------------------------
/ch06/q_learning_simple.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 |
6 |
7 | class QLearningAgent:
8 | def __init__(self):
9 | self.gamma = 0.9
10 | self.alpha = 0.8
11 | self.epsilon = 0.1
12 | self.action_size = 4
13 | self.Q = defaultdict(lambda: 0)
14 |
15 | def get_action(self, state):
16 |         if np.random.rand() < self.epsilon: # random action with probability epsilon
17 |             return np.random.choice(self.action_size)
18 |         else: # greedy action with probability (1 - epsilon)
19 | qs = [self.Q[state, a] for a in range(self.action_size)]
20 | return np.argmax(qs)
21 |
22 | def update(self, state, action, reward, next_state, done):
23 | if done:
24 | next_q_max = 0
25 | else:
26 | next_qs = [self.Q[next_state, a] for a in range(self.action_size)]
27 | next_q_max = max(next_qs)
28 |
29 | target = reward + self.gamma * next_q_max
30 | self.Q[state, action] += (target - self.Q[state, action]) * self.alpha
31 |
32 |
33 | env = GridWorld()
34 | agent = QLearningAgent()
35 |
36 | episodes = 1000
37 | for episode in range(episodes):
38 | state = env.reset()
39 |
40 | while True:
41 | action = agent.get_action(state)
42 | next_state, reward, done = env.step(action)
43 |
44 | agent.update(state, action, reward, next_state, done)
45 | if done:
46 | break
47 | state = next_state
48 |
49 | env.render_q(agent.Q)
50 |
--------------------------------------------------------------------------------
/ch06/sarsa.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict, deque
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 | from common.utils import greedy_probs
6 |
7 |
8 | class SarsaAgent:
9 | def __init__(self):
10 | self.gamma = 0.9
11 | self.alpha = 0.8
12 | self.epsilon = 0.1
13 | self.action_size = 4
14 |
15 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
16 | self.pi = defaultdict(lambda: random_actions)
17 | self.Q = defaultdict(lambda: 0)
18 |         self.memory = deque(maxlen=2) # use a deque
19 |
20 | def get_action(self, state):
21 |         action_probs = self.pi[state] # select from pi
22 | actions = list(action_probs.keys())
23 | probs = list(action_probs.values())
24 | return np.random.choice(actions, p=probs)
25 |
26 | def reset(self):
27 | self.memory.clear()
28 |
29 | def update(self, state, action, reward, done):
30 | self.memory.append((state, action, reward, done))
31 | if len(self.memory) < 2:
32 | return
33 |
34 | state, action, reward, done = self.memory[0]
35 | next_state, next_action, _, _ = self.memory[1]
36 |         next_q = 0 if done else self.Q[next_state, next_action] # next Q value
37 |
38 |         # update self.Q with the TD method
39 | target = reward + self.gamma * next_q
40 | self.Q[state, action] += (target - self.Q[state, action]) * self.alpha
41 |
42 |         # improve the policy
43 | self.pi[state] = greedy_probs(self.Q, state, self.epsilon)
44 |
45 |
46 | env = GridWorld()
47 | agent = SarsaAgent()
48 |
49 | episodes = 10000
50 | for episode in range(episodes):
51 | state = env.reset()
52 | agent.reset()
53 |
54 | while True:
55 | action = agent.get_action(state)
56 | next_state, reward, done = env.step(action)
57 |
58 |         agent.update(state, action, reward, done) # called at every step
59 |
60 | if done:
61 |             # also called when the goal is reached
62 | agent.update(next_state, None, None, None)
63 | break
64 | state = next_state
65 |
66 | # [Figure 6-7] Results obtained with SARSA
67 | env.render_q(agent.Q)
68 |
--------------------------------------------------------------------------------
/ch06/sarsa_off_policy.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | from collections import defaultdict, deque
5 | import numpy as np
6 | from common.gridworld import GridWorld
7 | from common.utils import greedy_probs
8 |
9 |
10 | class SarsaOffPolicyAgent:
11 | def __init__(self):
12 | self.gamma = 0.9
13 | self.alpha = 0.8
14 | self.epsilon = 0.1
15 | self.action_size = 4
16 |
17 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
18 | self.pi = defaultdict(lambda: random_actions)
19 | self.b = defaultdict(lambda: random_actions)
20 | self.Q = defaultdict(lambda: 0)
21 | self.memory = deque(maxlen=2)
22 |
23 | def get_action(self, state):
24 |         action_probs = self.b[state] # take probabilities from the behavior policy
25 | actions = list(action_probs.keys())
26 | probs = list(action_probs.values())
27 | return np.random.choice(actions, p=probs)
28 |
29 | def reset(self):
30 | self.memory.clear()
31 |
32 | def update(self, state, action, reward, done):
33 | self.memory.append((state, action, reward, done))
34 | if len(self.memory) < 2:
35 | return
36 |
37 | state, action, reward, done = self.memory[0]
38 | next_state, next_action, _, _ = self.memory[1]
39 |
40 | if done:
41 | next_q = 0
42 | rho = 1
43 | else:
44 | next_q = self.Q[next_state, next_action]
45 |             rho = self.pi[next_state][next_action] / self.b[next_state][next_action] # importance weight rho
46 |
47 |         # correct the TD target with rho
48 | target = rho * (reward + self.gamma * next_q)
49 | self.Q[state, action] += (target - self.Q[state, action]) * self.alpha
50 |
51 |         # improve each policy
52 | self.pi[state] = greedy_probs(self.Q, state, 0)
53 | self.b[state] = greedy_probs(self.Q, state, self.epsilon)
54 |
55 |
56 | env = GridWorld()
57 | agent = SarsaOffPolicyAgent()
58 |
59 | episodes = 10000
60 | for episode in range(episodes):
61 | state = env.reset()
62 | agent.reset()
63 |
64 | while True:
65 | action = agent.get_action(state)
66 | next_state, reward, done = env.step(action)
67 |
68 | agent.update(state, action, reward, done)
69 |
70 | if done:
71 | agent.update(next_state, None, None, None)
72 | break
73 | state = next_state
74 |
75 | # [Figure 6-9] Results obtained with off-policy SARSA
76 | env.render_q(agent.Q)
77 |
--------------------------------------------------------------------------------
/ch06/td_eval.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | from collections import defaultdict
3 | import numpy as np
4 | from common.gridworld import GridWorld
5 |
6 |
7 | class TdAgent:
8 | def __init__(self):
9 | self.gamma = 0.9
10 | self.alpha = 0.01
11 | self.action_size = 4
12 |
13 | random_actions = {0: 0.25, 1: 0.25, 2: 0.25, 3: 0.25}
14 | self.pi = defaultdict(lambda: random_actions)
15 | self.V = defaultdict(lambda: 0)
16 |
17 | def get_action(self, state):
18 | action_probs = self.pi[state]
19 | actions = list(action_probs.keys())
20 | probs = list(action_probs.values())
21 | return np.random.choice(actions, p=probs)
22 |
23 | def eval(self, state, reward, next_state, done):
24 |         next_V = 0 if done else self.V[next_state] # the value of the goal state is 0
25 | target = reward + self.gamma * next_V
26 | self.V[state] += (target - self.V[state]) * self.alpha
27 |
28 |
29 | env = GridWorld()
30 | agent = TdAgent()
31 |
32 | episodes = 1000
33 | for episode in range(episodes):
34 | state = env.reset()
35 |
36 | while True:
37 | action = agent.get_action(state)
38 | next_state, reward, done = env.step(action)
39 |
40 |         agent.eval(state, reward, next_state, done) # called at every step
41 | if done:
42 | break
43 | state = next_state
44 |
45 | # [Figure 6-5] Value function obtained with the TD method
46 | env.render_v(agent.V)
47 |
--------------------------------------------------------------------------------
/ch07/dezero1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from dezero import Variable
3 | import dezero.functions as F
4 |
5 | # inner product of vectors
6 | a = np.array([1, 2, 3])
7 | b = np.array([4, 5, 6])
8 | a, b = Variable(a), Variable(b) # wrapping in Variable is optional
9 | c = F.matmul(a, b)
10 | print(c)
11 |
12 | # matrix product
13 | a = np.array([[1, 2], [3, 4]])
14 | b = np.array([[5, 6], [7, 8]])
15 | c = F.matmul(a, b)
16 | print(c)
17 |
--------------------------------------------------------------------------------
/ch07/dezero2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from dezero import Variable
3 |
4 | def rosenbrock(x0, x1):
5 | y = 100 * (x1 - x0 ** 2) ** 2 + (x0 - 1) ** 2
6 | return y
7 |
8 | x0 = Variable(np.array(0.0))
9 | x1 = Variable(np.array(2.0))
10 |
11 | iters = 10000 # number of iterations
12 | lr = 0.001 # learning rate
13 | 
14 | for i in range(iters): # update loop
15 | y = rosenbrock(x0, x1)
16 |
17 |     # clear the gradients accumulated in the previous iteration
18 | x0.cleargrad()
19 | x1.cleargrad()
20 |
21 |     # differentiate (backpropagation)
22 | y.backward()
23 |
24 |     # update the variables
25 | x0.data -= lr * x0.grad.data
26 | x1.data -= lr * x1.grad.data
27 |
28 | print(x0, x1)
29 |
--------------------------------------------------------------------------------
/ch07/dezero3.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from dezero import Variable
4 | import dezero.functions as F
5 |
6 | # toy dataset
7 | np.random.seed(0)
8 | x = np.random.rand(100, 1)
9 | y = 5 + 2 * x + np.random.rand(100, 1)
10 | x, y = Variable(x), Variable(y) # wrapping in Variable is optional
11 |
12 | # define the parameters
13 | W = Variable(np.zeros((1, 1)))
14 | b = Variable(np.zeros(1))
15 |
16 | # prediction function
17 | def predict(x):
18 |     y = F.matmul(x, W) + b # process all the data at once via matrix multiplication
19 | return y
20 |
21 | # mean squared error (Equation 7.2)
22 | def mean_squared_error(x0, x1):
23 | diff = x0 - x1
24 | return F.sum(diff ** 2) / len(diff)
25 |
26 | # update the parameters with gradient descent
27 | lr = 0.1
28 | iters = 100
29 |
30 | for i in range(iters):
31 | y_pred = predict(x)
32 | loss = mean_squared_error(y, y_pred)
33 |     # or: loss = F.mean_squared_error(y, y_pred)
34 |
35 | W.cleargrad()
36 | b.cleargrad()
37 | loss.backward()
38 |
39 | W.data -= lr * W.grad.data
40 | b.data -= lr * b.grad.data
41 |
42 |     if i % 10 == 0: # print every 10 iterations
43 | print(loss.data)
44 |
45 | print('====')
46 | print('W =', W.data)
47 | print('b =', b.data)
48 |
49 | # [Figure 7-9] The model after training
50 | plt.scatter(x.data, y.data, s=10)
51 | plt.xlabel('x')
52 | plt.ylabel('y')
53 | t = np.arange(0, 1, .01)[:, np.newaxis]
54 | y_pred = predict(t)
55 | plt.plot(t, y_pred.data, color='r')
56 | plt.show()
57 |
--------------------------------------------------------------------------------
/ch07/dezero4.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | from dezero import Model
4 | from dezero import optimizers # package containing the optimizers
5 | import dezero.layers as L
6 | import dezero.functions as F
7 |
8 | # generate a dataset
9 | np.random.seed(0)
10 | x = np.random.rand(100, 1)
11 | y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)
12 |
13 | lr = 0.2
14 | iters = 10000
15 |
16 | class TwoLayerNet(Model):
17 | def __init__(self, hidden_size, out_size):
18 | super().__init__()
19 | self.l1 = L.Linear(hidden_size)
20 | self.l2 = L.Linear(out_size)
21 |
22 | def forward(self, x):
23 | y = F.sigmoid(self.l1(x))
24 | y = self.l2(y)
25 | return y
26 |
27 | model = TwoLayerNet(10, 1)
28 | optimizer = optimizers.SGD(lr) # create the optimizer
29 | optimizer.setup(model) # register the model to be optimized
30 |
31 | for i in range(iters):
32 | y_pred = model(x)
33 | loss = F.mean_squared_error(y, y_pred)
34 |
35 | model.cleargrads()
36 | loss.backward()
37 |
38 |     optimizer.update() # update the parameters via the optimizer
39 | if i % 1000 == 0:
40 | print(loss.data)
41 |
42 | # Visualize the result (same as [Figure 7-12])
43 | plt.scatter(x, y, s=10)
44 | plt.xlabel('x')
45 | plt.ylabel('y')
46 | t = np.arange(0, 1, .01)[:, np.newaxis]
47 | y_pred = model(t)
48 | plt.plot(t, y_pred.data, color='r')
49 | plt.show()
50 |
--------------------------------------------------------------------------------
/ch07/q_learning_nn.py:
--------------------------------------------------------------------------------
1 | import os, sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..')) # for importing the parent dirs
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | from dezero import Model
5 | from dezero import optimizers
6 | import dezero.functions as F
7 | import dezero.layers as L
8 | from common.gridworld import GridWorld
9 |
10 |
11 | def one_hot(state):
12 | HEIGHT, WIDTH = 3, 4
13 | vec = np.zeros(HEIGHT * WIDTH, dtype=np.float32)
14 | y, x = state
15 | idx = WIDTH * y + x
16 | vec[idx] = 1.0
17 | return vec[np.newaxis, :]
18 |
19 |
20 | class QNet(Model):
21 | def __init__(self):
22 | super().__init__()
23 |         self.l1 = L.Linear(100) # size of the hidden layer
24 |         self.l2 = L.Linear(4) # number of possible actions
25 |
26 | def forward(self, x):
27 | x = F.relu(self.l1(x))
28 | x = self.l2(x)
29 | return x
30 |
31 |
32 | class QLearningAgent:
33 | def __init__(self):
34 | self.gamma = 0.9
35 | self.lr = 0.01
36 | self.epsilon = 0.1
37 | self.action_size = 4
38 |
39 |         self.qnet = QNet() # initialize the network
40 |         self.optimizer = optimizers.SGD(self.lr) # create the optimizer
41 |         self.optimizer.setup(self.qnet) # register the network with the optimizer
42 |
43 | def get_action(self, state_vec):
44 | if np.random.rand() < self.epsilon:
45 | return np.random.choice(self.action_size)
46 | else:
47 | qs = self.qnet(state_vec)
48 | return qs.data.argmax()
49 |
50 | def update(self, state, action, reward, next_state, done):
51 |         # compute the maximum Q value of the next state (next_q)
52 |         if done: # the goal state was reached
53 |             next_q = np.zeros(1) # [0.] (the Q function at the goal state is always 0)
54 |         else: # any other state
55 | next_qs = self.qnet(next_state)
56 | next_q = next_qs.max(axis=1)
57 |             next_q.unchain() # exclude next_q from backpropagation
58 |
59 |         # the target value
60 |         target = self.gamma * next_q + reward
61 |         # compute the Q value (q) of the current state
62 |         qs = self.qnet(state)
63 |         q = qs[:, action]
64 |         # compute the error between target and q
65 | loss = F.mean_squared_error(target, q)
66 |
67 |         # backpropagate and update the parameters
68 | self.qnet.cleargrads()
69 | loss.backward()
70 | self.optimizer.update()
71 |
72 | return loss.data
73 |
74 |
75 | env = GridWorld()
76 | agent = QLearningAgent()
77 |
78 | episodes = 1000 # number of episodes
79 | loss_history = []
80 |
81 | for episode in range(episodes):
82 | state = env.reset()
83 | state = one_hot(state)
84 | total_loss, cnt = 0, 0
85 | done = False
86 |
87 | while not done:
88 | action = agent.get_action(state)
89 | next_state, reward, done = env.step(action)
90 | next_state = one_hot(next_state)
91 |
92 | loss = agent.update(state, action, reward, next_state, done)
93 | total_loss += loss
94 | cnt += 1
95 | state = next_state
96 |
97 | average_loss = total_loss / cnt
98 | loss_history.append(average_loss)
99 |
100 |
101 | # [Figure 7-14] Loss per episode
102 | plt.xlabel('episode')
103 | plt.ylabel('loss')
104 | plt.plot(range(len(loss_history)), loss_history)
105 | plt.show()
106 |
107 | # [Figure 7-15] Q function and policy obtained by Q-learning with a neural network
108 | Q = {}
109 | for state in env.states():
110 | for action in env.action_space:
111 | q = agent.qnet(one_hot(state))[:, action]
112 | Q[state, action] = float(q.data)
113 | env.render_q(Q)
114 |
--------------------------------------------------------------------------------
/ch08/dqn.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from collections import deque
3 | import random
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import gym
7 | from dezero import Model
8 | from dezero import optimizers
9 | import dezero.functions as F
10 | import dezero.layers as L
11 |
12 |
13 | class ReplayBuffer:
14 | def __init__(self, buffer_size, batch_size):
15 | self.buffer = deque(maxlen=buffer_size)
16 | self.batch_size = batch_size
17 |
18 | def add(self, state, action, reward, next_state, done):
19 | data = (state, action, reward, next_state, done)
20 | self.buffer.append(data)
21 |
22 | def __len__(self):
23 | return len(self.buffer)
24 |
25 | def get_batch(self):
26 | data = random.sample(self.buffer, self.batch_size)
27 |
28 | state = np.stack([x[0] for x in data])
29 | action = np.array([x[1] for x in data])
30 | reward = np.array([x[2] for x in data])
31 | next_state = np.stack([x[3] for x in data])
32 | done = np.array([x[4] for x in data]).astype(np.int32)
33 | return state, action, reward, next_state, done
34 |
35 |
36 | class QNet(Model): # network class
37 | def __init__(self, action_size):
38 | super().__init__()
39 | self.l1 = L.Linear(128)
40 | self.l2 = L.Linear(128)
41 | self.l3 = L.Linear(action_size)
42 |
43 | def forward(self, x):
44 | x = F.relu(self.l1(x))
45 | x = F.relu(self.l2(x))
46 | x = self.l3(x)
47 | return x
48 |
49 |
50 | class DQNAgent: # agent class
51 | def __init__(self):
52 | self.gamma = 0.98
53 | self.lr = 0.0005
54 | self.epsilon = 0.1
55 |         self.buffer_size = 10000 # size of the experience replay buffer
56 |         self.batch_size = 32 # minibatch size
57 | self.action_size = 2
58 |
59 | self.replay_buffer = ReplayBuffer(self.buffer_size, self.batch_size)
60 |         self.qnet = QNet(self.action_size) # main network
61 |         self.qnet_target = QNet(self.action_size) # target network
62 |         self.optimizer = optimizers.Adam(self.lr)
63 |         self.optimizer.setup(self.qnet) # register qnet with the optimizer
64 |
65 | def get_action(self, state):
66 | if np.random.rand() < self.epsilon:
67 | return np.random.choice(self.action_size)
68 | else:
69 |             state = state[np.newaxis, :] # add a batch dimension
70 | qs = self.qnet(state)
71 | return qs.data.argmax()
72 |
73 | def update(self, state, action, reward, next_state, done):
74 |         # add the experience to the replay buffer
75 |         self.replay_buffer.add(state, action, reward, next_state, done)
76 |         if len(self.replay_buffer) < self.batch_size:
77 |             return # stop here until a full minibatch of data has accumulated
78 | 
79 |         # once enough data is stored, sample a minibatch
80 | state, action, reward, next_state, done = self.replay_buffer.get_batch()
81 | qs = self.qnet(state)
82 | q = qs[np.arange(self.batch_size), action]
83 |
84 | next_qs = self.qnet_target(next_state)
85 | next_q = next_qs.max(axis=1)
86 | next_q.unchain()
87 | target = reward + (1 - done) * self.gamma * next_q
88 |
89 | loss = F.mean_squared_error(q, target)
90 |
91 | self.qnet.cleargrads()
92 | loss.backward()
93 | self.optimizer.update()
94 |
95 |     def sync_qnet(self): # synchronize the two networks
96 | self.qnet_target = copy.deepcopy(self.qnet)
97 |
98 | episodes = 300 # number of episodes
99 | sync_interval = 20 # how often to synchronize the networks (every 20 episodes)
100 | env = gym.make('CartPole-v0', render_mode='rgb_array')
101 | agent = DQNAgent()
102 | reward_history = [] # reward per episode
103 |
104 | for episode in range(episodes):
105 | state = env.reset()[0]
106 | done = False
107 | total_reward = 0
108 |
109 | while not done:
110 | action = agent.get_action(state)
111 | next_state, reward, terminated, truncated, info = env.step(action)
112 | done = terminated | truncated
113 |
114 | agent.update(state, action, reward, next_state, done)
115 | state = next_state
116 | total_reward += reward
117 |
118 | if episode % sync_interval == 0:
119 | agent.sync_qnet()
120 |
121 | reward_history.append(total_reward)
122 | if episode % 10 == 0:
123 | print("episode :{}, total reward : {}".format(episode, total_reward))
124 |
125 |
126 | # [Figure 8-8] Total reward per episode on CartPole
127 | plt.xlabel('Episode')
128 | plt.ylabel('Total Reward')
129 | plt.plot(range(len(reward_history)), reward_history)
130 | plt.show()
131 |
132 |
133 | # Let the trained agent play using greedy actions
134 | agent.epsilon = 0 # greedy policy (set the exploration probability ε to 0)
135 | state = env.reset()[0]
136 | done = False
137 | total_reward = 0
138 |
139 | while not done:
140 | action = agent.get_action(state)
141 | next_state, reward, terminated, truncated, info = env.step(action)
142 | done = terminated | truncated
143 | state = next_state
144 | total_reward += reward
145 | env.render()
146 | print('Total Reward:', total_reward)
147 |
--------------------------------------------------------------------------------
/ch08/gym_play.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 |
4 |
5 | env = gym.make('CartPole-v0', render_mode='human')
6 | state = env.reset()[0]
7 | done = False
8 |
9 | while not done: # loop until the episode ends
10 |     env.render() # visualize the environment
11 |     action = np.random.choice([0, 1]) # select an action (at random)
12 |     next_state, reward, terminated, truncated, info = env.step(action)
13 |     done = terminated | truncated # the episode ends if either flag is True
14 | env.close()
15 |
--------------------------------------------------------------------------------
/ch08/replay_buffer.py:
--------------------------------------------------------------------------------
1 | from collections import deque
2 | import random
3 | import numpy as np
4 | import gym
5 |
6 |
7 | class ReplayBuffer:
8 | def __init__(self, buffer_size, batch_size):
9 | self.buffer = deque(maxlen=buffer_size)
10 | self.batch_size = batch_size
11 |
12 | def add(self, state, action, reward, next_state, done):
13 | data = (state, action, reward, next_state, done)
14 | self.buffer.append(data)
15 |
16 | def __len__(self):
17 | return len(self.buffer)
18 |
19 | def get_batch(self):
20 | data = random.sample(self.buffer, self.batch_size)
21 |
22 | state = np.stack([x[0] for x in data])
23 | action = np.array([x[1] for x in data])
24 | reward = np.array([x[2] for x in data])
25 | next_state = np.stack([x[3] for x in data])
26 | done = np.array([x[4] for x in data]).astype(np.int32)
27 | return state, action, reward, next_state, done
28 |
29 |
30 | env = gym.make('CartPole-v0', render_mode='human')
31 | replay_buffer = ReplayBuffer(buffer_size=10000, batch_size=32)
32 |
33 | for episode in range(10): # run 10 episodes
34 | state = env.reset()[0]
35 | done = False
36 |
37 | while not done:
38 |         action = 0 # always take action 0
39 |         next_state, reward, terminated, truncated, info = env.step(action) # collect experience data
40 |         done = terminated | truncated
41 | 
42 |         replay_buffer.add(state, action, reward, next_state, done) # add it to the buffer
43 | state = next_state
44 |
45 | # sample a minibatch from the experience buffer
46 | state, action, reward, next_state, done = replay_buffer.get_batch()
47 | print(state.shape) # (32, 4)
48 | print(action.shape) # (32,)
49 | print(reward.shape) # (32,)
50 | print(next_state.shape) # (32, 4)
51 | print(done.shape) # (32,)
52 |
--------------------------------------------------------------------------------
/ch09/actor_critic.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | import numpy as np
5 | import gym
6 | from dezero import Model
7 | from dezero import optimizers
8 | import dezero.functions as F
9 | import dezero.layers as L
10 |
11 |
12 | class PolicyNet(Model): # policy network
13 | def __init__(self, action_size=2):
14 | super().__init__()
15 | self.l1 = L.Linear(128)
16 | self.l2 = L.Linear(action_size)
17 |
18 | def forward(self, x):
19 | x = F.relu(self.l1(x))
20 | x = self.l2(x)
21 |         x = F.softmax(x) # output action probabilities
22 | return x
23 |
24 |
25 | class ValueNet(Model): # value-function network
26 | def __init__(self):
27 | super().__init__()
28 | self.l1 = L.Linear(128)
29 | self.l2 = L.Linear(1)
30 |
31 | def forward(self, x):
32 | x = F.relu(self.l1(x))
33 | x = self.l2(x)
34 | return x
35 |
36 |
37 | class Agent:
38 | def __init__(self):
39 | self.gamma = 0.98
40 | self.lr_pi = 0.0002
41 | self.lr_v = 0.0005
42 | self.action_size = 2
43 |
44 | self.pi = PolicyNet()
45 | self.v = ValueNet()
46 | self.optimizer_pi = optimizers.Adam(self.lr_pi).setup(self.pi)
47 | self.optimizer_v = optimizers.Adam(self.lr_v).setup(self.v)
48 |
49 | def get_action(self, state):
50 |         state = state[np.newaxis, :]  # add a batch axis
51 |         probs = self.pi(state)
52 |         probs = probs[0]
53 |         action = np.random.choice(len(probs), p=probs.data)
54 |         return action, probs[action]  # return the selected action and its probability
55 |
56 | def update(self, state, action_prob, reward, next_state, done):
57 |         # add a batch axis
58 |         state = state[np.newaxis, :]
59 |         next_state = next_state[np.newaxis, :]
60 |
61 |         # loss of the value function (self.v)
62 |         target = reward + self.gamma * self.v(next_state) * (1 - done)  # TD target
63 |         target.unchain()
64 |         v = self.v(state)  # value of the current state
65 |         loss_v = F.mean_squared_error(v, target)  # mean squared error between the two
66 |
67 |         # loss of the policy (self.pi)
68 |         delta = target - v
69 |         delta.unchain()
70 |         loss_pi = -F.log(action_prob) * delta
71 |
72 |         # train the networks
73 | self.v.cleargrads()
74 | self.pi.cleargrads()
75 | loss_v.backward()
76 | loss_pi.backward()
77 | self.optimizer_v.update()
78 | self.optimizer_pi.update()
79 |
80 |
81 | episodes = 3000
82 | env = gym.make('CartPole-v0', render_mode='rgb_array')
83 | agent = Agent()
84 | reward_history = []
85 |
86 | for episode in range(episodes):
87 | state = env.reset()[0]
88 | done = False
89 | total_reward = 0
90 |
91 | while not done:
92 | action, prob = agent.get_action(state)
93 | next_state, reward, terminated, truncated, info = env.step(action)
94 | done = terminated | truncated
95 |
96 | agent.update(state, prob, reward, next_state, done)
97 |
98 | state = next_state
99 | total_reward += reward
100 |
101 | reward_history.append(total_reward)
102 | if episode % 100 == 0:
103 | print("episode :{}, total reward : {:.1f}".format(episode, total_reward))
104 |
105 |
106 | # Left-hand graph of Figure 9-11
107 | from common.utils import plot_total_reward
108 | plot_total_reward(reward_history)
109 |
--------------------------------------------------------------------------------
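A note on the `update` method above: the critic minimizes the squared TD error, the actor is pushed along delta * log pi(a|s), and `unchain()` acts as DeZero's stop-gradient, so neither the TD target nor delta backpropagates into the other network. A scalar NumPy illustration of the quantities involved (the concrete numbers and names are ours):

import numpy as np

gamma = 0.98
reward, done = 1.0, 0            # one CartPole transition
v_s, v_next = 0.50, 0.55         # stand-ins for self.v(state) and self.v(next_state)
prob_a = 0.60                    # probability the policy gave to the action taken

target = reward + gamma * v_next * (1 - done)   # TD target, treated as a constant
delta = target - v_s                            # TD error, also treated as a constant
loss_v = (v_s - target) ** 2                    # critic loss: squared TD error
loss_pi = -np.log(prob_a) * delta               # actor loss: -log pi(a|s) * delta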
/ch09/reinforce.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | import numpy as np
5 | import gym
6 | from dezero import Model
7 | from dezero import optimizers
8 | import dezero.functions as F
9 | import dezero.layers as L
10 |
11 |
12 | class Policy(Model):
13 | def __init__(self, action_size):
14 | super().__init__()
15 | self.l1 = L.Linear(128)
16 | self.l2 = L.Linear(action_size)
17 |
18 | def forward(self, x):
19 | x = F.relu(self.l1(x))
20 | x = F.softmax(self.l2(x))
21 | return x
22 |
23 |
24 | class Agent:
25 | def __init__(self):
26 | self.gamma = 0.98
27 | self.lr = 0.0002
28 | self.action_size = 2
29 |
30 | self.memory = []
31 | self.pi = Policy(self.action_size)
32 | self.optimizer = optimizers.Adam(self.lr)
33 | self.optimizer.setup(self.pi)
34 |
35 | def get_action(self, state):
36 | state = state[np.newaxis, :]
37 | probs = self.pi(state)
38 | probs = probs[0]
39 | action = np.random.choice(len(probs), p=probs.data)
40 | return action, probs[action]
41 |
42 | def add(self, reward, prob):
43 | data = (reward, prob)
44 | self.memory.append(data)
45 |
46 | def update(self):
47 | self.pi.cleargrads()
48 |
49 | G, loss = 0, 0
50 | for reward, prob in reversed(self.memory):
51 |             G = reward + self.gamma * G  # compute the return G
52 |             loss += -F.log(prob) * G  # accumulate the loss
53 |
54 | loss.backward()
55 | self.optimizer.update()
56 | self.memory = []
57 |
58 |
59 | episodes = 3000
60 | env = gym.make('CartPole-v0', render_mode='rgb_array')
61 | agent = Agent()
62 | reward_history = []
63 |
64 | for episode in range(episodes):
65 | state = env.reset()[0]
66 | done = False
67 | sum_reward = 0
68 |
69 | while not done:
70 | action, prob = agent.get_action(state)
71 | next_state, reward, terminated, truncated, info = env.step(action)
72 | done = terminated | truncated
73 |
74 | agent.add(reward, prob)
75 | state = next_state
76 | sum_reward += reward
77 |
78 | agent.update()
79 |
80 | reward_history.append(sum_reward)
81 | if episode % 100 == 0:
82 | print("episode :{}, total reward : {:.1f}".format(episode, sum_reward))
83 |
84 |
85 | # Left-hand graph of Figure 9-4
86 | from common.utils import plot_total_reward
87 | plot_total_reward(reward_history)
88 |
--------------------------------------------------------------------------------
/ch09/simple_pg.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | import numpy as np
5 | import gym
6 | from dezero import Model
7 | from dezero import optimizers
8 | import dezero.functions as F
9 | import dezero.layers as L
10 |
11 |
12 | class Policy(Model):
13 | def __init__(self, action_size):
14 | super().__init__()
15 |         self.l1 = L.Linear(128)  # first layer
16 |         self.l2 = L.Linear(action_size)  # second layer
17 |
18 |     def forward(self, x):
19 |         x = F.relu(self.l1(x))  # ReLU in the first layer
20 |         x = F.softmax(self.l2(x))  # softmax in the second layer
21 | return x
22 |
23 |
24 | class Agent:
25 | def __init__(self):
26 | self.gamma = 0.98
27 | self.lr = 0.0002
28 | self.action_size = 2
29 |
30 | self.memory = []
31 | self.pi = Policy(self.action_size)
32 | self.optimizer = optimizers.Adam(self.lr)
33 | self.optimizer.setup(self.pi)
34 |
35 | def get_action(self, state):
36 |         state = state[np.newaxis, :]  # add a batch axis
37 |         probs = self.pi(state)  # forward pass
38 |         probs = probs[0]
39 |         action = np.random.choice(len(probs), p=probs.data)  # select an action
40 |         return action, probs[action]  # return the selected action and its probability
41 |
42 | def add(self, reward, prob):
43 | data = (reward, prob)
44 | self.memory.append(data)
45 |
46 | def update(self):
47 | self.pi.cleargrads()
48 |
49 | G, loss = 0, 0
50 |         for reward, prob in reversed(self.memory):  # compute the return G
51 |             G = reward + self.gamma * G
52 |
53 |         for reward, prob in self.memory:  # compute the loss
54 |             loss += -F.log(prob) * G
55 |
56 |         loss.backward()
57 |         self.optimizer.update()
58 |         self.memory = []  # clear the memory
59 |
60 |
61 | episodes = 3000
62 | env = gym.make('CartPole-v0', render_mode='rgb_array')
63 | agent = Agent()
64 | reward_history = []
65 |
66 | for episode in range(episodes):
67 | state = env.reset()[0]
68 | done = False
69 | total_reward = 0
70 |
71 | while not done:
72 |         action, prob = agent.get_action(state)  # select an action
73 |         next_state, reward, terminated, truncated, info = env.step(action)  # take the action
74 |         done = terminated | truncated
75 |
76 |         agent.add(reward, prob)  # store the reward and the action's probability
77 |         state = next_state  # move to the next state
78 |         total_reward += reward  # accumulate the total reward
79 |
80 |     agent.update()  # update the policy
81 |
82 | reward_history.append(total_reward)
83 | if episode % 100 == 0:
84 | print("episode :{}, total reward : {:.1f}".format(episode, total_reward))
85 |
86 |
87 | # Figure 9-2: total reward per episode
88 | from common.utils import plot_total_reward
89 | plot_total_reward(reward_history)
90 |
--------------------------------------------------------------------------------
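Comparing the two policy-gradient scripts above: simple_pg.py weights every -log pi(a_t|s_t) term with the same whole-episode return G, whereas reinforce.py builds the loss inside the reversed loop, so step t is weighted by the return-to-go G_t = r_t + gamma * G_{t+1}. A small sketch of the two weightings on a toy episode (the numbers and names are ours):

gamma = 0.98
rewards = [1.0, 1.0, 1.0]                  # a toy 3-step episode

# simple_pg.py: one scalar G for the whole episode
G = 0.0
for r in reversed(rewards):
    G = r + gamma * G
simple_weights = [G] * len(rewards)        # every step gets the same weight

# reinforce.py: return-to-go, accumulated inside the same reversed loop
reinforce_weights = []
G = 0.0
for r in reversed(rewards):
    G = r + gamma * G
    reinforce_weights.append(G)
reinforce_weights.reverse()                # back into time order t = 0, 1, 2

print(simple_weights)      # [2.9404, 2.9404, 2.9404]
print(reinforce_weights)   # [2.9404, 1.98, 1.0]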
/common/gridworld.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import common.gridworld_render as render_helper
3 |
4 |
5 | class GridWorld:
6 | def __init__(self):
7 |         self.action_space = [0, 1, 2, 3]  # action space (the possible actions)
8 |         self.action_meaning = {  # meaning of each action
9 |             0: "UP",
10 |             1: "DOWN",
11 |             2: "LEFT",
12 |             3: "RIGHT",
13 |         }
14 |
15 |         self.reward_map = np.array(  # reward map (the reward at each coordinate)
16 |             [[0, 0, 0, 1.0],
17 |              [0, None, 0, -1.0],
18 |              [0, 0, 0, 0]]
19 |         )
20 |         self.goal_state = (0, 3)  # goal state (coordinates)
21 |         self.wall_state = (1, 1)  # wall state (coordinates)
22 |         self.start_state = (2, 0)  # start state (coordinates)
23 |         self.agent_state = self.start_state  # the agent's initial state (coordinates)
24 |
25 | @property
26 | def height(self):
27 | return len(self.reward_map)
28 |
29 | @property
30 | def width(self):
31 | return len(self.reward_map[0])
32 |
33 | @property
34 | def shape(self):
35 | return self.reward_map.shape
36 |
37 | def actions(self):
38 | return self.action_space
39 |
40 | def states(self):
41 | for h in range(self.height):
42 | for w in range(self.width):
43 | yield (h, w)
44 |
45 | def next_state(self, state, action):
46 |         # compute the destination of the move
47 | action_move_map = [(-1, 0), (1, 0), (0, -1), (0, 1)]
48 | move = action_move_map[action]
49 | next_state = (state[0] + move[0], state[1] + move[1])
50 | ny, nx = next_state
51 |
52 |         # is the destination outside the grid or a wall?
53 | if nx < 0 or nx >= self.width or ny < 0 or ny >= self.height:
54 | next_state = state
55 | elif next_state == self.wall_state:
56 | next_state = state
57 |
58 |         return next_state  # return the next state
59 |
60 | def reward(self, state, action, next_state):
61 | return self.reward_map[next_state]
62 |
63 | def reset(self):
64 | self.agent_state = self.start_state
65 | return self.agent_state
66 |
67 | def step(self, action):
68 | state = self.agent_state
69 | next_state = self.next_state(state, action)
70 | reward = self.reward(state, action, next_state)
71 | done = (next_state == self.goal_state)
72 |
73 | self.agent_state = next_state
74 | return next_state, reward, done
75 |
76 | def render_v(self, v=None, policy=None, print_value=True):
77 | renderer = render_helper.Renderer(self.reward_map, self.goal_state,
78 | self.wall_state)
79 | renderer.render_v(v, policy, print_value)
80 |
81 | def render_q(self, q=None, print_value=True):
82 | renderer = render_helper.Renderer(self.reward_map, self.goal_state,
83 | self.wall_state)
84 | renderer.render_q(q, print_value)
85 |
--------------------------------------------------------------------------------
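A minimal usage sketch for this environment, based only on the reset/step API defined above (this exact snippet is ours, not a file from the repository; it rolls out one episode with random actions):

import numpy as np
from common.gridworld import GridWorld

env = GridWorld()
state = env.reset()
done = False
total_reward = 0.0

while not done:
    action = np.random.choice(env.actions())     # one of UP / DOWN / LEFT / RIGHT
    next_state, reward, done = env.step(action)  # GridWorld.step returns a 3-tuple
    total_reward += reward
    state = next_state

print('total reward:', total_reward)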
/common/gridworld_render.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib
3 | import matplotlib.pyplot as plt
4 |
5 |
6 | class Renderer:
7 | def __init__(self, reward_map, goal_state, wall_state):
8 | self.reward_map = reward_map
9 | self.goal_state = goal_state
10 | self.wall_state = wall_state
11 | self.ys = len(self.reward_map)
12 | self.xs = len(self.reward_map[0])
13 |
14 | self.ax = None
15 | self.fig = None
16 | self.first_flg = True
17 |
18 | def set_figure(self, figsize=None):
19 | fig = plt.figure(figsize=figsize)
20 | self.ax = fig.add_subplot(111)
21 | ax = self.ax
22 | ax.clear()
23 | ax.tick_params(labelbottom=False, labelleft=False, labelright=False, labeltop=False)
24 | ax.set_xticks(range(self.xs))
25 | ax.set_yticks(range(self.ys))
26 | ax.set_xlim(0, self.xs)
27 | ax.set_ylim(0, self.ys)
28 | ax.grid(True)
29 |
30 | def render_v(self, v=None, policy=None, print_value=True):
31 | self.set_figure()
32 |
33 | ys, xs = self.ys, self.xs
34 | ax = self.ax
35 |
36 | if v is not None:
37 | color_list = ['red', 'white', 'green']
38 | cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
39 | 'colormap_name', color_list)
40 |
41 | # dict -> ndarray
42 | v_dict = v
43 | v = np.zeros(self.reward_map.shape)
44 | for state, value in v_dict.items():
45 | v[state] = value
46 |
47 | vmax, vmin = v.max(), v.min()
48 | vmax = max(vmax, abs(vmin))
49 | vmin = -1 * vmax
50 | vmax = 1 if vmax < 1 else vmax
51 | vmin = -1 if vmin > -1 else vmin
52 |
53 | ax.pcolormesh(np.flipud(v), cmap=cmap, vmin=vmin, vmax=vmax)
54 |
55 | for y in range(ys):
56 | for x in range(xs):
57 | state = (y, x)
58 | r = self.reward_map[y, x]
59 | if r != 0 and r is not None:
60 | txt = 'R ' + str(r)
61 | if state == self.goal_state:
62 | txt = txt + ' (GOAL)'
63 | ax.text(x+.1, ys-y-0.9, txt)
64 |
65 | if (v is not None) and state != self.wall_state:
66 | if print_value:
67 | offsets = [(0.4, -0.15), (-0.15, -0.3)]
68 | key = 0
69 | if v.shape[0] > 7: key = 1
70 | offset = offsets[key]
71 | ax.text(x+offset[0], ys-y+offset[1], "{:12.2f}".format(v[y, x]))
72 |
73 | if policy is not None and state != self.wall_state:
74 | actions = policy[state]
75 | max_actions = [kv[0] for kv in actions.items() if kv[1] == max(actions.values())]
76 |
77 | arrows = ["↑", "↓", "←", "→"]
78 | offsets = [(0, 0.1), (0, -0.1), (-0.1, 0), (0.1, 0)]
79 | for action in max_actions:
80 | arrow = arrows[action]
81 | offset = offsets[action]
82 | if state == self.goal_state:
83 | continue
84 | ax.text(x+0.45+offset[0], ys-y-0.5+offset[1], arrow)
85 |
86 | if state == self.wall_state:
87 | ax.add_patch(plt.Rectangle((x,ys-y-1), 1, 1, fc=(0.4, 0.4, 0.4, 1.)))
88 | plt.show()
89 |
90 | def render_q(self, q, show_greedy_policy=True):
91 | self.set_figure()
92 |
93 | ys, xs = self.ys, self.xs
94 | ax = self.ax
95 | action_space = [0, 1, 2, 3]
96 |
97 | qmax, qmin = max(q.values()), min(q.values())
98 | qmax = max(qmax, abs(qmin))
99 | qmin = -1 * qmax
100 | qmax = 1 if qmax < 1 else qmax
101 | qmin = -1 if qmin > -1 else qmin
102 |
103 |
104 | color_list = ['red', 'white', 'green']
105 | cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
106 | 'colormap_name', color_list)
107 |
108 | for y in range(ys):
109 | for x in range(xs):
110 | for action in action_space:
111 | state = (y, x)
112 | r = self.reward_map[y, x]
113 | if r != 0 and r is not None:
114 | txt = 'R ' + str(r)
115 | if state == self.goal_state:
116 | txt = txt + ' (GOAL)'
117 | ax.text(x+.05, ys-y-0.95, txt)
118 |
119 | if state == self.goal_state:
120 | continue
121 |
122 | tx, ty = x, ys-y-1
123 |
124 | action_map = {
125 | 0: ((0.5+tx, 0.5+ty), (tx+1, ty+1), (tx, ty+1)),
126 | 1: ((tx, ty), (tx+1, ty), (tx+0.5, ty+0.5)),
127 | 2: ((tx, ty), (tx+0.5, ty+0.5), (tx, ty+1)),
128 | 3: ((0.5+tx, 0.5+ty), (tx+1, ty), (tx+1, ty+1)),
129 | }
130 | offset_map = {
131 | 0: (0.1, 0.8),
132 | 1: (0.1, 0.1),
133 | 2: (-0.2, 0.4),
134 | 3: (0.4, 0.4),
135 | }
136 | if state == self.wall_state:
137 | ax.add_patch(plt.Rectangle((tx, ty), 1, 1, fc=(0.4, 0.4, 0.4, 1.)))
138 |                 elif state == self.goal_state:
139 | ax.add_patch(plt.Rectangle((tx, ty), 1, 1, fc=(0., 1., 0., 1.)))
140 | else:
141 |
142 | tq = q[(state, action)]
143 | color_scale = 0.5 + (tq / qmax) / 2 # normalize: 0.0-1.0
144 |
145 | poly = plt.Polygon(action_map[action],fc=cmap(color_scale))
146 | ax.add_patch(poly)
147 |
148 | offset= offset_map[action]
149 | ax.text(tx+offset[0], ty+offset[1], "{:12.2f}".format(tq))
150 | plt.show()
151 |
152 | if show_greedy_policy:
153 | policy = {}
154 | for y in range(self.ys):
155 | for x in range(self.xs):
156 | state = (y, x)
157 | qs = [q[state, action] for action in range(4)] # action_size
158 | max_action = np.argmax(qs)
159 | probs = {0:0.0, 1:0.0, 2:0.0, 3:0.0}
160 | probs[max_action] = 1
161 | policy[state] = probs
162 | self.render_v(None, policy)
--------------------------------------------------------------------------------
/common/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 |
4 |
5 | def argmax(xs):
6 | idxes = [i for i, x in enumerate(xs) if x == max(xs)]
7 | if len(idxes) == 1:
8 | return idxes[0]
9 | elif len(idxes) == 0:
10 | return np.random.choice(len(xs))
11 |
12 | selected = np.random.choice(idxes)
13 | return selected
14 |
15 |
16 | def greedy_probs(Q, state, epsilon=0, action_size=4):
17 | qs = [Q[(state, action)] for action in range(action_size)]
18 | max_action = argmax(qs) # OR np.argmax(qs)
19 | base_prob = epsilon / action_size
20 | action_probs = {action: base_prob for action in range(action_size)} #{0: ε/4, 1: ε/4, 2: ε/4, 3: ε/4}
21 | action_probs[max_action] += (1 - epsilon)
22 | return action_probs
23 |
24 |
25 | def plot_total_reward(reward_history):
26 | plt.xlabel('Episode')
27 | plt.ylabel('Total Reward')
28 | plt.plot(range(len(reward_history)), reward_history)
29 | plt.show()
30 |
31 |
32 |
--------------------------------------------------------------------------------
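To illustrate how `greedy_probs` is typically consumed (the Q table and state below are toy values of ours): given a Q function keyed by `(state, action)` pairs, it returns an epsilon-greedy distribution over the four actions.

from collections import defaultdict
from common.utils import greedy_probs

Q = defaultdict(lambda: 0.0)   # Q table keyed by (state, action); unseen entries default to 0
state = (2, 0)
Q[(state, 3)] = 1.0            # make action 3 (RIGHT) look best in this state

probs = greedy_probs(Q, state, epsilon=0.1)
print(probs)  # {0: 0.025, 1: 0.025, 2: 0.025, 3: 0.925} -> epsilon/4 each, plus 1 - epsilon on the greedy action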
/cover.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WegraLee/deep-learning-from-scratch-4/b82cd6432b4e63ce6a4ab2b925fc74a1227fb06a/cover.jpeg
--------------------------------------------------------------------------------
/equations_and_figures_4.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WegraLee/deep-learning-from-scratch-4/b82cd6432b4e63ce6a4ab2b925fc74a1227fb06a/equations_and_figures_4.zip
--------------------------------------------------------------------------------
/notebooks/08_dqn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "64a9da69",
6 | "metadata": {},
7 | "source": [
8 | "## SETUP"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "1e89c815",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "!pip install gym[classic_control]\n",
19 | "!pip install dezero"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "id": "d354811c",
25 | "metadata": {},
26 | "source": [
27 | "## ch08/gym_play.py"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "id": "923b86c1",
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stdout",
38 | "output_type": "stream",
39 | "text": [
40 | "action: 0\n",
41 | "action: 0\n",
42 | "action: 0\n",
43 | "action: 1\n",
44 | "action: 1\n",
45 | "action: 0\n",
46 | "action: 0\n",
47 | "action: 0\n",
48 | "action: 0\n",
49 | "action: 0\n",
50 | "action: 0\n",
51 | "action: 1\n",
52 | "action: 0\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "import numpy as np\n",
58 | "import gym\n",
59 | "\n",
60 | "\n",
61 | "env = gym.make('CartPole-v0')\n",
62 | "state = env.reset()\n",
63 | "done = False\n",
64 | "\n",
65 | "while not done:\n",
66 | " #env.render()\n",
67 | " action = np.random.choice([0, 1])\n",
68 | " next_state, reward, done, info = env.step(action)\n",
69 | " print('action:', action)\n",
70 | "#env.close()"
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "id": "f3c0b72d",
76 | "metadata": {},
77 | "source": [
78 | "## ch08/replay_buffer.py"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 3,
84 | "id": "41011871",
85 | "metadata": {},
86 | "outputs": [
87 | {
88 | "name": "stdout",
89 | "output_type": "stream",
90 | "text": [
91 | "(32, 4)\n",
92 | "(32,)\n",
93 | "(32,)\n",
94 | "(32, 4)\n",
95 | "(32,)\n"
96 | ]
97 | }
98 | ],
99 | "source": [
100 | "from collections import deque\n",
101 | "import random\n",
102 | "import numpy as np\n",
103 | "import gym\n",
104 | "\n",
105 | "\n",
106 | "class ReplayBuffer:\n",
107 | " def __init__(self, buffer_size, batch_size):\n",
108 | " self.buffer = deque(maxlen=buffer_size)\n",
109 | " self.batch_size = batch_size\n",
110 | "\n",
111 | " def add(self, state, action, reward, next_state, done):\n",
112 | " data = (state, action, reward, next_state, done)\n",
113 | " self.buffer.append(data)\n",
114 | "\n",
115 | " def __len__(self):\n",
116 | " return len(self.buffer)\n",
117 | "\n",
118 | " def get_batch(self):\n",
119 | " data = random.sample(self.buffer, self.batch_size)\n",
120 | "\n",
121 | " state = np.stack([x[0] for x in data])\n",
122 | " action = np.array([x[1] for x in data])\n",
123 | " reward = np.array([x[2] for x in data])\n",
124 | " next_state = np.stack([x[3] for x in data])\n",
125 | " done = np.array([x[4] for x in data]).astype(np.int32)\n",
126 | " return state, action, reward, next_state, done\n",
127 | "\n",
128 | "\n",
129 | "env = gym.make('CartPole-v0')\n",
130 | "replay_buffer = ReplayBuffer(buffer_size=10000, batch_size=32)\n",
131 | "\n",
132 | "for episode in range(10):\n",
133 | " state = env.reset()\n",
134 | " done = False\n",
135 | "\n",
136 | " while not done:\n",
137 | " action = 0\n",
138 | " next_state, reward, done, info = env.step(action)\n",
139 | " replay_buffer.add(state, action, reward, next_state, done)\n",
140 | " state = next_state\n",
141 | "\n",
142 | "state, action, reward, next_state, done = replay_buffer.get_batch()\n",
143 | "print(state.shape) # (32, 4)\n",
144 | "print(action.shape) # (32,)\n",
145 | "print(reward.shape) # (32,)\n",
146 | "print(next_state.shape) # (32, 4)\n",
147 | "print(done.shape) # (32,)"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "id": "13c09484",
153 | "metadata": {},
154 | "source": [
155 | "## ch08/dqn.py"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 4,
161 | "id": "5e448679",
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "name": "stdout",
166 | "output_type": "stream",
167 | "text": [
168 | "episode :0, total reward : 14.0\n",
169 | "episode :10, total reward : 98.0\n",
170 | "episode :20, total reward : 38.0\n",
171 | "episode :30, total reward : 20.0\n",
172 | "episode :40, total reward : 12.0\n",
173 | "episode :50, total reward : 10.0\n",
174 | "episode :60, total reward : 23.0\n",
175 | "episode :70, total reward : 17.0\n",
176 | "episode :80, total reward : 9.0\n",
177 | "episode :90, total reward : 124.0\n",
178 | "episode :100, total reward : 122.0\n",
179 | "episode :110, total reward : 186.0\n",
180 | "episode :120, total reward : 156.0\n",
181 | "episode :130, total reward : 198.0\n",
182 | "episode :140, total reward : 146.0\n",
183 | "episode :150, total reward : 200.0\n",
184 | "episode :160, total reward : 200.0\n",
185 | "episode :170, total reward : 193.0\n",
186 | "episode :180, total reward : 200.0\n",
187 | "episode :190, total reward : 142.0\n",
188 | "episode :200, total reward : 200.0\n",
189 | "episode :210, total reward : 200.0\n",
190 | "episode :220, total reward : 179.0\n",
191 | "episode :230, total reward : 149.0\n",
192 | "episode :240, total reward : 200.0\n",
193 | "episode :250, total reward : 200.0\n",
194 | "episode :260, total reward : 200.0\n",
195 | "episode :270, total reward : 200.0\n",
196 | "episode :280, total reward : 200.0\n",
197 | "episode :290, total reward : 200.0\n"
198 | ]
199 | },
200 | {
201 | "data": {
202 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABeZ0lEQVR4nO2debwkVXn3f09Vb3edfWdWVocdRhhFRAQX3IhblASjRoMa9VUT30RjPhFfk1djXKIxIWLi6xo3jNFEVBBBJAFhRoZhhxnWGWbm3lnv3K27q+p5/6hzTp2qruru23v1nO/ncz/dXdVVdU7V7fOcZz3EzDAYDAaDAQCsbjfAYDAYDL2DEQoGg8FgUBihYDAYDAaFEQoGg8FgUBihYDAYDAZFptsNaIbFixfzunXrut0Mg8FgSBVbt27dz8xL4valWiisW7cOW7Zs6XYzDAaDIVUQ0ZNJ+4z5yGAwGAwKIxQMBoPBoDBCwWAwGAwKIxQMBoPBoDBCwWAwGAyKtgkFIlpNRDcT0QNEdD8RvU9sX0hENxLRo+J1gdhORPQFItpBRNuJ6Jx2tc1gMBgM8bRTU3AA/CkzbwSwGcC7iWgjgA8BuImZTwRwk/gMAJcBOFH8XQXgmja2zWAwGAwxtC1PgZn3ANgj3h8logcBrAJwOYAXiK99DcAtAP5cbP86+7W87yCi+US0QpzHcIxyaKqEn9y7B7933hpYFtV1zI+27cYLT1mKkUIWAPCz+/Zg07qFWDycx00P7sPGlaNYMW+g5nlu33kAt+/cj4tOXopz1y4I7bv/mSP4+X17a55j3eIhXHDCYnznzqfhep7avmQkj1edtQrfvONJFMsuRgeyeOsF61F2Pfy//34CMyUHADCQy+DNz12L/9q+B686cyUKWRs/2rYblzxrGYbzwc/3uq278NSBKQAAEeF15x6H1QsHAQC3PjKONQsH8cyRGSwdKeCEpcOxbf3tU4dwy0NjAIDz1i/C805cDADYOT6JvUdmsXxeAT/a9gwQKbe/fskQXn32cTg4VcJtO/bjguMX4Vu/eQoLBrO4cvNaEFV/bsyM72/dhcvPWol8xsYP796FF29cjqF8eHj62X17ce7aBfifnfuxc2yy5r1P4oITFmMwl8GND9R+fr3MSctH8IozVrb8vB1JXiOidQDOBvAbAMu0gX4vgGXi/SoAT2uH7RLbQkKBiK6Cr0lgzZo17Wu0oWtMFR386pFxXHjiYpz98RsBAGetno/TVs2reezeI7N433e24VOvPQO/++zVmCm5eNe3fosPX3YK/ujCDXjnN7fiXS84AX/yopNqnusTP30Q23cdwW+fOoxvvv380L5/umUnfrJ9D6qNd3LsfNcLjsc1t+xU35XbZ8ou/u7nD6vvP+f4Rdg/WcLf/uyh0HkOz5TwpV89hm1PH8Z7Lj4B7/vONnzm9WfiteceBwCYLbv44PfvAQAQ+eefmC3jo688FQDw/u9uw0tOXYZbH9mPzRsW4TO/e2Zsez934yP49aP7AQDHLdiN2/78hQCAf/zlDvzm8YO4+JQl+OYdT4X6zOxf81VnrsI3bn8Sn/vFI3j3xcfjH2/eCQB44bOWYdX86gL4pgfH8GfXbceOsUm8afNafOC79+DzbyRcftYq9Z3Zsot3fWsr/vylp+Dvfv4wXI+r3vskmIH/3nkAi4ZyuOGBfQ2do1d4xRkr0ykUiGgYwA8AvJ+ZJ/RZAzMzEc1plR9mvhbAtQCwadMms0JQH/J//vMBfHfL0/ijC9erbeOTxbqOnS27AICi47+WHA/M/qvjMcouw3G9iuPufuoQFg/n1ewaACZn/dl6Keb7xbKHZ60YxU/fd2FiW36wdRf+9Pv34O6nDmHJSB53feTS0PaJGf/8n379mfjg9+/B0VkHh6ZKAICbP/gCzJZdXPb5X8Nx/X/z7bsOoyzaorfJE1LmL152Cq56/vF42ed/jSf2+1qD43o4NF3CxIyDiZmyuj9xFMseNm9YiBecvBSf/OlDODRVwoKhHPZPlTBVcjBddHHcggElLADgH2/egb/7+cNwPA+PjB0FAGx98pDaP1V0Eq8nkX156sC06l/R8Sq+I5+j6zE+cOlJeN+lJ9Y8d5S3/L87cWiqBMdjnL5qHv7zvc+b8zn6nbZGHxFRFr5A+BYz/7vYvI+IVoj9KwCMie27AazWDj9ObDMcY+ydmAUAPDY+pbYdnCzVdawjTDRlMZDKAcfxWA00cTOJV//T/+AFn74ltG265A+grld5RNn1kLOrTzNXzCsAAO55+oh6DwAZcdyMGKDnD/hmrqmig8PTJbUta/s/z5Jo957Ds6otepvkW0tMuNYtHsSTB6YBAEdmykpzmCw5SljGUXI9ZG0LpwuN7L5njgDwTXizZRezjotC1g4dkxEmPddj7Ng3qforkfewGgM5/5zTZVcJOC9yz8viHkiBXqclsQKLCC4zXI8bPke/087oIwLwrwAeZObPart+DODN4v2bAfxI2/4HIgppM4Ajxp9wbJLL+P+W+zXt4MBUEbc9uh+1lo+VwkAKB/nqeoyiGISjp4gOtA/vPYpdh6YxJez6ToJQkIN2EsuFIJgpu1g+GggFeZwSCoO+UJgsOjg8UwYAjA5kkRf3Qc62D0yVgkFT64Rst9TC1y4awtOHpoWW4J9vbKII5soZuI7jecjZFk5dOQoAuG/3BADg0HQJs2UP0yUXAxGhYIuRdbbs4bH9k6pf0iAwXaqtKQyKc86UHEgFyI08JCncy6Kv9fqXolhE8Dz//jV6jn6nneajCwC8CcC9RLRNbPsLAJ8E8D0iehuAJwH8rth3PYCXAdgBYBrAW9vYNkMPk1dCoYSsmFX/3+t9O/vX/vA8XHRSbHFHAFDmBykcyo4UEpqmEBlwDmjCZ7Lo4CV/fyvWLBzEjNIUKgfSuQgFAGFNQQ6kJSkUcurah6fLGC1kYFukzj+pmWDUoKkJKtkfOcatWzSIssvYc2QWh4TmsefIDADfRJRE2WFkbQvzB3NYvXAA9+0ONAXA1zoK2XCfZV92jk+qew4AGxYPYef4lLqH1choQlL2q1JTkM9TagqNCgVfIHjMDZ+j32ln9NFtAJLu+iUx32cA725XewzpQZooxieLGC1kYVuEsaP+wF2sYhMHNE0hYj5yPVZmmOi8f99EIBS+9j9PAPAd1lJDcNxKTaHkMgZy1YXCYC6DeQNZHJkpY7kW7ZSkKUjzkRQSUmOa0mbbB6aKqj+SqPlo7aIhAMATB4JBeUL4R4ox/hFJ2fWUaeuU5aN4dOwoio6LKXGOw9NlHLcg7DSWA/pDeyZC2zcsGcbO8am6zEfyiUyXAvNRVDvTzYAAUEMeJ2JbBNfzzUe2EQqxmIxmQ88hNYWS42GkkMHCoZzaN5yvPo8pKyHghT47bqApRGehY0d9H8Zgzg/3BIBl8/Jqf5xPwanDpwAEGkI1n8KoCJ2dnPXNR1JISC1pshgMrNLPopuPvIimsH6xFArTSlOQVBOqJdc3HwHAwsEcjsyUcViYnwDg8HQp0af
wmHBsn7J8BABw/BI/7LUeTUF2ZboYCIXoPQ80wCY1BYuEpgBYZvSLxdwWQ8+RzwQDz0ghi0XDgVAoxwzQOnJWX47M8j1m5WSNnkFqIfMHspgSA7CuPSQ5mmuZj4AEoSBGIzlgZm3CcD6DyaKLw9NlzBOOZ6kpTGvmo6cPTYs2BdeQA6n0KSwdySOXsbDr0LTyKUhK1XwKLqs+jQ5kMDHjhISKbz6K9ylMi/smBdKGJf5rPT4FeXtD5iOOFwryeTZuPiJ47E8MjPkoHiMUDD2HPiD4mkIwa48LJ9Upe+EolcDs4AXmo8gYPyYEwOhANhTKCvgz4XhHM9clFKTZaEXIfCSdsy4yFoGIMJS3MVks48hMWZmPslbY0ay3S79H8q0coIl8ITNddJU/QFLN0Vx2PWQz/jlGC1nMlF2MHw2Eo8fAQMSnIO/BrLhvMqT3eCkUapj79L7MlFwlIKKPWWkKXnPRRzb5Qt5jVvfLECbVK68Z+hM9Bn+kkMEizXxUriUUnIijWfMpFGMGVADYJ8xHHnOFI3Z0IBurKZSc+jSF45cMYShnY+loINikHX665CpT0lA+g6mi6/sUhKZgWYSsTSFHs94fSdR8BAADWRvTJbciBLWaUCi5ntJiRkUbZGirJElTmC27sC3CCUuGkctYOH7JMIjmZj4quV5sdBUAlJyw5tfogC7NRy43rm30O0YoGHqOsjZwDeezEaFQw3wkzUaxPoX4AUpqCiXHqxg0RwqZ2ISvsushl6k9qLzpOWtx2ekrQoOptMPPlF2lDYzkM5iYlZpCVn03a1vK0QsEUTi6UIiGpAJ+7P9s2a3oj7wH0yUHtkUhU53fp8B8BABPHawuFDJaSKptEV5zzio85/hFmD+Yw6AQTLXgmPDaJJ+CfK61Smck4YeksjAfNXSKvseYjww9R1RTWKj5FJyY8NCJ2cBuHrU9y1c9+ig6Cx0XmsJs2UPJ9TBaCOZKo4V4TaGszaqrkc/YFWUelMmlHNYU9h6ZhcdQPgX5Xf360nwSZz7SZ76+phAkw0nkPdj4Vz/Hiz57a2if71MIzEcAVGa0pJqmkLUIGdtSJqSBXKYuoaDfXk8J9SRHc3Oagi2S14z5KBkjFAw9h+4MHS1ksFKzx8uZsuSff7UTZ1x9A+547IC/X5mNIiGprOcphK8n7eZHhXBZPBKYekYKmaZ8CnFIQeCbj/xzDOUz2H3YzyWQPgUgcDbr1wXqMB/lbMyUXRycLmGp1p+i46mZua4FeB7D8YI+yWKClZpCkk/BU32RDOZsVdivGuFIqqA9OoGwbzKj2fIdzX7tJCMU4jBCwdBzlEOaQhYXnbQE1/y+v7yGmil7jMfGJ/HJn/pJbXJgdyJmhsDsEPgLOBJ/JM0z8nXxcDCIjhayCXkKgVN2rkiT0YyYXQO++UjOqhdo5qNcZKB1NCEnieYpAL6mMFPyo5n0ek6y7VHkfdWjjwDfp0ARX4WOnG0Xy67SMiSDuTrNR9p72a/KjOawBtFU8prHYIbJU0jACAVDz6HbwUcKGVgW4fwNiwD4/oYHnpnAhr+4HtfcslN9Tw4huhDQP7suq8St6MQ/6jNYrJmrRgcysSYrR4vpnytSU2BGSFOQ6D6FJE3Bi9EU9DFODshHZ8tYGTFfxYWlyvNGzUczZRdrNKGS7FNwK8wxA3UKBV1TKCXkkpScsJBvVCjYlqh9ZMxHiRihYOg5Sk5YUwCCgdTxGA/t9bNnv791l/qeGymE50Rs0E5C7SPWzEqSRUNhTSHqU/BDGtGw+Ug/TvcpSGTil//d8MAVRB+F+wCE7ewDWRuHZ8oouxzKkQCAo7OVJh15vwJNIRBMJ2jtiZqP9NpHUR+LL5hqm490R7M04SU6mpuNPtIczUZRiMcIBUPPUY44moHAjFJ2GYO5YLYqZ9LyEFUIryIk1YutfSS3jWiDskyWsy3CQM5OdHo2LhSC0UhFH4l+LhvNx/oU5Gs0YxtIMB/lbFXTadloWCgcjOQufPjft+MvfnhvqE9DOVvZ7fVFeaLmo4yWpxAVYAPZ+hzNupCWAitqPgryFCq1ormgkteMppCIEQqGnkO3eQ+LwVKaKcqup7KOAeBZoqyCp8xF4YxmVXLZi3c0S9ORPjNeJHwKgzkbGcsCc9icUVJCobFBJROnKQhBJzOCJXKQloOxiqaKCeOM5inIJi8YzIZm+FGhcN/uCVXuWgpfIlL3ZP3iITUI5xPMRzOa01wyKJzdtfBCQiFeU5Dao9RoGo4+svxzu2xqHyVhhIKh59DNRzI8Uw4CjuuFsmRPFkIh6kOImo881gviBQPOrHA+j2hhqItFXsRgzg6ZrSRS0ETt/fWS0QY0OZBKX3ZUKMhBWg7qpRjzUbTMhWy7ZDifwWghq7QhWVRPMl1yVFhvRhN08p4ct2AQBZHPUMgkOJodL9Qv2Ya5+hRk4b5K81E4xLi5MhcMz2s816HfMULB0HOUXcalz1qKv33t6dggBkkiQs62UHI5VAvo5OV+7X85c47mJ5Q1c5JM3PJqaAoLhnIgAoZyGTXohXIFlFO2eZ+CjD6SAuvUlfNC35WCRzp4HS36ShKXp1DQhUIhg797/Zl47yUnAAAOToXrIc2UXJU1rbdNOpuPWzCgFsIZyIWFQpx/RDKQsysymh/fP4WLP32L8gvp7QcCTSGp9lFQ5qLZgnjccKXVfsfcFkPPUXI8zB/M4Q3PXhOazWVs8jUFMdD85cufhctOWw4AcCNVNOXgoWcAx5qPhKDQE8YKWRvD+QwGcraa/eoRSM36FGyLlDlGDqRvee46fPSVG/HGZ68OfVdpCplk81FcnsKgZuYZyfthvRtX+ALnYERTmCm76p7ECYUV8wsoZMIai94XSZKjWffh3L7zAB7fP4U//d49apu+Xy5RWsvR3Hjto6B0tilzEY8RCoaeo+h4saaZrG3B8RjTJQcDWRtvv3ADhnK+iUOaX6JmBj1EtRTjaJbmIzkAAn7p7pF8JlFTaNanAAQOZuUzyNl46wXrK+zy2QTzUTgk1X+1QuajwBwm/TJ5cY4DkaVNdROPXrpjdCCDpSN55DO20jwqHM3a6FyZp5CBx+EQ48mirwnc/8wEnhaJcXGaWzSVohQxCza3yA5E6WwjFOIwtY8MPUc5IQcgaxNKroeyG9jMbTEQeVGfQjRPIWGN5sB8FPwU8hkLw4WophAc5TRpPgJ8DaHk1j6HFI75jA0irT9xmoKVYD7KhyO4dEez54VDcvXZ/lXP36BKiCufQkLyWvRYIBAgM6VgbecxrST5M4dnsHrhYDhPQeWSRDQFufKaWmSncfMR4EdvGUdzPG0TCkT0FQCvADDGzKeJbd8FcLL4ynwAh5n5LCJaB+BBAA+LfXcw8zvb1TZDb1NK0BQylgXH9fyw1LwQChQetJ1I6Wy9LESQp6BrCuGFbgA/wuYDl56EkUJWlXn46n8/gfmDWbz9wg1Nm4/8vlDoNQl5jWyGYBOFajlJostxAhHzUURT0IVCNDpI79O5axeq91JTiTqaq/kUpOCeLrtYILaNRUpxA2EhHRXqFdtdWR
APDSH/X8quKYiXRDs1ha8C+CKAr8sNzPwG+Z6IPgPgiPb9ncx8VhvbY0gJpSRNIUMou775aDDr/+vKyamcWerJav7nQFMIZqHBOZX5aCBsPrrs9BUAgO/d9TQA4Ef37MbCoTzefuGG1piP7LD5KAmVp2BbsCxS/fFCIan+azRPAfCFTl7TNoBAKOQyVoVQSKr8Ks9XyFXzKYSPlQl5+noQ+yZm1XvZB11Ix2VsA3pIams0hbLrGfNRAm3zKTDzrQAOxu0j33v4uwC+3a7rG9KJdALGDZZZy0JZOJqlpiBNFm5ECERfHc/Tah8FFBMczRI5+MyUPKVpqJDUJs1H+msScsnPrG3BJlKDY1xBvFCNIjGIDxcyylkvBcx+kdTGzBXRQUlCqiDMV9E+x4XXSqSGomdQjx8tYrlIpotbOyGuf0ALo480zdKYj+LplqP5QgD7mPlRbdt6IrqbiH5FRBcmHUhEVxHRFiLaMj4+3v6WGjqK/PEnOppd9oWCGPTkmKTMRxFHcynkU5AhqXHmo7BPQaLWUy45Wv0d4VNoME8BCIRZrfLbekazra0CF5enEC2IB4TXtJb9krkAZXEv49oVpZC1hWAID6S6UItqTlIo6IsEjR0tYvk8KRT8bbr7IM5nAuiO5ubyFKTccj02mkIC3RIKVyCsJewBsIaZzwbwJwD+jYhG4w5k5muZeRMzb1qyZEkHmmroJMUqiWEZm0RGs6Oia4gItkUxjubIugqa+QhVzEcWhWe/ag1ibcGaVvgUskoDqM+nkLOtkKO51noKUmiORKKqoshoIEmS+aiQtStyFICwEIkKlOG8f22ZezBVdDBZdAJNQa3HHBwTF10FVPoaGi6drd0jE5IaT8ejj4goA+A1AM6V25i5CKAo3m8lop0ATgKwpdPtM3QXpSnEDJYZ20LZY8yU3VDGrk1U4UOIXY4zpnR21NGcj8yG5UDHHJiaWuFTkKaWmuajiKZQ93oKQlPQazrFCdojM2GhkCToLjttOVbNL1Rsr+ZTUJqC0EykkznQFCrNR0maQnTFvWYK4gXnaOgUfU83QlIvBfAQM6sSl0S0BMBBZnaJaAOAEwE81oW2GbpMqYqmkBPJa1NFNxSHb1nBwBKYkcKz+tAazZrpJdAUwhE6En2gk0IlEFytiD6qfg7dIa3bwOMWptHNIbpPQRLXXpksFr1elEs3LsOlG5cl9gOoFHDDEZ+CdDKviJiPdM2tlk9B0vhynPp7oynE0TZZSUTfBnA7gJOJaBcRvU3seiMqHczPB7CdiLYBuA7AO5k51klt6G+qCYWMcDTPlJyQppCxgiUrS07YzFDSQ1LFTD+kKYjqnnJmHTWx2NpAV4xEwDRnPpKDffWBKa9pCvqgH9IUYhaeifMpEFFF/47O1qcpJGFZpAbaqKN5WAjuo8WwUJCaguvFaQqVmhBQuQZE4wXxjPmoFm3TFJj5ioTtb4nZ9gMAP2hXWwzpoZq9PmMTpsuM6bKrqooC/uxPDiJO9FWzRQfmiuCcs2UXhYwdShILXVMbREquB0/zTdQy/VQjq6KPGtMUapmPMraFnG2FNAX9u34dKU85naPtmgsZyz9XNjJQWxZhOJ9RgmfvEV8oHLfAX/SHY55HuU5NoZnlOINzGKEQh7GqGXqKYpVwz5xt4ehsGcz+ovAS26JAKGjmImaOz2iOCIV81lbCoFptHwAio7oVIalisK8xukV9CpJY81FkkHvpacvxHLFinUTOxF9xpp+HMVGnT6Easl1xAm6kkFE+hT1HZjGSzyjnt1qPuZ6M5ohPodmQVL/dDZ2i7zG3xdBTKCduQvSRtIEP5fVcAks5Jktu2BQRrO3raeUcwrWPCllLzZArNYVwO4qOp2azrYg+qldTyNkEvSm18hQA4AtXnI1Xnrky9rzPPX4xAKiS2dHrzQWVcxEj4IbzGRWSuvfILJbPK6iBWT4zfbjXQ4h1KjWFBs1HevSRCUmNxdQ+MvQUcsDNx9Y+stTMdiCUYOavwQwEmgLgCwJ5vpLjaTPT4JyzZb8uDxEhl7EqfQqRgaPouK3NU5hL9JFuPtL6wDF5CklcdtpyLB7OK42o0tHciPkoORFvpJBRjuY9E1IohNvNMeG17fIpGPNRbYxQMPQUcqaYlLwm9+trGmc0TSG0GI7LwfoDMYMoIIWC8CfYVtXoI8CPQGpNmQuRp1AreU3LaNYHsVpVUpO45ko/Evxn9+0FENYUMhY1FNVjV0nEGy5kVdjr3iMzOHnZEtVO5eOJCAAgOXlN0niegtZuIxRiMeYjQ09RqmKaycSEXAJ+SGo0+giAKp4XJVr7SBZ5y2etCvNRpaYQ+BRqDejVaERTSIw+YlkPqP7ryyS1iZmyGigbNYfJ5xInJH1NoYyy64ls5gF1T2VocOUTqiydXWE+akX0kTEfxWKEgqGnqFrmQts2pDuaSXM0a0kIrraGgo4+CM06QVnnnF1pPooO2r75yF96splBJdNk9FG4IJ70KdTfHimUJmYdLBjMiWs01p9qfRnJ+47msaNFMPs5CrKZbkz0kaQyo7n1jmYjE+IxQsHQU1Qrc6FH6ugF7GyL1ABTdlkdW/a4YoYJVC6yI81HG5YMY8OS8BrJFeYjx4PjxhfsmwvZeqOPbBkqm6wpxJW5qIUcyCdmypg36N/LZtecTnI0H511sPfIDACEHM1Rn4Le/IqM5qhPoSXRR0YqxGF8CoaeolQlJFUfiCuEglbWYiBro+R4cFyvoi4/EA5JLYqQVAD45tvPr/iuHY0+Ej6FZvwJgO6craEpZDRNoUb00VzGOHl/J2bLWDl/APmMVTO7Ogm7ilAYKWQxU3axR+QoLB3Jaz4F/ztqKVDLSqx9FPUpNLyegtbFRrOi+x2jKRh6CmkmiC+IFy8ULAo0BcdlFZlUdhllx6sYQKK1j6ILx4SuGRt9FL8I0FxQeQo1hMvy0QIyFmHV/IFI9FFjjubo9cvifhWyNrIJxfBqIYV1nICTyXMycW20kFWhtdGMZt1Ul7TIjqTRWb4uCIyjOR6jKRh6ipIoRZG0HKfcpyeZZezAp1B2PeWEdjx/Vl/I2KHFZPTaRzNlFwO55AG+InnN8VB2uOFZdbQvtc6zcv4A7r36JRjI2TUdzXMZ43RhNJCzRa5Gc5pCkqMZCITCYM5WIjma0exfX67RHPaZRBW+VuQpmOS1eMxtMfQU1ZLX5KClLxwDBI5mZobjBZqC43JISEh0TWGq6IbCW6NUOpr96KNGZ9XqvHVGHwFBpJWdEJI6lzwFiS4AhgsZFLJ2wxna1Yr7ySqt+0SF1KF8psJ8JIWaLlTiKqfqNCqTdSFvzEfxGE3B0FMo81FC7SMgnLgG+D90TwgEIBhEiyJhTf9+zrbA7K8+ls/4NuzhXBWhEJPR7PsUWqMpzMU3YdUwH83FpKKbxUbyGRQydtOaQnzymm/m23dkVi0NKqvNRhPU9Hut74v6E4DGNQX9MGM+iscIBUPHKDkeZkquinaJo+gkJ4ZJQTGYqxQKUisAAiEgl5rUE9IKWV8obPrrX+DkZSMAUFE0LnrucPv8x
XaaqXsEBAPoXAbicJkL/X0j5iNNU8hnUMhaDYfYqrUhYpPX/Hu7Z2IGgzk/c5zE16LJa5kETSEurLjRAV1/nib6KB5jPjJ0jC//+jG88ou3Vf1OSQy4caq9nN0ORsw90tEstQypKRya9heo13MaCllbmY8e3nfU31/NfBST0XxgsohFw7mq/ahFvctx6iQVxGuF+WjxcF7lK8yVWmUuAGDfRFGV8Q5CUv3vhH0KPrqmIIW9/ixa4VMwikI8RlMwdIz9k0W1aHwS1SJ75Ix0MGI+ytiEYtmr0BTufNxfkuOs1fNx7+4jAHyhEHVaDlcRCnEZzfsnSzh7zfyq/ahFToWaNmg+arDMhUS/7nA+g7993Rl1HxulqqNZ3NuS4ynhayufgiyIJzQF7V6HypU4/vt8xoIjtL9GfQohX5TRFGIxmoKhYzBX2pGjlJxkoSBNNnqFVMAfDB2P1eI3UijctmM/ClkL56ydr77rm4/CbagmFOJCUvdPFrF4OF+1H7WoN09BJ6QpNJmnoF93RGgKjfZJzvCjOR3+uQNTodTwkjKa9TZ5MT6FQlZfQ6MF5iOjKsRihIKhY3jM4OoyASUnOTFM/qAHI47hjHA0K01BmI8e3z+Fc9YswEA2aj4KU818FJ1NHp4uY7rkNi8UlB2+/oGpVp7CXKJp9HusD9yNoDSFmL4UssE6EMNCmEfNR8wMonCIqBsTfVQIVcZtVCgE7030UTxtEwpE9BUiGiOi+7RtVxPRbiLaJv5epu37MBHtIKKHiegl7WqXoXt4zBXlC6JUMx/NihyGJEezrMa5cCiwjW/esCg08BYyczMfEVFoANp92C/XsLhJn0K2IUdzUpmLuWsKUUdzM1TTeohInV/6dmQ7pTbA7AsKXdPQc0mkUNADBhodz435qDbt1BS+CuClMds/x8xnib/rAYCINsJfu/lUccw/EVFymqkhlXgR89GPtu3GxZ++JWQqKLrJkT0ymiiad2CRrylIoaDP4s9fvzC0znI+ayGqrlSLPgLCg8fuQ0IojDRrPqo/T0G1I6EgXlAltbGQ1Fr9r9muKo5mIHA2K5+C+H5gPmJYBOiHx2kKegXbhqOPTEG8mrRNKDDzrQAO1vn1ywF8h5mLzPw4gB0AzmtX2wzdIVoA7fH9U3h8/1RoAPDNR/H/llPFeE1BZjQfnpZCwZ/F5zIWzlw9Xw0EGYuEqSl83mp5CvI4ABjK2UpTWNKk+UhmUUdzLqqRtPKaDE+di51dnzGPNKkpBMX94p+b0hSE+YiUoxnqlYgq8hTk/0lJOJr1LPZW+BRM6ex4uuFTeA8RbRfmpQVi2yoAT2vf2SW2VUBEVxHRFiLaMj4+3u62GlqINAkENW/8zzc9OIbLPv9rlMX6x9Hy1RI5o105fyC03RIZzYdn/BBUOYs/Z818FLK2GtTzGT/UNbr+b9RxHUXZxAuZWG2kEV566gr885XnVPSlGqFFdlgvEzH3PAWdVmkKSZrKqPBZ6KHBFoUnCYTKiKKjRQfrPvQTfH+LPzTo/xeNDugmea02nRYK1wA4HsBZAPYA+MxcT8DM1zLzJmbetGTJkhY3z9BOvEi0iTQbPbR3Ag/umcB0ya0affTm56zFp153Bq549prQdlk6W2oKy0YLAIDz1y9S+wEgn7VBCM+yC1mrZgRQRjlKg0Gt2TyFgZyNl562Yk7HRAfdoMro3PMUdFrlU0gKEBiOmI8A8cykTwF+26M5G9JU9+937wYQOJqbmeCHNAUjFGLpaJ4CM++T74noywD+S3zcDWC19tXjxDZDHxGtdaNXNgWCRXGq5Sn87qbVFdvlAHNkpoxC1sKq+QP41GvPwItPXSaO83/8MilOFwr1DIi2ZYEoGNTmD2abLnPRCNGZreuxKPHhf250kBuqYT6rRa0Fg6I+BQBCY/Pfe57vU4jO/qP9kZpCMw7i0CI7JvYylo7eFiLSp0avBiAjk34M4I1ElCei9QBOBHBnJ9tmaD8yScmLmD1kopIrFsWZa1lqWRDv0FQJ8wf8GfzvPns15osMXRnVks/6g7vuw6gWjirJWISsbakZcbOmo0aJDprR+9jwusVN2taD7OwETUFFH+l5BuEqqb5PIXx8VMZJTaGZUNLQIjtGU4ilbZoCEX0bwAsALCaiXQA+CuAFRHQWfI3xCQDvAABmvp+IvgfgAQAOgHczsxtzWkOKkWOx8ikoYRAUSGukrpByNM+UMT+mrpLuU7AonBhVj6aQsQl521ILxbxp89o5ta9VRAexkuvhj7/1WxWd0624+yCjOUlTED4F3XxEuvnIz1OIagbR6qhKU2iin8bRXJu2CQVmviJm879W+f7fAPibdrXH0H2CAmjys/8qaxbJ9Q/iymZXQzqaj0yXQ4vv6PsBPxqJQI1pChkLn3jN6dg/WcLrzj1uTu1rFdFB7PBUGb98aAyDOburMfdS6Ca1QZqPdAFsaeYjmacQ1RSihfBa4VPQjzU+hXhM7SNDx6jwKXiBMJCfy66H/Bw1BeVonilh/eKhiv3S5p3P2CAKJ0bV51Mg5GwLLzh56Zza1Wqig2FZdKTkeF2Nuc/UKAMuhYIeSkwUNn/5Gc1RTSEcJaaEQqt8CkYmxGJcLYaOEXUwc4VwqO5oTkI6mg9Pl2Mrfdoh8xEpIQTUaT6yrKYX1WkF0UHTcYP71s2SDbZKxKuVp6BpChZpIan+YB0d7Cs0BfF/0cwM35TOro3RFAwdIxpXnxR9NNfIHmmfPjxTjl2rQZolchkLoPBaBPWYj6Sm0G2q2dwbGd8+8/ozm85RAPSV1+IbceGJS/Dm56zFSWL9CkA8s0hGc/T4Cp+C0BRaFn1kzEexGKFg6BjKlxDxKShNwfXXRGhEU5gWJTBk9FF0PyCS1xAIpcXDObzqzJU1z5+xCR53XyhUrBcdEgpzH+Be2yLfyKXPWoapkpOYdLhkJI+PXX5aaFsoJFVEH0X7EF1xLa80hcbbqoehGqEQjxEKho4R1RBkFFDZa1JT0EaJ+OgjEZIqaudIIfSei0/Ac45fVNf5kwa8TpJkPgK6O8BtXDmKjStH53RMXEZzNU3BoiC6qSnzkSmIV5Pu/6cbjhmimcyBhuD/+Msi+qgRTUES5yOQs8NcxhKOZhHXX+egIPMUuk2rzUfdJJTRXIdPIWsHy4U2IxRCeQrdf6Q9idEUDB0j6lOQQkImr82WxZrKTQiFuAJzgaZgwfVYXa9e5+zLT59bOYp2ER3EQkIhZVLBCpmP4n0KulDI2Zaa5TflU9CONespxGOEgqFjSGNHUmiqLI09lyUqgbBJIFpWG9B9CjZmSq4yX9U7trzlgvVzak+7iCZt9Yr5qBHCIamV61YAYaGXFYmH8thGMSuv1cYoUIaOERUC0TIXM0JTmGukjz77K8RqCtHoo+YKyHWL6MxWD61NmaLg12yKZDRH13ouaUIvawfmpeaij/T3KbtpHcIIBUPHiFb1jPoUZARRLjO39ZVCK6tlK/+lo3kKqtT0nK7SfSqjj4JBM22mkLiMZrVYjzD3hTQF21KDeKt8CqYgXjzmthg6Blckr/nb
pRmkYfNRDZ9Czrbw3OMX4azV80HaddM2U4zOpJ0UO5qrZTTL/pWjPgUxWjVX5sJEH9XCCAVDx4iaj6JlLpT5qIHaR5I4n4JlEf7tjzbj4lOWhr6bMpmg2i5n0mn2Kegam9IUVI0q/xnqeQq5TGs0BbOeQm0SHc1E9CfVDmTmz7a+OYZ+RprAVbXUiJCQ5qO5Rh/pawPXWt5SHwfSNijImXLWJsyUm09e6yY2kZbEmOBTiIaktiL6KMXPv1NUiz6SOeknA3g2/DUPAOCVMGsdGBogqilIc1JZmY8cAMklmJPQf9xxjmadkFBImZ6sV3sFIuajlPVFNx9FfQry+ZdCPoUg47kZ/wkRgci/pjEfxZMoFJj5YwBARLcCOIeZj4rPVwP4SUdaZ+grpIaQZEZq1HykO5praRn6gEIpczVHHbEyagtI36w3mqdAiPEpRBzNcq7QbNKZTQRH5EYYKqnn9i4DUNI+l8Q2g2FORFcKU+speJHooyZCUmvNIvW9KRtHg0EzI0M2U2w+smJ8ChFNoewEQs/PRm/epwAE/y9pS/jrFPUkr30dwJ1E9EPx+XcAfLXWQUT0FQCvADDGzKeJbX8H3/xUArATwFuZ+TARrQPwIICHxeF3MPM76++GIQ3UnbzWwHKc9ZLmKplyUJSDphMKSe1KkxrGiok+ykSFQlRTaJVQEIeb5LV4qv76yP8v/DqAtwI4JP7eysyfqOPcXwXw0si2GwGcxsxnAHgEwIe1fTuZ+SzxZwRCH5KYvOZGzEcNLMdZL6l2NEufgh3jU0hZX6pVSZXmo2LUp9CCkFQALRMu/UpVTYGZmYiuZ+bTAfx2Lidm5luFBqBvu0H7eAeA183lnIZ0k1gQL2I+mmv00Vx+3Po302Y9CKKPxEw65FPoRosaR89oBkTtIzvsSG9H9BGgm4+aOk3fUs9t+S0RPbsN1/5DAD/VPq8noruJ6FdEdGHSQUR0FRFtIaIt4+PjbWiWodVMlxyMHZ3VCuIh9FpZ+6gxR3PSIi86IUdzymaK0Zm0ntyVtllv2HwkqqRGzGO6+SinCYVmn1sr8h36mXp8CucD+H0iehLAFCCSQn0TUEMQ0UcAOAC+JTbtAbCGmQ8Q0bkA/oOITmXmieixzHwtgGsBYNOmTRzdb+g9vnDTDtz4wF6VWFaxnkLUfDRXTSFii66GPg6kbUxQ0Ud2+qOPSEteq8xTSPApyNpHTfa1VefpV+oRCi9p5QWJ6C3wHdCXsJg6MnMRQFG830pEOwGcBGBLK69t6A6Hp0s4PF1Wi9xULscpMppLjQmFIBO29nFpdjTLwSwXM2imzRRiUXgFPr1KaqAJRaOPxLFN9tVS50nX8+8UNYUCMz8JAES0FEChmYsR0UsB/BmAi5h5Wtu+BMBBZnaJaAOAEwE81sy1DL2D6zFc5mBm6IXNSI7KaG4sec2256ApaO/TNiZUmI9S7Gi2LVKTARY5A3bEfFRqW/QRpe7Zd5KavyIiehURPQrgcQC/AvAEwr6ApOO+DeB2ACcT0S4iehuAL8LPlL6RiLYR0T+Lrz8fwHYi2gbgOgDvZOaDDfTH0IN47AsCjvgSog7nmUYX2VFROfX4FIL3aRxIAc18lPoqqeE8BeVotmMczRlqycprgH8fTTZzMvWYjz4OYDOAXzDz2UR0MYArax3EzFfEbP7XhO/+AMAP6miLIYV4zL5gqLGewmzZHwQaXaN5ruajlI2janab64Poo3BIqp/RHHU0Fx1XfT/XyugjzaltqKSeX1+ZmQ8AsIjIYuabAWxqc7sMfYQnTEfRjGZXG9Qkjcziog7KqqRYU+in6CNbiz5SmkJEuEfNR8oX0GRXLSt996uT1KMpHCaiYQC3AvgWEY3Bj0IyGOrC9Riux1pGc3g9BZ25Jq4BcxMKadYUqkXnpC2SRjcfyegjaR6S5sNiOT76qGnzERnzUTXq+QVeDmAawAcA/Ax+eYpXtrNRhv6COfgDtEJ4MVJhrpFHgGZWqePYsKM5XQODFfEp6OajlHXFNx9ppdT1Mhf5rDQfBUJhMGe3LL/AEpVSDfHUoym8EcCtzPwogK+1uT2GPiQafRStlqozV38CEDhc69EyUp2nEHGop7nMhW1pGiMYFlmapuCHLs+KwIOv/eF5OHvNfOw6OCOObT6j2WgKydQjFNYA+BIRrYefN3ArgF8z87Z2NszQP7gRn4JyNMf4FOYaeQQEtudjJU8hznyUvjwFqshozkTNR44H2yJcdNIS/xjLFwrNPjZbW+XNUEnNfyVm/igzvxDARgC/BvC/AWxtd8N6Bc9jnPpXP8N37nyq201JLcx+OKo0F6gV12I1hbn/WOXgWM+xqTYfUcR8lPrlOP33KqM5JglRH7zl+2Zn+UTpC+HtJPXkKfwlEf0UwA0ATgDwQQDHtbthvcJkycFUycVf/+TBbjcltUQL36kaSF7ldxvxKchFZ5aO1JFbmWJHcxCSKsxH2g1M2yBHFE5i1DOa9Qg0XQC0aj0F//xNnaKvqcd89Br4dYp+Aj957XZRluKY4Oisn2U7GLMgvKE+VOayK81HcntrHM0XnrgYV79yI16/aXXN74bX6J3zpbpKtUVo0tgX9fxlRrMWXZSxCK7HIaHQsugjy5iPqlGP+egcAJfCX5f5RQDuJaLb2t2wXmFipgwAGMrXIz8NcUST1KL5CjqNOJqJCG+5YH1dz0hfgjNts+vK6KO0h6T676VPQdcO5P9BnBBvVgD66zSn6351kpq/IiI6DcCFAC6Cn7T2NHzfwjGBFApGU2icpExmN8581Ga9PjzIpGtgCGoDyeij9Ja5IAr/PxDC1UtlyQtdU2hVRrNNzZ+jn6ln+vtJ+ELgCwDuYuZye5vUW0wI89FQzmgKjRL1KUhbMrfIfDQXKGbmmRai5TzCIaldaVLD2ETq+XPEp+DnLPh9DAkFtb8VZS6aOkVfU0+V1FcQ0QD89Q6OKYEAaJpC3mgKjVLhU5BJbHFCoc2agj6gpE1TUNFHliwD0T/RR7pPwTcfVWoFQfRRk9e2yJTNrkI90UevBLANfjYziOgsIvpxm9vVM0zMCp+C0RQaxvMiPoVIdVSdTmoKaSMYNP1+6NFHqctTsMLlTvSMZtvSzEchIS5fW1DmIs3/CG2mnn+lqwGcB+AwAIiktfVta1GPYaKPmifqUK5a+6jdQgHp1RTkDFlG56TZpxAqnQ2GRYQ1C4dwxXmrcf76RcgKKWfFmI+aLnNhCuJVpZ7pb5mZj0T+6Y6ZZTCl+cjQOC5HPydrCo1EH82FkKM5ZbNr0hytFlGoimjaBrm46KNcxsInXuOv8lvN0dySRXaM+SiRen4W9xPR7wGwiehEIvoHAP/T5nb1DNJ8FDeAGeoj6lCuVvuos47mdA0Mxy0YwCvOWIFNaxeGVi4D/IiaNGFRODQ5+ijk5CA+o7m5a5vkterUoym8F8BH4K+h/G34voWPt7NRvcTEjG8+coxQaJioQI2Gpuq03dGMSht1WshnbHzx984B4A+Q+m1Nm4AjIvV/IKOPdGQuRiijWTOfNcNLTl2OI8YCkEg9yWv
TzPwRZn42M28C8A34y2rWhIi+QkRjRHSftm0hEd1IRI+K1wViOxHRF4hoBxFtJ6JzGu1UK1GaQpwB3FAXUXmqF0KL0klNIW12eJ2o+SNtfbEtQpDQzBUCOhtT5kKt0dykNL/ivDV450XHN3WOfibxF0hEZxDRDUR0HxH9NRGtIKIfALgJwAN1nv+rAF4a2fYhADcx84niXB8S2y8DcKL4uwrANfV3o30ooRA1jBvqJloNtWr0UQdDUtM1jIaJJl+lTesJm48qZ//SpxBX1TZtfU0b1X6BXwbwbwBeC2A//LDUnQBOYObP1XNyZr4VwMHI5ssRrMvwNQC/o23/OvvcAWA+Ea2o5zrtREYfGfNR40TNRC5zbOIa0H5NIc0ZzTrRtqetL5ZmPpIZzTrZGPORDAww4aTtpdovMM/MX2Xmh5n57wFMMfOfMfNsk9dcxsx7xPu9AJaJ96vgl9CQ7BLbQhDRVUS0hYi2jI+PN9mU2sjoozj7t6E+oqY3j5Md9+2OPtKHk7QNpDrR25S2SCorZD6K8SlYlaYi+bzSZipLG9UczQUiOhvB76iof2bm3zZ7cWZmIprTaMvM1wK4FgA2bdrU1pGamVWZC6MpNE5UnnpiJbY42u9T0AvitfVSbSU6W07bQKmbj+J8CsrRrG3PWIRNaxdg48rRTjXzmKSaUNgD4LPa573aZwbwwgavuY+IVjDzHmEeGhPbdwPQax8fJ7Z1jaLjBesJxxX/N9RFVCvwxKI7cXTUfJRi43S07WkzqVhEamLgcaWAlmUuMpoKRES47l3P7Vgbj1UShQIzX9yma/4YwJvhF9p7M4AfadvfQ0TfAXA+gCOamakr6IOZYxzNDVMZkppsPsq1O+DeOJp7Aot88xEzq4xmnYzKaO5G645t2nrLiejbAG4HcDIR7SKit8EXBi8iokfhr9PwSfH16wE8BmAHfCf3H7ezbdW447EDmC45IZOR8Sk0TtSprK/XHMU4musj/eYjv73MwcprOnEZzYbO0NYqb8x8RcKuS2K+ywDe3c721MOhqRKu+PId+JvfOR2XnbZcbTc+hcapdDRz7FKcAJCz21tjKs3JazpR81HaBJxsvici0SrzFOQiO+nqVz9glLMIR2cdMPv5CbogMGUuGid661wvWVPIttl81C/Ja1FNIW0CTgo1lznWp2A0he6RqCnUyihuRfRRLzJd9qONZstuSBAYodA4FclrnJwh3lnzUVsv1VYqNIWUdUY3H/maQrj9MjQ5k7J+9QPVzEefqbKvmeijnmam5AIAZsteaOAyQqFxKkpne1whKCSmdHZ9RPMU0tYV3XwUm9HcojLZhrnTjeijniYQCm6otIXxKTROXEhq0u1sf5mL+PdpI2o+SmNIKpBsSsxmKjOaDZ2hLkczEZ0GYCOAgtzGzF9vV6O6yUzZFwpFxw2tbGU0hcaJ/uZd7pXktfQOOKl3NIv2ewwgRlPIxmQ0GzpDTaFARB8F8AL4QuF6+IXrbgPQl0JhWjMfyRlM1iYjFJqgIvqoivmos2Uu2nqptpJ6R7NoL4vw5OSM5pR1rA+o5xf4OvghpHuZ+a0AzgQwr62t6iJSU5gtu8pklLMtIxSaoHI5zspt0obcdkezdvq0za510l46O2w+So4+Mo7mzlPPL3CGmT0ADhGNwi9LsbrGMakl5FOQQiFjhUxJhrkRvXVujE9BagidXWQnvQNOpaaQrr7o5qO4jOa4NZoNnaEen8IWIpoPP8t4K4BJ+FnKfUmgKQR1j/IZ2ziamyCqFTBzheaVy1iYKbsdXmSnrZdqKxm7n8xHVTKa0/yQUkpNocDMstzEPxPRzwCMMvP29jareyifgqOZjzIWirNm+b5GifoUXK9yPYWRQgZHZ8sYyLU5o7lPqqTKmbVt+f6utM2olfkoIaNZ+hTS1q9+oB5H803MfAkAMPMT0W39xqymKXgh85HRFBqBYyqixiWvvebsVXj+SUswWsi2tT39s56C33YZBJG2vgR5CglVUtVynB1umKFqRnMBwCCAxWIdZfnYRhGz+E2/MF3yM5qLxtHcEuJum+dVmo+G8hlsWrew7e2JW94xjci2Z20Ls2UvheYj4VPwODajOaMymo1U6DTVNIV3AHg/gJUA9JIWEwC+2MY2dZWZku8VnS27SlPIZ41QaJS4xCQ3RnvoVJKSPvakbSDVkTPoQtbGZNFpeyhvq6lVJTUbs0azoTNUy2j+PIDPE9F7mfkfOtimrjIjax85ntIU8hkjFBol7r7FLcfZqZBK/SppC+PUkUJ0JJ/BJ159OjatW9DlFs0NqQBIM2L0SUgNIWWyri+oJ/roS0T0vwA8X3y+BcCXmLkvPa/xIakm+qhR4hKXvZjSBp2atUtBkGJ5AEBfrxi4dOOyGt/uPYI8BS/0WSKjj4yjufPUI4f/CcC54lW+v6adjeom06X45DUA+MOv3oWf3tvVxeBSR1w5i7hFdjplJpCXSbtZwlaO2HT2Q95/+RurWE/BhKR2jWqO5gwzOwCezcxnart+SUT3tL9p3UHmKXgcRCLls75Q+OVDY1i3aAiXnb6ia+1LG3HmI5nFqtOpGaEcjFI6lipsSrfNXQkFUXSyIqPZMqWzu0U1TeFO8eoS0fFyIxFtAOA2ekEiOpmItml/E0T0fiK6moh2a9tf1ug1mkGajwBgquj7F/KaYVP6HAz1Ec1H8LdVCouOmY/ka0oHU4kUomnth/xJyf+DSkezyVPoFtV8CvJpfBDAzUT0mPi8DsBbG70gMz8M4CwAICIbwG4APxTn/Bwzf7rRc7eCaU0oTAqhoGfZ6vsNtYnVFHrCfNSRy7UNO+UaD1WYj+Kjj4z5qPNUEwpLiOhPxPsvAZCppi6AswHc3ILrXwJgJzM/2Sszntmyi0LWj/2WAsAIhcaJ88+7XuUazZ0a3KyUm10kVr/4FFzpaA7vNxnN3aOa+cgGMAxgBL7wIPGXEdtawRsBfFv7/B4i2k5EXxEJcxUQ0VVEtIWItoyPj7eoGQHTJRcLB3MANPORJhRmjFCYE3F5ChyznkLHBmkKvaQWaX7plcnUXJFjfWA+Cu/PpFzopZlqmsIeZv4/7bowEeUAvArAh8WmawB8HP5Snx+HvxzoH0aPY+ZrAVwLAJs2bWppnCgzY6bsYv3gEJ45MoupUpz5yPgU5kJS8lq3zEf9oimk3XwkNQA5OTBrNPcO1TSFdj+NywD8lpn3AQAz72NmV5Tp/jKA89p8/Qpmy74qu2DIr78zVRTmIzso0mbMR3MjNnnNQ8UiO52qZhA4mjtzvXahzEcp7Ug0JDWpSmrahXcaqfZTbHfBuyugmY6ISI/zfDWA+9p8/QpkOOoCYT6SjmYZkqp/x1AfUUUhY1FojeZOL9CuHM0pn4GmPyTVf5XroEd7kVUZzensX5qpVubiYLsuSkRDAF4Ev76S5FNEdBZ889ETkX0doej4A/7ogK8pSFORvvCL0RTmRlRTsIVQkNszNsHxuGO28b4xH6mQ1C43pEHsiuij8H6T0dw96ilz0XKYeQrAosi2N3WjLToykWak4N+WyWJl9F
EtR/PP7tuD0YEsnnv84ja1Ml3ELbupr6fgJyl5HTODyKukfayxOqxhtRo5CZCTg+jgv2Qkj8tOW45np6ymUz/QFaHQq5RFeJys6R8XfTRdcsCcPLP9+188iuMWDBqhIKgQCrYVWk8hsB13pj3Bc0vnYCqRQjSt5pX5g/5vbO/ELIAY85Ft4Zorz+1wqwxAfbWPjhmkKjuc92XldEzymsdA0Uler7nkesfkes53PXEQ/71jf8X2qJ85EzUfCdtxx6qk9knympVy89EJS4eRy1i4d9dhAOkNre1HjKagITWFQbEk5JQwFeUj6wbPlFwUsvHLRpZd75gss/0Pv9yBiZkyLjghrCFF70XGluYj/3O205qCeE2r2UWSdkdz1rbwrBWj2L77CID09qMfMZqChvQpDOZ8WSkjjaKLyU9XiUByXFbC5Vii5Lix/a70KVjYdWgGP73PrzarFmjvVEE8q7NCqF10+r61g9NWjuKx8SkA6X8e/YQRChrSfCQ1hZLjL3MYXRJwpkoC27GqKTguK6GqE7WkycHs5/fv8z+Le9uxkFT5mvKZaT9Uez191Tz1PuWPo68wQkFD1mHJZSwVP5+xrIrZWLWw1LLLx+SCPI7HKMf4UuKij+I+d6zKRZ8sspP2MhcAcNLyoFpOmvvRbxihoOEo5ydppXsrVfTqQsGLnTH3O44X3+9ojaOo1qUKn5lFduZEP2gKi4Zy6n3an0c/YYSChrSJZ2xL+REyllUxu43mKjy4ZwJ7j/ihdU4LNYWJ2TK2PnmoJedqN775qFJTiK6nEP3tZztsG++XPIW0r7wGAAs0oZDeXvQfRihoyJlu1g40BduimprCO76xFZ+98WEwM0qup9adbZbv3fU03njt7ShVCYHtFcquh3LsKmvhz/tEXLqk0+aj/stoTm8/RvJB8GOnal8ZamMehYbML8hYlgpDjRcKgaOZmbH3yCwOT5eVg7lV5qOjsw7Kri9oeh3Xi9cUok73Q9Pl0OeOO5op/JpW+kG46QItzf3oN4xQ0CiHNIVAPbcjYYx6UbyJGQcl18NM2VXHt8p8JIVUORWaQnz0UdxynDqdrobZD4MpoJmP0t2N1Jvx+hEjFDTkIGxbpPkUSM1m56lCeYFQGJ8sqm0y+iZuxtxQe8QgGxfV02s4nhcrDKOO5hOXDoc+Z5SZrn1tiyPlMiH1yWuS+aIicdr70U8YoaARaApWEH1EpAasOKGwXxcKYkbfKk1Btqecgmgm1+PY8h7RW/Hp15+Jez76YvU522HbeN9EH/WBTwEA5ovfVNqfRz9hhIKGXs5ZrfxkE2yhKRSyNgpZK5S8JoXCTMlRwqBVyWsyGiot5qOyyxXmouhiOrmMpYQr0D3zUdoHUzlR6bSG1WrmDUqh0OWGGBQp/5dqLdLsk7GCkFSbSEXI5LM2BnOZsKZwNNAUZJRQq8pcyJl3GgrsyXsXFYjR5LXKtXilRta+tsVdP+2DUL/4RuZrEwRDb2CEgobuaM7FhKTmMxYGsnYoT2H/ZAmAn7vQek3BP0/J6X3zkex71HQWvRfRQazTmgKhPwbTfghJBQKfwpRZvKpnMEJBQ4WkaslrUaEwmLPjfQrloCBcy6KP3NZqHu1E9jna1uitqFhhq8MhqVafaAqBo7nLDWkSaUo8MlOu8U1Dp+iaUCCiJ4joXiLaRkRbxLaFRHQjET0qXju67JKcmftlLipDUpVQKFcKBddjtShPyxzNavbd20KBtfURomGpleYj/16euXo+AG2N5g79JwZr7KR7NJWO5jRnNAOaUJgudbklBkm3NYWLmfksZt4kPn8IwE3MfCKAm8TnjuFo0Ue5jF8p1Q9JlULBxkDODjmaxyeDf+aJWX+7vtxkc+3xhUGvm490IRgNn40KBakRfPNt5+G/3vs8JQw6l7zWHzPsfglJPUUUxVs0nO9ySwySbguFKJcD+Jp4/zUAv9PJi8sZuUXhmjxyVpbLWBWO5gNCUwDCKnAr/AqOmw5NQdcOov2u9Cn4ryOFLE5bNU8bpDtd+yjdg2ngU+hyQ5rkpactxzfedh6u3Ly2200xCLopFBjADUS0lYiuEtuWMfMe8X4vgGXRg4joKiLaQkRbxsfHW9qgssvI2gQiCpW5yOiO5lzY0TwxU8ZCUdhrQhMKrTAhlXrIp/C2r96FL9z0aOw+XTuImo+kopCUH9BpG3+/aArKfJRyqUBEuPDEJak3g/UT3VyO83nMvJuIlgK4kYge0ncyMxNRxcjKzNcCuBYANm3a1FK7iut5yvEZVxAvn7HgeoGjmZkxVXJxwpIBHJwqhTSFVggFp4eijx7YM4FCLn4JUlcTBFEBJjWFrGWh5HoVM9tOm0Hk2JP2qB1138xgamgxXdMUmHm3eB0D8EMA5wHYR0QrAEC8jnWyTWWXVYikHpIqBUVORR/5voOi46+ytnTUt4dOzGpCoQWz+17KUyi7XmK11pCmkJCnkBR62nnzkTC7dORq7UP6YlIu2ww9SFeEAhENEdGIfA/gxQDuA/BjAG8WX3szgB91sl2O5ykNIavMRxYs8meYA1kbA7mMKog3KaKNlggnWavNR0GZi+4LhaLjoZggFJwqmoISCgk2cJWE1eHoo9T7FPrE0WzoPbqlKSwDcBsR3QPgTgA/YeafAfgkgBcR0aMALhWfO4bjshq8pKaQsXwfwz9ccQ7ecN4aDOZsUdLBw6SINloyIoVCEJXUEkezqpLaffNRyfFQcuITjHSh4LiMf73tcfzVj+4DEOQpZBNWWLM6PEgrodBrIRZzRGpeafcpGHqPrvgUmPkxAGfGbD8A4JLOt8jHdzQHpiIgGKxefsYKAMCgsKtPl9xAUxBCQfcptGJ23ytVUuXiQYmaQsh85OG/d+zHo2NHAYTrSQExmoLVYfNRn8yw+2E5TkNvkvL5UmtxPK/CpxBdinNACIWZkquS1eKEQis0hV4piOd4DGYk+hRCeQouY3LWURFarMxHSZpCZwe3vnE090mZC0PvYYSChuOy+rGpPIXIKiaBpuBgqpQsFFrpU2hVhnSjSGGQpCnoWpHrMSaLjorQUtFHCY5mZT7q2BrN/THDlvfRhHIaWk03Q1J7jrLrIasijfzBP2qzHcj6t+wjP7xPCQPlaA5FH7Uwo7nLjuayyqyOb4frhR3NUyUHM2XXL38hdmWUTyF8bKerfSpNoSNXax/R1QANhlZhhIKG6wUhqXJmGzUfSU3h9scOqG0jhSwKWSuiKdQ3kN/80Bi2PX0YH3jRSRX7ZO2jbjuaA00h3tFcjjiaJ2cdMAOzZU8zH8WbOzpeoK5foo+M+cjQJoz5SKPssZrRKkdzglDQGcr76yzoZX7q1RSuv3cPvn77E7H7HLc38hSk2SjRp+CGHc3SAT9dcjTzUbymIAe1jq28hv4YTPtlPQVD72GEgobjemp5yFqOZp2hXAYD2fD2ev0ARcfDbLl6/H+3zUfy+kk+Bd18NFsOopSmS64KSQ2ij8L3Uy1A3yFVoW9KZ6v71uWGGPoO8y+l4egZzVrtI53BXNjiNpSzYVmE4Xx4e73RR0XHxazjx
lZVLfdInkKphqZQ1vp6WCuBPFN2VfJaNmGFte7VPkq3VDDJa4Z2YYSCRlnPaLaThEJYIxgSwmBeZFnBestc+Hb3eG2g3CNVUqUwcDyOFXZ6Xw9rfpXpkqvWaO6VMhdBtFNHLtc2gjIXRigYWkvKfxqtRc9oThIKUfPRcMEXCqNRoTAHTQFAhQlJX7im22UudIEVpy04IU1BFwoOXFX7yL+f0TFs84ZFePXZq1RV2nYT1D5K92CqzEfp7oahBzHRRxpl14NthR3NFdFHEd/BcIKmUL/5SNjryy6gnaMcqifUXfORnjxXcrwKwag71fUIrBndp5CQuXzu2gU4d+2CVjc5GZW81rlLtgNTJdXQLoymoOF4rEJRpaM5+qPLRDx7Q7l4oVDv7L4oNISopqCbjLqtKRS168eFpept1X0K0nwkCwoC3beB90o7mmX+YA4XnrgYZxw3v9tNMfQZRihouDEhqVFNAQA2rV2A977wBACB+ShOU7jriYPY+uTBqtdU5qPIYFuuUnm00+gmo7gIJF1TOFyhKfhZ4r1Sq6dfFtnJZSx8423n4yyx1rXB0CqMUNAoayGp2SpVKK9713PxgUtPQtYmzXwUtsSVPcYnrn8Qf/vTh6teUw6ys+WwUNCdt902H9UUCiFNodKnQBQIhW47RvtFUzAY2oXxKWjEh6TGy03LIlx00hKcvWY+AGDeYFRT8HB4plzTnRkIhaj5qDc1hThHsy60QuajsgtmX7DaFvWEHV85mHugLQZDL2KEgoZfJVWYj2TyWpXwjn9587PV+9FCNCSVMTHjqDj9JKSGENUU9MG360Khlk8hISR1puTCFT4Fot6YnffLIjsGQ7swQkGj7HKQ0RxZT6EWFXkKHuPobBll1/OdrQlG7ETzkdc70Ue1NAW9rcz+vctaJDKa/b77foWONLcqgVDobjsMhl6l4z4FIlpNRDcT0QNEdD8RvU9sv5qIdhPRNvH3sk63zXEDTWEwl8GKeQWsWzRY17FRoTBVdFB0PHgcrp6qw8xqkJ2p6lOYm6bAzLj1kXGVONYsZbeWTyF8neF8BgO5jBZ95PsUuu1PAPTS2d1vi8HQi3TD0ewA+FNm3ghgM4B3E9FGse9zzHyW+Lu+0w0re9pynBkLt3/4Elx2+oq6jo0KBd3hemg6XijoA2wx4lOIVh6dC9t3HcEffOXOUCXXZijW0hSE0Chk/X+n4XwGgzkbMyUHHkNFH/XC7FzKgl4QUAZDL9JxocDMe5j5t+L9UQAPAljV6XbE4WsKjQ0W0YzmA1OBw/Wg9l5HH2yjIakyoidr05w1hQNTRQDA/sninI5Lonb0kS+0CiKxTwqF6ZILl4M8hV6YnfdKaKzB0Kt0NSSViNYBOBvAb8Sm9xDRdiL6ChHFprkS0VVEtIWItoyPj7esLZ7H8DhYNnKuFCKZzoc0QaBH5OjoTtuoT0FqCoWsPecqqRMzfunqo7POnI5LIlTmwo1zNPsDv3TOLxrOYSBnq4V2LKJQrkI3kS3ogaYYDD1J14QCEQ0D+AGA9zPzBIBrABwP4CwAewB8Ju44Zr6WmTcx86YlS5a0rD1ytpttUTGZg9N1aAqaySgakiq1g8GcPWfzkfRhyHUNJDvHJxtaO7pUxcwF+IUEM5al6kUtGc5jIOtrCiXHN8kR9UhIqok+Mhiq0hWhQERZ+ALhW8z87wDAzPuY2WVmD8CXAZzXyTZJc020jMVcWTmvACCqKST5FJI1BSkIBnOZOZuPJkRY6FHNwb3nyAxe9Nlf4cYH9s7pXEAk+iimLa7I75Cmt8UjeWU+OjRdwoKhHGyrNwbifimdbTC0i25EHxGAfwXwIDN/Vtuue3RfDeC+TrZLmmviylrUyz0ffTF+8acXwSLgkK4piPeTRSckLGaraQpCSA1k7YqQVGbGvonZxHZMzFaaj57YPw2Pgd2Hk49LouR4qmR4nKbgCAe9FGxLhvMYymcwVXRwYKqEhUM5LBzMYUEkwa9bEBnzkcGQRDc0hQsAvAnACyPhp58ionuJaDuAiwF8oJONkhE02SY0hXkDWQzmMshYlnI0D+czyqfw59dtx9kfvxEP7z0KoIajWWkKdoWm8IsHx3DBJ38ZEgy6iUpqCLpQ2DsxAyBcxbReyq6HEVHjKX7dBz+UV2pEi0dyWDycx/jRIg5OFbFoKId3veAEfO+dz5nztduBDJE1GAyVdCP66DZmJmY+Qw8/ZeY3MfPpYvurmHlPJ9vlRBaDaQbbIr+8g0VYOb+gBuxbH/Ud43/2g+0AapmPhKYQIxQeG5+E4zGeOjgNANj65EGc+9c34rHxSQC6o1k3H82KfXMXCkXXU9Vg46KPXKEpyH2Lh/NYOpLHTNnFnsOzWDiUx0DOxtKRwpyv3Q4IJvrIYEjCFMQTSLt5tgVLcknBMlrIYMFgDoem/IFYLiTz5IEpADXyFISQGshWOpplqKnUFB7dNwlm4MkDvpCQjuYJXVMQQqERTaHkeMhlLOQyVmyZi7LLIQ1r8XAey0Z9AeB4jEXDuTlfs530SiKdwdCLGKEgkANtKwYw6ZcYHchiyUge45NFFB0X+ydLyAkzy2zZVYLA1uzxEl1TKLleaA3n/ZO+5jE2IYWD/ypNVoGjORAKzxyuXyi4HoeuV3I85DMW8raVUObCC61QJzUFycKh3hIKg3m7YllVg8HgY4SCYOyoP7C2wsQhI5hGC1ksHSlg38SsGsDPOG4eAGD8aFHNukcLmZj1FGRIqm+20UtJSAEm2zx21B/wD4qktcDRHAiAufgULv3sr/Avv35cfZaaQj5rJSav6Wa3hUM5LB3tXaHwb2/fjLc+d323m2Ew9CRGKAjGhClmmTaYNUqgKWSwdDSP6ZKLHWO+vV+ulDV2tKg0hXkD2Zg8BV8IDOf9Ge10KRAa41IYiDZL4VBNU6jXfHRkpozH909h267DalvJ9ZC1LeSSNAXXC5ndbIuwdDQQrot6TChsXDlaUercYDD4HLNCQQ6SkrGjRVgELBpuXijIyJbjlwwrIXOPGGTPXO1rCmMTs0pTmDeYSzQfnbbK//52bZBW5qOjYY3h4GQJzKyEwWTRATMr0xVQ29G865Dvl9h9aAbMjFd98TZsffIQchkLhayNmVJ8RnPUQT+Sz6haSL2mKRgMhmSOSaFw1xMH8fy/uxnXbd2lto1NFLF4OB+yjTfK7sO+qea0VfOUOeqepw8DAM7UNQVH1xTiS2dv3rAItkX4zWP+sp6ux8pMJM1GUmM4OFVC0fFQcj3MG8jC9RinX30D/vMeP5Br4VCupqaw69CMer1v9wS27zoCwC//sW7xEB4dO1pxjMxT+MWfXIT/eu/zAPhJYrLvi4aaF7QGg6EzHJNC4fRV83D++oX439fdg21isN53dDZkB2/VdaTD9Z5dRzBSyGDNwkHYFmHs6GxEKPjvv3fX07j8i7epfaOFLE5fNQ93iIqnh6ZLokYTYexoEZ7Hypx0YKqkNIFV8wcA+NrCx358PwDgRc9ahqLjVQggHSkU9k8W8e93B0Lz/meO4LSVo9gxNlmhLcyUXWRsCycs
HVaaDQAsHckjYxFGB8yyHQZDWjgmhUIha+Mff/8cWET4zp1P4dLP/gq3PDze8jj6E5YOK9v6wakSlo8WYFmEJcN57Jsoolh2QQSMFDKYLvkmnx/f8wzu2XUEv3xoDIAf3nr+hoW4Z9dhfOaGh7H5/96kzn14uoy9E7NKqzg4VVLhqKsWDKh2HC062LhiFKcLJ/dTB6fx4s/9Cj+/fy8e3z+Fl3/h1/jxPc8A8M1Gku/d9TTOXevXJZwtuzht1Tx4DDywZyI492wZ254+rBzoOsvmFbBwKGfCPw2GFHHMTuFGC1mctmoevrvlacjoy1Y4mXWytoXRQjAgygF26WgeY0f9TN98xsIJS4ZxaLqMJw9MYcuTvplo65OHcP76hcjaFq48fy1++Nvd+Idf7lDn2rhyFA/tPYob7vdrGa2Y5yfJPSSypaWmIHnZ6ctVee/P3vAIHtk3ib/5yYMoOR72TsziE9c/iJecukz5FABgquTi9eceh3c8fwPWLBpUS47et/uI6ssvHxpDyfHw8ph1J95z8QnYe87cy2oYDIbucUxqCpLN6xdCX0J5wWBrHKJf+8Pz8L13+CUd9Fnyy8TAuXSkgJ1jkzg4VUI+Y2PzhkUAgC//+jHMlj1VI+h9l54IAFi9cBDffcdz8OHLTsF8se8Nm1bjuAUD+Nh/PQAAOGX5CCaLDv7Xt+/GyctGsGmdP2hfftZK/PlLT8GVm9eqhYB+dv9erJo/gKcOTqPouPjLlz8Le47M4nl/ezNueGAfTlk+AsCPInrxqcvx4lOX45Tlo1gxr4BFQznc/dQhAH6o6rd+8xSWjeZxzprKSufPWjGKi09Z2pJ7ajAYOsMxqykAwPkbFuJLtz6GNQsHxQA5t2qkSVx0UnxJ7+cc7w/+V5y3Gn/09S34/tZdWDycxynLRzBvIItv3vEUAOCaK8/F/c9M4DlCWADA+sVDeMdFx+Pys1bhuq1P49nrFuI7V23Gv/z6cWRtwuqFg7j54XF4DHzj7edhwWAOH7j0JLz1eevUDF9fHe7Lf7AJ/71jPy46eQlOXDqMouPh1kfGMX60iOOXDmPn+CTOX78oFDlERHjJactx3ZZd+NMXT+Ovf/IA7nz8ID75mtMT16A2GAzpgvTM1bSxadMm3rJlS8PHFx0XX7jpUbxp8zr8251P4crNa9pSn+fmh8YwU3aVpgAANz24D/+1fQ/OXbsAV25ei9de8z/Y+uQhvO7c4/Dp158552v8x9278f7vbsNlpy3HNVeeG/udx/dP4eJP34KXnLoMX3rTpor9juvhn27ZiReeshR3P30YZ6yahzNXzw995+mD07j407dgIGvjaNHB1a/ciLdcYBLBDIY0QURbmblyEMAxLhR6iXuePozbduzHO56/oaE1HY7MlPH5XzyK9116YsV60RLXY3zq5w/h989bizWLBhtu6zfueBK379yPS05Zhteee1zD5zEYDN3BCAWDwWAwKKoJhWPa0WwwGAyGMEYoGAwGg0FhhILBYDAYFD0nFIjopUT0MBHtIKIPdbs9BoPBcCzRU0KBiGwA/wjgMgAbAVxBRBu72yqDwWA4dugpoQDgPAA7mPkxZi4B+A6Ay7vcJoPBYDhm6DWhsArA09rnXWKbgoiuIqItRLRlfHy8o40zGAyGfqfXhEJNmPlaZt7EzJuWLIkvJ2EwGAyGxui12ke7AazWPh8ntsWydevW/UT0ZBPXWwxgfxPH9wr90g/A9KVXMX3pTRrty9qkHT2V0UxEGQCPALgEvjC4C8DvMfP9bbrelqSsvjTRL/0ATF96FdOX3qQdfekpTYGZHSJ6D4CfA7ABfKVdAsFgMBgMlfSUUAAAZr4ewPXdbofBYDAci6TO0dxiru12A1pEv/QDMH3pVUxfepOW96WnfAoGg8Fg6C7HuqZgMBgMBg0jFAwGg8GgOCaFQtqL7hHRE0R0LxFtI6ItYttCIrqRiB4Vrwu63c44iOgrRDRGRPdp22LbTj5fEM9pOxGd072WV5LQl6uJaLd4NtuI6GXavg+LvjxMRC/pTqsrIaLVRHQzET1ARPcT0fvE9tQ9lyp9SeNzKRDRnUR0j+jLx8T29UT0G9Hm7xJRTmzPi887xP51DV2YmY+pP/ihrjsBbACQA3APgI3dbtcc+/AEgMWRbZ8C8CHx/kMA/rbb7Uxo+/MBnAPgvlptB/AyAD8FQAA2A/hNt9tfR1+uBvDBmO9uFP9reQDrxf+g3e0+iLatAHCOeD8CP1doYxqfS5W+pPG5EIBh8T4L4Dfifn8PwBvF9n8G8C7x/o8B/LN4/0YA323kuseiptCvRfcuB/A18f5rAH6ne01JhplvBXAwsjmp7ZcD+Dr73AFgPhGt6EhD6yChL0lcDuA7zFxk5scB7ID/v9h1mHkPM/9WvD8K4EH4NcdS91yq9CWJXn4uzMyT4mNW/DGAFwK4TmyPPhf5vK4DcAkR0VyveywKhZpF91IAA7iBiLYS0VVi2zJm3iPe7wWwrDtNa4iktqf1Wb1HmFW+opnxUtEXYXI4G/6sNNXPJdIXIIXPhYhsItoGYAzAjfA1mcPM7Iiv6O1VfRH7jwBYNNdrHotCoR94HjOfA3/diXcT0fP1nezrj6mMNU5z2wXXADgewFkA9gD4TFdbMweIaBjADwC8n5kn9H1pey4xfUnlc2Fml5nPgl8H7jwAp7T7mseiUJhT0b1ehJl3i9cxAD+E/8+yT6rw4nWsey2cM0ltT92zYuZ94ofsAfgyAlNET/eFiLLwB9FvMfO/i82pfC5xfUnrc5Ew82EANwN4DnxznaxGobdX9UXsnwfgwFyvdSwKhbsAnCg8+Dn4Dpkfd7lNdUNEQ0Q0It8DeDGA++D34c3ia28G8KPutLAhktr+YwB/IKJdNgM4opkzepKIbf3V8J8N4PfljSJCZD2AEwHc2en2xSHszv8K4EFm/qy2K3XPJakvKX0uS4hovng/AOBF8H0kNwN4nfha9LnI5/U6AL8UGt7c6LaHvRt/8KMnHoFvn/tIt9szx7ZvgB8tcQ+A+2X74dsObwLwKIBfAFjY7bYmtP/b8NX3Mnx76NuS2g4/+uIfxXO6F8Cmbre/jr58Q7R1u/iRrtC+/xHRl4cBXNbt9mvteh5809B2ANvE38vS+Fyq9CWNz+UMAHeLNt8H4K/E9g3wBdcOAN8HkBfbC+LzDrF/QyPXNWUuDAaDwaA4Fs1HBoPBYEjACAWDwWAwKIxQMBgMBoPCCAWDwWAwKIxQMBgMBoPCCAWDQYOIXK2S5jaqUUWXiN5JRH/Qgus+QUSLmz2PwdAsJiTVYNAgoklmHu7CdZ+AH++/v9PXNhh0jKZgMNSBmMl/ivx1LO4kohPE9quJ6IPi/f8Sdfy3E9F3xLaFRPQfYtsdRHSG2L6IiG4QdfL/BX5CmLzWleIa24joS0Rkd6HLhmMUIxQMhjADEfPRG7R9R5j5dABfBPD3Mcd+CMDZzHwGgHeKbR8DcLfY9hcAvi62fxTAbcx8Kvz6VWsAgIieBeANAC5gvxCaC+D3W9lBg6EamdpfMRiOKWbEYBz
Ht7XXz8Xs3w7gW0T0HwD+Q2x7HoDXAgAz/1JoCKPwF+h5jdj+EyI6JL5/CYBzAdwlSuEPIF3FDQ0pxwgFg6F+OOG95OXwB/tXAvgIEZ3ewDUIwNeY+cMNHGswNI0xHxkM9fMG7fV2fQcRWQBWM/PNAP4cftniYQC/hjD/ENELAOxnv77/rQB+T2y/DIBc9OUmAK8joqVi30IiWtu+LhkMYYymYDCEGRArXUl+xswyLHUBEW0HUARwReQ4G8A3iWge/Nn+F5j5MBFdDeAr4rhpBKWNPwbg20R0P4D/AfAUADDzA0T0l/BX1rPgV2B9N4AnW9xPgyEWE5JqMNSBCRk1HCsY85HBYDAYFEZTMBgMBoPCaAoGg8FgUBihYDAYDAaFEQoGg8FgUBihYDAYDAaFEQoGg8FgUPx/NQ9tDiVDbFkAAAAASUVORK5CYII=\n",
203 | "text/plain": [
204 | ""
205 | ]
206 | },
207 | "metadata": {
208 | "needs_background": "light"
209 | },
210 | "output_type": "display_data"
211 | },
212 | {
213 | "name": "stdout",
214 | "output_type": "stream",
215 | "text": [
216 | "Total Reward: 200.0\n"
217 | ]
218 | }
219 | ],
220 | "source": [
221 | "import copy\n",
222 | "from collections import deque\n",
223 | "import random\n",
224 | "import matplotlib.pyplot as plt\n",
225 | "import numpy as np\n",
226 | "import gym\n",
227 | "from dezero import Model\n",
228 | "from dezero import optimizers\n",
229 | "import dezero.functions as F\n",
230 | "import dezero.layers as L\n",
231 | "\n",
232 | "\n",
233 | "class ReplayBuffer:\n",
234 | " def __init__(self, buffer_size, batch_size):\n",
235 | " self.buffer = deque(maxlen=buffer_size)\n",
236 | " self.batch_size = batch_size\n",
237 | "\n",
238 | " def add(self, state, action, reward, next_state, done):\n",
239 | " data = (state, action, reward, next_state, done)\n",
240 | " self.buffer.append(data)\n",
241 | "\n",
242 | " def __len__(self):\n",
243 | " return len(self.buffer)\n",
244 | "\n",
245 | " def get_batch(self):\n",
246 | " data = random.sample(self.buffer, self.batch_size)\n",
247 | "\n",
248 | " state = np.stack([x[0] for x in data])\n",
249 | " action = np.array([x[1] for x in data])\n",
250 | " reward = np.array([x[2] for x in data])\n",
251 | " next_state = np.stack([x[3] for x in data])\n",
252 | " done = np.array([x[4] for x in data]).astype(np.int32)\n",
253 | " return state, action, reward, next_state, done\n",
254 | "\n",
255 | "\n",
256 | "class QNet(Model):\n",
257 | " def __init__(self, action_size):\n",
258 | " super().__init__()\n",
259 | " self.l1 = L.Linear(128)\n",
260 | " self.l2 = L.Linear(128)\n",
261 | " self.l3 = L.Linear(action_size)\n",
262 | "\n",
263 | " def forward(self, x):\n",
264 | " x = F.relu(self.l1(x))\n",
265 | " x = F.relu(self.l2(x))\n",
266 | " x = self.l3(x)\n",
267 | " return x\n",
268 | "\n",
269 | "\n",
270 | "class DQNAgent:\n",
271 | " def __init__(self):\n",
272 | " self.gamma = 0.98\n",
273 | " self.lr = 0.0005\n",
274 | " self.epsilon = 0.1\n",
275 | " self.buffer_size = 10000\n",
276 | " self.batch_size = 32\n",
277 | " self.action_size = 2\n",
278 | "\n",
279 | " self.replay_buffer = ReplayBuffer(self.buffer_size, self.batch_size)\n",
280 | " self.qnet = QNet(self.action_size)\n",
281 | " self.qnet_target = QNet(self.action_size)\n",
282 | " self.optimizer = optimizers.Adam(self.lr)\n",
283 | " self.optimizer.setup(self.qnet)\n",
284 | "\n",
285 | " def get_action(self, state):\n",
286 | " if np.random.rand() < self.epsilon:\n",
287 | " return np.random.choice(self.action_size)\n",
288 | " else:\n",
289 | " state = state[np.newaxis, :]\n",
290 | " qs = self.qnet(state)\n",
291 | " return qs.data.argmax()\n",
292 | "\n",
293 | " def update(self, state, action, reward, next_state, done):\n",
294 | " self.replay_buffer.add(state, action, reward, next_state, done)\n",
295 | " if len(self.replay_buffer) < self.batch_size:\n",
296 | " return\n",
297 | "\n",
298 | " state, action, reward, next_state, done = self.replay_buffer.get_batch()\n",
299 | " qs = self.qnet(state)\n",
300 | " q = qs[np.arange(self.batch_size), action]\n",
301 | "\n",
302 | " next_qs = self.qnet_target(next_state)\n",
303 | " next_q = next_qs.max(axis=1)\n",
304 | "        next_q.unchain()  # detach the target-network output from the graph\n",
305 | " target = reward + (1 - done) * self.gamma * next_q\n",
306 | "\n",
307 | " loss = F.mean_squared_error(q, target)\n",
308 | "\n",
309 | " self.qnet.cleargrads()\n",
310 | " loss.backward()\n",
311 | " self.optimizer.update()\n",
312 | "\n",
313 | " def sync_qnet(self):\n",
314 | " self.qnet_target = copy.deepcopy(self.qnet)\n",
315 | "\n",
316 | "episodes = 300\n",
317 | "sync_interval = 20\n",
318 | "env = gym.make('CartPole-v0')\n",
319 | "agent = DQNAgent()\n",
320 | "reward_history = []\n",
321 | "\n",
322 | "for episode in range(episodes):\n",
323 | " state = env.reset()\n",
324 | " done = False\n",
325 | " total_reward = 0\n",
326 | "\n",
327 | " while not done:\n",
328 | " action = agent.get_action(state)\n",
329 | " next_state, reward, done, info = env.step(action)\n",
330 | "\n",
331 | " agent.update(state, action, reward, next_state, done)\n",
332 | " state = next_state\n",
333 | " total_reward += reward\n",
334 | "\n",
335 | "    if episode % sync_interval == 0:  # periodically copy weights into the target network\n",
336 | " agent.sync_qnet()\n",
337 | "\n",
338 | " reward_history.append(total_reward)\n",
339 | " if episode % 10 == 0:\n",
340 | " print(\"episode :{}, total reward : {}\".format(episode, total_reward))\n",
341 | "\n",
342 | "\n",
343 | "# === Plot ===\n",
344 | "plt.xlabel('Episode')\n",
345 | "plt.ylabel('Total Reward')\n",
346 | "plt.plot(range(len(reward_history)), reward_history)\n",
347 | "plt.show()\n",
348 | "\n",
349 | "\n",
350 | "# === Play CartPole ===\n",
351 | "agent.epsilon = 0 # greedy policy\n",
352 | "state = env.reset()\n",
353 | "done = False\n",
354 | "total_reward = 0\n",
355 | "\n",
356 | "while not done:\n",
357 | " action = agent.get_action(state)\n",
358 | " next_state, reward, done, info = env.step(action)\n",
359 | " state = next_state\n",
360 | " total_reward += reward\n",
361 | " #env.render()\n",
362 | "print('Total Reward:', total_reward)"
363 | ]
364 | }
365 | ],
366 | "metadata": {
367 | "kernelspec": {
368 | "display_name": "Python 3 (ipykernel)",
369 | "language": "python",
370 | "name": "python3"
371 | },
372 | "language_info": {
373 | "codemirror_mode": {
374 | "name": "ipython",
375 | "version": 3
376 | },
377 | "file_extension": ".py",
378 | "mimetype": "text/x-python",
379 | "name": "python",
380 | "nbconvert_exporter": "python",
381 | "pygments_lexer": "ipython3",
382 | "version": "3.9.4"
383 | }
384 | },
385 | "nbformat": 4,
386 | "nbformat_minor": 5
387 | }
388 |
--------------------------------------------------------------------------------
/notebooks/09_policy_gradient.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "dcad5835",
6 | "metadata": {},
7 | "source": [
8 | "## SETUP"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": null,
14 | "id": "cc983bba",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "!pip install gym[classic_control]\n",
19 | "!pip install dezero"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "id": "758a1577",
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "import matplotlib.pyplot as plt\n",
30 | "\n",
31 | "# utility functions (common functions)\n",
32 | "def plot_total_reward(reward_history):\n",
33 | " plt.xlabel('Episode')\n",
34 | " plt.ylabel('Total Reward')\n",
35 | " plt.plot(range(len(reward_history)), reward_history)\n",
36 | " plt.show()"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "id": "ab0de828",
42 | "metadata": {},
43 | "source": [
44 | "## ch09/simple_pg.py"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "id": "b311d659",
51 | "metadata": {},
52 | "outputs": [
53 | {
54 | "name": "stdout",
55 | "output_type": "stream",
56 | "text": [
57 | "episode :0, total reward : 11.0\n",
58 | "episode :100, total reward : 26.0\n",
59 | "episode :200, total reward : 14.0\n",
60 | "episode :300, total reward : 22.0\n",
61 | "episode :400, total reward : 13.0\n",
62 | "episode :500, total reward : 13.0\n",
63 | "episode :600, total reward : 75.0\n",
64 | "episode :700, total reward : 57.0\n",
65 | "episode :800, total reward : 30.0\n",
66 | "episode :900, total reward : 82.0\n",
67 | "episode :1000, total reward : 74.0\n",
68 | "episode :1100, total reward : 176.0\n",
69 | "episode :1200, total reward : 74.0\n",
70 | "episode :1300, total reward : 41.0\n",
71 | "episode :1400, total reward : 76.0\n",
72 | "episode :1500, total reward : 79.0\n",
73 | "episode :1600, total reward : 94.0\n",
74 | "episode :1700, total reward : 187.0\n",
75 | "episode :1800, total reward : 28.0\n",
76 | "episode :1900, total reward : 23.0\n",
77 | "episode :2000, total reward : 133.0\n",
78 | "episode :2100, total reward : 69.0\n",
79 | "episode :2200, total reward : 52.0\n",
80 | "episode :2300, total reward : 113.0\n",
81 | "episode :2400, total reward : 47.0\n",
82 | "episode :2500, total reward : 101.0\n",
83 | "episode :2600, total reward : 83.0\n",
84 | "episode :2700, total reward : 63.0\n",
85 | "episode :2800, total reward : 40.0\n",
86 | "episode :2900, total reward : 102.0\n"
87 | ]
88 | },
89 | {
90 | "data": {
91 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA/j0lEQVR4nO2deXwV1dnHf08SCMi+hEUWWUTZRMCIKygCbrTFpa9i3apWaqutuy9qa3GrtG/Vaq0LVqq4oUXrBqIIKCBrgBB2CBCWEJKwZIHsyfP+MTM3c29m7p25d+bOXZ7v55NP5p6ZOec5d+ae55znPOc5xMwQBEEQBABI8VoAQRAEIXYQpSAIgiD4EKUgCIIg+BClIAiCIPgQpSAIgiD4SPNagEjo3Lkz9+nTx2sxBEEQ4oq1a9ceZuYMo3NxrRT69OmDrKwsr8UQBEGIK4hor9k5MR8JgiAIPkQpCIIgCD5EKQiCIAg+RCkIgiAIPkQpCIIgCD5cUwpE1IuIFhPRFiLaTET3qukdiWgBEe1U/3dQ04mIXiaiXCLKIaKRbskmCIIgGOPmSKEOwIPMPBjAuQDuJqLBAKYCWMjMAwAsVD8DwBUABqh/UwC85qJsgiAIggGuKQVmLmDmdepxOYCtAHoAmATgHfWydwBcpR5PAjCLFVYCaE9E3d2ST3CeA8cq8P32Isfz/WFHMfYfrQjrXmbGx1n7UVPXEJEMlTX1eGvZHszNKQAAbDtUhrV7j0aUp5ss23kYeYdPRJxPfkklFm8zfqa5RcexYteRoPfX1DXg46z9WL/vGDbll/qd+zw7H2VVtX5pa/KOYvuh8pBybTtUhs/W5+OHHcVoaGBM+2Iz1u49inX7jmHLwTLfdQ0NyvM/dqIGX2w4GDTP4vJqzN90qMm9NXUNWLStEAdLKlHfwPh4zX7U1Td9n1bvOYqdheXYcrAMby3bg40HSvFx1n40NDBW7zmKHYXlKCyrwndbCpvUeYd639q9x/DCgh1+1zQ0MB79dCNyDpQAAPYfded3phGVxWtE1AfACACrAHRl5gL11CEAXdXjHgD26247oKYV6NJARFOgjCTQu3dv94QWbHPpi0tQUVOPvOkTHc331pmrkZZCyP3zlbbv/TKnAI/MycGBY5V4YMJpYcvw7LwteG/lPgDA8N6X4PK/LwUAx+vqFDe9tQpA5PJNfHkpSipqDfMZ/8IPIct4ZdFOvLwo1/dZu3b7oXLcOzsblw/phtdvPst3/n9eX2FJbu37B4Dp15yBt5fn4e3leU3K+c/a/fjfTzb60gd1a4MBXdsY5nnrzNXYUlCGzU9ehlbpafgy5yAemZOD/GOVeGnhTnRu3RwPTDgdj/13I0ora3HnmH5+91/3xgrDfBdvK8LXqrLp2aElDhyr9KufVudAtGu+zDmID1fvw4er9+GlycPxyJwcVNc1uPbuuT7RTEStAXwC4D5mLtOfY2WHH1u7/DDzDGbOZObMjAzDVdqCR1TU1LuWd11DeJtBlVYqPdEjx6sjKr+orPH+SEcd8URJRW3oi4JQfLzGML2ipg4AUFBWFVH+AHAsiIyB8lfVmj87bTRar248pr07R08odTh8vAbHKpTjoxXG9TLia93o48CxSsv3aZRVNtbh3tnZqHb5/XNVKRBRMygK4X1m/lRNLtTMQup/bRyUD6CX7vaeapogeI7sT5g8BG5GyQZPP5zGPVyIKGplAe56HxGAtwBsZeYXdKe+AHCrenwrgM916beoXkjnAijVmZkEISKkUU9OAp+7UQPvQ217V+4+gtr6Bhg1xVr7/GWI+QknibJOcHVO4QIANwPYSETZatpjAKYD+JiI7gCwF8B16rl5AK4EkAugAsBtLsomJAlO/Z5kK/NwMf7iYvnr/PW7a/HrMf3Qs0NL18pgZhQfr0ZFtbnJtbSyFu1aNgM59hZbwzWlwMzLYP6bHGdwPQO42y15hOQklhsfwTmlbUagMt9ysAzDerYPKUtu0XGfUnCrQzDq2YVBz1/3+gp8c/+YqI8UZEWzkBRE+XclWCTaSnvqpxuxu/h4lEttihVFs71Qcc2N9rsrSkFICmTEEFuEauiKyqpworoOZVW1OByh51ggmjdRIGVVdY6W4xSJNKcgCJ4jI4T4ZNSfF6Jf51Y4VFblytoXS7jYGh+vsa6Aoj2nICMFQbCEjDWsUt/AIdeFWPk2dx8+4eraF6tYkXXRtsLQF+k4/7lF1i+WOQVBcB7xHooeT3+1BWc9812TEBZGeDWSK6uqRX2IBZGGLqkmEutDa1jheLWdkUJ0EaUgCDYRk1RwvtmsrOA9bsFG77auNlqXcLy6DsOmfYvn5m0Nca+1/AAgLdW9pjRhFq8JQizh5O/Ki0FHQwOH7NnGIkYjtPoG9nTkpvXSv8xxbgFaWop7DXe0OyEy0SwIccCVLy/FtkPlMRuAT0+oBr//Y/OiI4gJVidu7ZiP3FQKKVHuustIQUgKIu2Zej0nsc1COOlYw8rozImm1O4oMGioC6PrLVzupvno/o82uJa3EaIUhIQm2j7eTlFUVoU+U+fi82yJCRmKcBV2qBGDnXcnXt8zI0QpCAmNUz38aA8UdhYpq24/WrM/xJWC4CyiFISkwMmeXAJ1ChMep81+iTQiMEMmmoWkwMnGIf58gBIbs4Z6+FPfGm6IFO0VwvGGKAUhoUmGnl2sYXci1y3Mdo2zL19s1CdaiPlIEARXIFBUvLbcKkMbUXjteRZtRCkIggU4DlqGfy7OxW/fXxv1ct9atge3v70m6uVGitVRpP7RT/96mzvCmHDv7PVRLQ8Q85Eg2CZWLVL/9812T8p9+qstnpTrNprS8NIc9nl29Lb91HBzj+aZRFRERJt0aR8RUbb6l6dt00lEfYioUnfudbfkEsyZuWwPnvs6eDwYQYg13Jo3ioPBoSu4OVJ4G8ArAGZpCcx8vXZMRM8DKNVdv4uZh7sojxCCp9Qe36NXDPJYkuAs2FKIH3MPY9rPhkStTDY5Tkb+On8bTu/WxvR8sjamiYKbezQvIaI+RudICft3HYBL3CpfSFzunJUFADaVQvK0VFM/yXE1/1e/32XpOqLY8UQKB5/5KH6rEBZeTTSPBlDIzDt1aX2JaD0R/UBEo81uJKIpRJRFRFnFxcXuSyrENeH4pO85fALTvtiMhjiMSgoAs2UVtCWsvhlW3oJEWvvglVK4AcCHus8FAHoz8wgADwD4gIjaGt3IzDOYOZOZMzMyMqIgqhDPhNNT/fW7WXh7eZ4v1EQgifPzdwc737gX60is9vw10coqQ28WFM8jokCirhSIKA3ANQA+0tKYuZqZj6jHawHsAnBatGUTBKCx0dA3WLFmQjhwrAIvfLs9Llxl451vt9jbajPe8WKkMB7ANmY+oCUQUQYRparH/QAMALDbA9mEBCOSYX0st7d3vbcWLy/KRa7JaMZL7HzjXnzHTo1Ofsw93JhnAo0f3XRJ/RDACgCnE9EBIrpDPTUZ/qYjABgDIEd1UZ0D4C5mPuqWbEJicLCkEm8t2+N4vqEajVjQ
FdW1SkyfWJAlkFiUyQ1u/Ncq33EimY/c9D66wST9lwZpnwD4xC1ZhMTk9rfXYNuhclwxtBtObt/SsXxjeYQQTxC8+y6DmdXk+QZHwlwIcUu5ujF8Q5R/5YljKPAetyaaf9gR2jORQhRua5OdBHorRCkIQgBGjUEsdS6X7zps6hkVC9jR0ev3lbgyL1JtEDI7kPySSmwtKIu4rLV7j2HD/pKI84kVRCkIcU+ymQN+8eaq0BfFAhY7z+Nf+CHkNQdLKrF4e1GEAjXlipeWAgDW7zvW5JzV3v+1ry3HR1mJszZElIIgWEBcP8PAwa9s4stLcdu/rUdiDdacG40Er351eZO0RJo8toMoBcET1u87hoMllVErT3Ze8+dQaRXW7lV6xzV1DVhg4ItfVFYVbbFMOWayYU44iH4PjigFwROufnU5zp++yPVyZOc1Yy7+22Jc+5rSO/7bt9tx56wsrNh1xO+aK19eFlkhcf7dJ9LksR1EKQgJTSS9QjPzQTw1FZvyS1Fq0Muuqm2ciN13pAIAUFJR43fN4ePVEZW9u/gECkq9GW2E8iwSzJFNdgQhgETqIf7kH8swsFsbzL9vjOk1ztvOlfwmz1jpcL7WceIJ7j9W4UAu7lFaUYt2JzVzPF8ZKQgJTTgdxkSbYNx2qNzSddK59ucfi3K9FiEoV7/2oyv5ilIQBCGu2FFoTcklOruLT7iSrygFQQggkcxHicaPuYdx6YtLvBYjoRGlIAgWSAY3xoMlseOCasbu4thdyZ0oiFIQ4hYnbOAlFTWorQ8dEsGr/KKBpvCe+moLcg6UeCpLKGrqrWnnYO9GEuj3iBClICQ1w59agAc/3uBIXg0NjOFPLcAjc9zdI9lNnLBTuzmqevqrLY7nWWphZ7VkQpSCkBQEa6i+2HAw7Hwra+oby1D/f56dH3Z+RtTH6V7R0cKuEgocRJz55LeOyZIIiFIQEhrLm7MbtCz6JDM31UFPzMeaPGf3g6oLMD/98t+rHc0/0Ug0F2KvEaUgJC16RTBjSePur3bnKrLy/CNsRtJE5R0+gVMf/xqfrW8cbSzdeTjIHYIRMqcQPm5uxzmTiIqIaJMubRoR5RNRtvp3pe7co0SUS0Tbiegyt+QSEodCNWBbMPOB1QZgzlrfluH2zREmDdAri3aiz9S5qKqtN77AAG2h2byNBfaEiABpJAU9bo4U3gZwuUH6i8w8XP2bBwBENBjK3s1D1HteJaJUF2UTEoBai54o0UbTEW8vzwPQuEOcpXtjdInE/32zDX2mzg16TZ+pc13Z88Bp3vhhl9cixDSuKQVmXgLAqrF1EoDZzFzNzHsA5AIY5ZZsQvIQThtrt2EOvDxQVYVj846Wurtv9npL1/1zsbWG9MsNB2Ni5JGeZt6n3Hww8t3WNFbtPhL6ojjDizmFe4goRzUvdVDTegDQb110QE1rAhFNIaIsIsoqLg69D6sgmGHHTBStxWvRHih8lh2+51WsYPRsOrVuHpWyI/Fci1WirRReA9AfwHAABQCet5sBM89g5kxmzszIyHBYPMEtjhyvRp+pc7HEwobqN8xYiXs+WOdo+U56qJiNJGJ5d7Y+U+dGHArbErH7FQgWiapSYOZCZq5n5gYAb6LRRJQPoJfu0p5qmpAg5OSXAgDeWrYn5LUrdh/BVznRm2i1gp97akDDF6gkYnRawHS1cjR1WZbD7rsA8PWmpu9KDOvnmCeqSoGIuus+Xg1A80z6AsBkIkonor4ABgAQ52zBElZGAUZB7sJtN+77KDuEPI2lJh0hqqx3/XWKTfnOzRHYJVYdAyLBtU12iOhDABcD6ExEBwD8CcDFRDQcyu8mD8CvAYCZNxPRxwC2AKgDcDczW/fjEwQXMOttZu8v8fscMqpqGNonnnu6sWxGE0LjmlJg5hsMkt8Kcv2zAJ51Sx4hcbES6jpeVr1q20jakfeX/16N6zN7hb7QIou3F4XvthkfX7MQBFnRLEQVt9uMorIqXPvact+kqtXhfSRmAO1e07qFkbd+D+VQfL+9GL9537mJ+d++tw4rdztv+48mMlgJH1EKQtyj71X/e3ke1u49ho/WKB7OQVc7606GakQiGmkY3NrQwLhv9nqs36eEyHhu3lbM31Tg0SyE9br9Zf624BeEqEAi2uATDVEKQkLy5lL/CU0vdlML1gAeOVGDz7IP4s5ZWQCAN5bsxl3vrUOFjZAYgDtT2cHkfu37EGalCHTnN5sPhX+zRyTiLn2iFISEQuvxl1T4x8iP5pyC2ahj35EKPPH5piahsPX2e30MJktl2ZbOXT5dH74n+a/fXeuYHJFuFlSbxOHKRSkIccOsFXm273EqWmYw85JvcjhEhv/7SQ5mrdiLtXv9o6o+93WjScZNz51o9WqD1SBaMkz9dGNE9y/dmbzREkQpCFHBiabgic83G6ZvO1SONyP0f49oojlEXloj2SxN+bmdqGkMkBfNCVEro6UPVu9DRY14gyczrrmkCoIeN9s+zexw55h+5uWHmkh2UEAtr0Bl0TxVUQo1dbG1h/N3Wxsjm67e443XkaxtiB1kpCAkLNV1kfV4rc5DaCODFQERM63cHak3TqJMc67bdyz0RTFIInpTiVIQokK0fjv6hlwfZ8nox2srSqqFa26daS0yi5PfhZ3+dSx7ytTH1uApqRGlICQsdhaAGSmNSBtRK3cnm9XErGcdryOFnYXHvRbBcUQpCAnFj7nG+xmH0/g65cYaKw1/tNxyw6nv9K9DLIqLMlbrEGgyTAREKQhRxc6EYkMYvuL6iJmE4L19fSOpF4tsGoqNrl6TdxRF5YH7FzStT7Rt0gu2FKK4iVyC0Iip9xERPRDsRmZ+wXlxBKGRz7Lzcc3InhHl4VUgvP95fYXpOb3SiXQUYUen1NQ14K731mFgtzaRFRqC8qpa03OJODGbaARzSdXenNMBnA1lzwMA+ClkrwMhCgSuSnaKH3MPY2iPdmjRrHGgvP9YRfCbLCxes4P57m22s7KMNpm790iIukZIEi8GTghMzUfM/CQzPwllF7SRzPwgMz8I4CwAvaMloJC8RNq2EDU1H5VW1uLGf63CXQEhFYwmpVfscs9e7FTjb2tVdswFxRBiEStzCl0B1Og+16hpgmCbcHrVTqItHNtZVB6yYX5m7lbsPXICQPAGNVSVtHuNyvPi6xATjhAMKyuaZwFYTUT/VT9fBeDtUDcR0UwAPwFQxMxD1bT/g2J+qgGwC8BtzFxCRH0AbAWwXb19JTPfZb0aQrwQbytXy6vqQl4TSRsbZ1+HkAQEHSmQ0q2bBeA2AMfUv9uY+TkLeb8N4PKAtAUAhjLzMAA7ADyqO7eLmYerf6IQBEcVSKQ5HXN4fsNqb/1QaVXwfByQRRD0BB0pMDMT0TxmPgOAra2dmHmJOgLQp32r+7gSwM/t5CkIdjByR62xuXRWa7xzi4IsUgrDhdWqvtt9+IStvIMRC6OSWF5VLShYmVNYR0Rnu1D27QC+1n3uS0TriegHIhptdhMRTSGiLCLKKi5O3vC28YYXcwlVuthHWulTP8kBAJRZMAsp90Uut9Y
YG7XJR07UGKTayDuMa6VZFoJhRSmcA2AFEe0iohwi2khEOZEUSkSPA6gD8L6aVACgNzOPAPAAgA+IqK3Rvcw8g5kzmTkzIyMjEjGEOMSOSclolzBtxXOwSKV2G81Q1xcYmIAaTOphlq5x5Hh8Lzw7VBbcHCZYp2eHlq7ka2Wi+TInCySiX0KZgB7H6i+cmasBVKvHa4loF4DTAGQ5WbbgDXX1DSipsNYjDtyAximMmtpomVOufW05Xr/pLFTpttr83Qfrw8rrrGe+C1uOWBghuPV8k5E+nVq5km9IpcDMewGAiLoAaBFJYUR0OYBHAFzEzBW69AwAR5m5noj6ARgAILJdU4SYYdqXm/Heyn2+z0VBeos7CsudLVzXEpZV1SpmLA+M69n7S7BcXfewMb8Uq/OM9y2wK5qdhr46xvZxECLDLYtsSPMREf2MiHYC2APgBwB58J8LMLvvQwArAJxORAeI6A4Ar0BZKb2AiLKJ6HX18jEAcogoG8AcAHcxsze7fQiO81VOgd/nwjJrJhCjBtJ2e65ev6OwHMOmfdtkf+RQ1DU0oM7huM4vLdzpaH5Weeg/GwC4P78j6yDiGyvmo6cBnAvgO2YeQURjAdwU6iZmvsEg+S2Taz8B8IkFWQTBlGDzDdsOhTcC+dkrPyKjTXrQa6w0glZXE9tddRzOmMfttSKx4OUkhI+VieZaZj4CIIWIUph5MYBMl+USEghPO45a4UajDotNaqioopY8lGKooTwhezALQbAyUighotYAlgB4n4iKADjnPC0IOkI1rzHUtrpCIvSyPZq2STrcMgNaGSlMAlAB4H4A86GEp/ipK9IIQhxizXxkDWlLBau4NQK3MlKYDGAJM+8E8I5LcghJhFsTkcF6p0amokToza4x8WLykkT4XpMZK0qhN4A3iKgvlHUDSwAsZeZsNwUTEgc7w1z9pV6FerartKztxWxxotlmi/rVhoLQFwmCDUKaj5j5T8x8CYDBAJYCeBjA2uB3CYI79H9sXlj3eR1zx63ec6gV0ELikuLSKx1ypEBEfwBwAYDWANYDeAiKchCEuMHKqKOkogajnl0YdtA8J5A2XvAaK+aja6DEKZoLZfHaCjUshSAAAJ6btxUHjlWanj+qC/q2dOdhANuiIJWCzyPV0CXVn/X7S2wrBKu41dbLQjHBaayEuRipBqe7AMAEADOIqIiZL3RdOiEueGOJvYgkimJQKCqrQpe2jdFT9GYeZuBgibmyCcSrTraTpqksm7GBZGSRvHjmkkpEQwHcCOBWANcDyAewyBVphKRj3sbgE6WfrjsQUf6x0ma61XjHSv2E6OOlS+p0KHMILwNYw8zObkElJBzM7PlezFZwLNyDg2EubCNaIWlx6ydmxXz0EyJqCWW/A1EIgqM0adPI/1yk7XaQKBeCIBhgxXz0UwDZUFYzg4iGE9EXLsslxDi5ReUY+qdv8O6KPNfKqK1rwPMLdli+3u1Ab2Y8MicnZNli+xfiBSthLqYBGAWgBADURWt9XZNIiAumzFqL49V1+OPnmyPKJ9gIePH2Ilt5nfYH84jubiuMw8cj21ZTEOzjXeyjWmYuDUiTfo/gCE6+SDa3SmhSdiQ/Ma9WXwvJi2eb7ADYTES/AJBKRAOI6B8AlrsjjhA3OPRC/mNRLvYfrTA852QzGw8T3+EgykhwGitK4XcAhkDZQ/lDAKUA7nVTKCH2caqJPXqiBne8swazV+/D5oP+A1IzZWGHRA8DseewRLFPVtzq5liJfVTBzI8z89nMnAngXSjbaoaEiGYSURERbdKldSSiBUS0U/3fQU0nInqZiHKJKIeIRoZbKcFb7LbDVbUNmPrpRkx8eZnfi+6EnX7RtiJVptBRUn/z3rqIyzPDrTkNmcsQnMZUKRDRMCL6log2EdEzRNSdiD4BsBDAFov5vw3g8oC0qQAWMvMANa+pavoVAAaof1MAvGa9GoLGviMV+GRtZAu+rBAv5pi6eqUxttIkV9bKjmRC/ODFnMKbAD4AcC2Aw1DcUncBOJWZX7SSOTMvARAY8H0SGvdleAfAVbr0WaywEkB7IupupRyhkate/REPqhu06/lsfT4OlVaZ3vd5dr6bYgXFTbt4MA+m7216N0VCYhuxBC9wK/JvMKWQzsxvM/N2Zv47gBPM/Agzm7cs1ujKzFpsg0MAuqrHPQDs1113QE3zg4imEFEWEWUVFxdHKErisGhbIfYfrfALPqdRUVOH+z7Kxi/eXGl4b119A+6dne2YLN/vKMLeI+HZusur6hyTAwjukeRknUOR4FMbQgIRbEVzCyIagcb5jGr9Z2aO2ADLzExEtn4uzDwDwAwAyMzMlJ+ayu1vZ6F5qrGO1xrGwjJjfR7Olxisj3L721kAgLzpE23n+9RXVi2T8cX2wnKvRRASDC/CXBQAeEH3+ZDuMwO4JMwyC4moOzMXqOYhbQyfD6CX7rqeappgEbfCPgeycvcRFJVbi55uJ8ppIlNZI/MVgrNEXSkw81h3isQXUCKuTlf/f65Lv4eIZgM4B0CpzswkeEBtfQOW7CjGuEFd/dInzzA2QxlRXRdaUe0/6r7i8Np8I+sJhHjByjqFsCGiDwGsAHA6ER0gojugKIMJRLQTwHj1MwDMA7AbQC6USe7fuimb0IhZg/n373bgjneysHSnzN0IQqzh1kSzldDZYcPMN5icGmdwLQO42015kgknwlfvU3vwRpPXgj28HqkICYiHYS6EBMfMtOFV1NFExG5cJkHwCtORQqgVxU54HwmxgbT9giBoBDMfPR/kXCTeR0KcEKn5aXfx8YSPPWQVGXUJThP17Thd9D4SogBzo8ua3QYpv6QSXdqkN7mvpKLGlqK45PkfMH5QF1tlC4JgDbdCzViaaCaioQAGA2ihpTHzLFckEqJOoM64YPoi3HhOb99n7eUb/tQC23mv2hMY5SRBkYGAEGU8i5JKRH8C8A/1byyAvwL4mUvyCC4QTo/ihx2NbqiRmD7qZYYVQOKH8BYSByveRz+H4kJ6iJlvA3AmgHauSpWEVNXWo9bBFcn6Jijk/sEudnMrZCWvIMQVVpRCJTM3AKgjorZQwlL0CnGPYJOBf5yPn7/mzYZ2oTqx8RIm20tqQ4yIZKAgOI0XsY80soioPZRVxmsBHIeySllwmA0HArfCdoZwGnXRA/aoCrEXg5iPBKeJuveRBjNr4SZeJ6L5ANoyc45L8ggOoZiMrL02Rs2VW0voExVtMx8zdhXLtplCfGBlonmhdszMecyco08T4h/xoY8cmVAXoo1bZt1g23G2IKKOADoTUQd1b+WORNQHBpvfJBPHq+vQZ+pcvLsiz2tRALjXqEszZ526huiELRcEDS9cUn8NZQ5hIIB16vFaKKGuX3FJnrigWN1L4K1lezyWRCFSnRDqdjEkheZnr/zotQiC4AimSoGZX2LmvgAeYua+ur8zmTmplUI8cOrjX2O1xYVjRkpFPzKVEYMgxB5pqdHfo1njDSL6PRHNUf/uIaJmrkgTZ3jdWN4yczV+/+F6Uzlmr9kXdt4yOhCE2KbDSc1dydeKUn
gVwFnqf+34NVekiRNipcFcsqMYX2w4GPmcQqh1CpHlLgiCG0R7nQIRpTFzHYCzmflM3alFRLTBHXGEcAilEsJZ0Zx3pAJDesjCdUFINoKNFFar/+uJqL+WSET9AIQdu4CITieibN1fGRHdR0TTiChfl35luGUkG655lHptHxMEwRS31hIFUwpaiQ8BWExE3xPR9wAWAXgw3AKZeTszD2fm4VBMURUA/queflE7x8zzwi0jWjADq/ccxXVvrLAVt2jl7iOYPGMF6hyKdRRp7CJZpiAIgkawFc0ZRPSAevwGgFT1uB7ACACLHSh/HIBdzLw3XuPrPPSfDdh3tAIHSypxSqdWlu65/6NsFJRWoai8Gie3bxmxDKEade30iZp6lFfVok2LZobnmxCfj0QQkgK3msxgI4VUAK0BtIGiPEj9S1PTnGAygA91n+8hohwimklEHYxuIKIpRJRFRFnFxcVGl8QNrnfQDQr4ZnNhRPcLgpDYBBspFDDzU24VTETNoezL8Kia9BqAp6E0RU9D2Q709sD7mHkGgBkAkJmZ6UmzFamGdlrBR+x8FCKDOB3ECUJC48WKZrebgisArGPmQgBg5kJmrlfDdL8JYJTL5UdM5LZ8Z3SaqRy+7ThD3R/ivIwYBCFpCKYUxrlc9g3QmY6IqLvu3NUANrlcfthEOuvv9PzJ4fIaR/MTBCH2ifqcAjO7trkuEbUCMAHAp7rkvxLRRiLKgbLt5/1ulR8tGhoYf/tmO4rKq1wt5/HPNkZ0f+hNdiLKXhCEOMLKJjuOw8wnAHQKSLvZC1kigbmxwTRqWNfkHcUri3OxMb8U79weuTXsqS+3oK6hAU9NGuqXXl0XwrU1pPlI7EOCEG94sU5BiJB6VVNU1xmv9bNrq5/54x7MWrHXIB9p1AVBcAZRCjbYd6QC/11/wPBcNE0s76/yVwxm+7ss2VEMZsa/l4cI8S06RRDiDi/WKQgBTPrnMtz/0QbDhxGss+50R/7x/27CviMVvs9m+/8ePl6DlbuP4u/f7Qyan+gEIV65ekRS7/flCqIUbHCsotbvc8gJWgOb34nqOuSXVEYsS61up69gcgSarux0LmSuQYh1ktl06sU6BcEFpn2x2fE8g/0wrLi/hqPcBCEWSF6V4B6iFMLAqBHdc+SE+fW64yJ1K0+zfMJhw4FS03M7DpWHvN9sRKANRmTEIAgxiEuTCqIUwsCokbzt32ss3Rttn/9n520Nej6/pBKb8ssMz83ffMgNkQTBMZLYeuQaohTCQP8iWmnkzS6x2gMvr6rFPxb6TxbXhFqbYJELpi/CnbOygl7zY+5hR8oSBKf56Zkney2CZ7jVv/Rk8Vq8o2/KrfRU7HRmKmrqcFJz/8dy1jPfNVECT37p/NyEGR+u3o+xp3eJWnmCYJUJg7t6LULCISOFMLDq8WA0itAnHa+uw85Cf5t/bX3TvI1GBblFxy3J4BS/fX9dVMsTBCE45/XvFPqiMBCloLK1oAz1ZqvAAnDKjHnrzDWY8OKSsO6N9qZEdRa/G0EQosO5/UQpuMbmg6W44qWleGVRbtDrgsU5skJhWRWOV9f5Ph8+Xt30IpfbXgluJwhCMGROAUBBiRLFNOdAicU7GltuS42sevk5f15o4VKLpilLVxnkLx1+QRCCIErBBgSlfdcsKcrcgjdd75QIu/zlVbVRN0EJghD7iFKwAREBzKaxhsxvtH5pYNab8o0XplXU1Bmmh8wfwNETNRj59IKw7hcEIbGROQUbaG27LuyQoTmmqrYetfX6i8Iv08zLqKwqPKUwc9keUQiCIJgiSkFHqLbbN9Gsu7Kuvqm76MA/zsc1ry53xLDktIVnS4Hx6mVBEPwZ2qOt1yJ4gmdKgYjy1O03s4koS03rSEQLiGin+r+DF7Ixc9C1CPpTB0urdOmN9200MfuYlec7tiGnIAju8ZNhsbtaekCX1q7l7fVIYSwzD2fmTPXzVAALmXkAgIXq56jz3qp96PvoPBSX+7uMatFCzfTFc19vQ99H5zVJl4ByghB/TBndD7+6sK/XYhgy567zXcvba6UQyCQA76jH7wC4ygsh5qxVdlf7bH0++kyd28RVVZtoDmzqZyzZ7ffZqnePXskEjlDunZ1tKQ9BEJwlJYVwdt+OXovRhBWPXoJ2JzVzLX8vlQID+JaI1hLRFDWtKzMXqMeHADQJbEJEU4goi4iyiouLHRWIAPSZOhcb9pcAABZuKwQALN15uPECmO905gQyphAEIRgdTmruav5euqReyMz5RNQFwAIi2qY/ycxMRE3aSGaeAWAGAGRmZrrahga2/T7vI4ulWo2RlLX3mGmZgiB4R0MMhndxu43wbKTAzPnq/yIA/wUwCkAhEXUHAPV/kVfyAU177Y3WIGefymfZ+boyY+8lFIRkpT4Je2meKAUiakVEbbRjAJcC2ATgCwC3qpfdCuBzt2Q4WFKJPlPnIls1FRnCmrz+yY0rmoOXYXVOwe+q5HsHBSFmsRokMxhXDO0W1n2f332BYbrbgQi8Gil0BbCMiDYAWA1gLjPPBzAdwAQi2glgvPrZFZbuVOYjPli11/SawF57KO+jcHl/1T5dmYLgPS2axZoPijfUGYSyt8P0a87Ak5OGhHXvmb3aR1R2uHjy5Jl5NzOfqf4NYeZn1fQjzDyOmQcw83hmPuqFfI1y+n+mgIlmq6YeO0okCUerQgySGoW4WOc47Nlz77gBjuYHAM3SmjaRPdq3tHz/5FG9fZ3JeEG6A0HQTwAD+olmVv8Hvz+c39V/1+fj5rdW2b9REBzEqDF0mtO7tXE0v0nDrS0269jKuvfOxDO6N0nrl9HK8v1u0KJZqqv5i1LQETrMhb/5yKpnQtbeY1i83dqc+V/mb2t0gRUEjzijRzuvRbCN1Tm89i2t+/inpsRWL/+7B8a4XkbSKoVIzDTavUdO1Fi+5853smyXU1BaafseQYiUISe39Wtgf35WT1fKia3m1j3sWAxaNTceBaSrI7d+nd0Lb6GRtEohHALNR24Taic4wVvSYqwX6RSBjdjJNmzoXhL4NEwnaqP82Jwo7vN7LsDDl52OlCi8c0mrFCKZR4vWXHCkG+kI7hKuV0kyEUxxur3J089H9nAlX7ty25nDMMt7YLe2uHvsqbbKDZekVQrhEOh95CZHT9Tg3ZXm7rJC9Pn1mH5eixB3/Oeu8zwr22zKz6xJt+pVZHeEaEeJvDR5uN/nByachtlTzrVVXqQkrVIIp11vnGh2Xyn85ettoS8SokvAb3vScHd6ol5DIMfe8SEnOzdh/e/bzg56PrDtNarDNSN64J83jjS8Pz0tBZ3UXv1bt2YaXgOYTz5fZcH7qWvbdNNzn/72fPTueJJf2u/HDcC5/TqFzNdJklYpaITjQ2z19xLJ4Dha8xaCdQLfldbp4YcOS4+Cy2cs4KiFyOZPwujyF64fjoHdjDfPaZ6W4pP3jJ7myiySuSTtZx34vYwe0Bkje3dwfbWyFZLjzQyCnVhDjeYjl4TRISohfP4wcZAr+Tr5gz2/f3R7f/HE+f07YcjJSsM90MZahsB+l
J3faadWzfHcNWf4Ppt1Fiee0d10pGDnNxuqz9ffw7UQSasUvt/eNOz2stzg6wO0OCgVNeHtj2yH/GPijhouk0f19lqEkLRKT8N5OrPA/eNPc7yMcH3siYBHLhto6577x5+GGwK+98xTOoQ1Wv7gznPRLFVpmv58zRm46LSMMHKxZ+b94M5zMaJ36I0e/3njSPzeZOW0E4P7Xh1PwqDubfH0VUMjzyxMklYpzN98yHes9QBr6prut6ynvEpRBi8u2GGpjEhGFCt2Hwn/5iTnJJdWfDo5sk8h8ht53Dve+RANkcwLnNGzHe6zIdO94wf49bQBoHWLtLA9jPSSh8oi85QOePqqoU166rbCywTcHcyCcFpXZ1di60lPS8XX947G+f07u1ZGKJJWKWiEM6egKYdQ6Fcx18VgXPZEJRJf7l4dzT1QwmnfLh9iHCHzjgv74s4Y9WZ6YILzoxYjrHyf+kvMGuo5vzkfN597ShMlGF4YevsPOdBjyAjNLdVslBFLJL1SsPLiBCoOq5PAr32/KyyZBO/46p7RpufC6UCYNXxn9mqPEeriqrYtvNzrqikXn96lSdrvLjH3kR/Ru71hOrP9JvbSwU02WzTM477xA3BKJ39PnUhGCr06aHmF33ljAE/8ZDA6GGyV2UZ9xvEwl5T0SgEI/fKs23cMu4qP+z5Lpz9xCbb3bTgjhZvPPcU8P5Mm86Mo+6VbYfygpo21htkiS4bxdzb1CmW+IpiS/c1FyiiqX4ZxWIf7xp+GHx4e619ewO/Sys909IDOyJs+Ea0CPMnCjWx6+4V9sf6JS8O6N1YQpWCBBVsKMe75H3yfSytrPZRGiCfOPzW0bditPsbDNieLA7l0cDf1v7lCAMwntM3mNII1t9odlw/tjrzpE9FOF7zO7hRJtN26g83hNLqixoDPaQiSXinEW6zzZOOyIcEbJDf489X+E6bahKvRm2JlrcLks3vZKt+Jpixv+kT85uL+pgHWrDD45LbImz4RQ3u0CzpKuslkNHTDqN6ONILW8wiYU4gk6KUur36dg7uH+ha16tLO798JJ7dr4ft85+i+AICMNuaL12KFqCsFIupFRIuJaAsRbSaie9X0aUSUT0TZ6t+V0ZKpoKwqWkUJIWidnoa86RN9nx+70p01B8Ew7WEaNE6bnrzM73OgQgH8t3TMaJOOwd2NF0/FAlZW5erp2jYdPzuz6T150yfiSoO9CPS43Wl2alX2oocuNj03zGSR2wd3novlj47zfb75vD7Imz4xogWP0cILCesAPMjM69R9mtcS0QL13IvM/LdoCsNg/PGzTdEsUghCEw8Shy0AedMn4sK/LMKBIOtA9EWOG9jFUIaz+4T2adfQz0GteXx84weXGsX2unkROz11vTK2SnjhYsLLL1RZTeYUInh3rFoQvrjnQnyx4WD4BanEUsiUqI8UmLmAmdepx+UAtgKInW9EiCnSvdgrWG1NbjynN/51a6ZPSeibCX1j2z7I5LSSXex6JlgNtmbWSFqpWfYTE2xI1DRHbeP7AV2D7yUQ6AByjoMxg1o0SzE1/YSj2288p3Gh357nrnRtz4pw8HROgYj6ABgBQNt/8h4iyiGimURk2BUjoilElEVEWcXFTVcl22Vn0fHQFwmuEehWGNiz1Va2RhOtcUlNIUUetVHXizZaN4Gcpe/968h99goA7k0kj7Kwx3GoxsZqg9a1Xfi28PYnNcf2Zy4P+/7/yeyF7c9cjlM62Qv9MKpvR7z/q3MMz2lmnFCT6Bqbpl2GFVMvCX6RxQe945kr8PSkoRg/SHH9jbXJZ8+UAhG1BvAJgPuYuQzAawD6AxgOoADA80b3MfMMZs5k5syMjPCWv+tZv68k4jyE8Hn/V+f4+XXHQq96aA/F5q9Fp2wcKRAGdFF6q5fqFqWl6RSX/vetpV8QygMpzCp/8KtzsO3p4I3tEz8ZjC1PXdYkfaS6tsBqg9SlTQtsfvIybHryMowe0Fgfq48rPa1xwjsc5w79/WYYrTky61Ssf2IC1v5hvOkkeSBpqSl+z1mP9hVaXSzXPC0FKSmE1286C1ufCl9ZuoUnSoGImkFRCO8z86cAwMyFzFzPzA0A3gQwygvZhOgxcVh39OxwkqUffDQ565SOWPfHCb6J0uHqIrNhvRonFc3aUqPN6M1663Y7iGsCRiRpqSkhN3FPSSGc1FzpFV9waqM55ZKBSi+1u85DJhSt0tPQOj0wdIV1jdatrX9ZRtV3qk+guclqnYxhPdv5KcdmqSno1Dq9iVK8TFX2J0XgtWWVtNQUtIxCOXbxwvuIALwFYCszv6BL17sqXA1AZn9dYkOMLK7RNlAfO7DpCto2LnppjDVYsRuIfrescYO6YtVj4yzdN8JsC0gDmqs9z/EBJoxubVv4evJ6Qrkz/vDwxabn1v9xAmb+UtmPYMjJbfHbi0/F6sfGoVdA/H4r6EdzgY34xmmXNvHI0lj80MXY8KdLMbC7ojiHGXxXVkxi5nIp/3u0b4kNf1Lecc0U2KJZqk85BuPJnw1B1h/GN1nMlkx4MVK4AMDNAC4JcD/9KxFtJKIcAGMB3O+BbElBsFW70URrT56aNMTXoF10umISXP7oJVj3xwmWDA3XjrQ2SaeZPZ746WA/3/OHLzs95L1d21rrUQczxwSu8WrRLBUrHr0Ef7l2mF96n86t8MGd9lc1t21h/lw7tGqO9LRUrHl8PObcdT5SUghdLNYpkMxTGhvuwI59mxbNTN0uWzZPRbuWzTB6QAaWPjLW0JV1SgTxoDRX4jYt0nwyaCYdq4OytNQUdG5tbf7kVNWUqJnDYsDy6QhRV4fMvAzGz2hetGURvEX7ETVLTcEpnVphycNj0UXdmaqN2sAdPVFjeG9aCvmCDLZKtzYEn3Fzpq+8ub8fjdvfXoMVu48gw2IjEAlrHh/vGxno6d7OOABfoFnoQgsro61gNtpolmrdlvW7S07Fi98pkYLPC9PDJ3CEsuThsWjfqpnji926tFEUn5Ww2HZY8eglvnc0xuaJIyZ5x0hCUFo2S0Vlbb2rZZwW4GLYu5N1U0ZaaqNS0P8mZ93uPxX1zX1jcNnflwCAn/22ZfNU9OrYEit2hxtN0x5WVrKuemycaRyhGbec5bRIPpZPvQQtbYQb10ehff66Mx2RoUvb9JDzI6Ew6qmf2qU1vrlvjOOb1hgp80QZKSR9mAvBGLd7P4O6t8Uvz+8TWg6TdP3ev/qeYeAet0YTvxra8L+bSW89Es60Mbeg0bVtC1PlYcUe3lzd4nO4zbJPbt8SHXRzKHaItCF3g8B35vRubUw9h5xAMy0GdnLiFVEKCU6gvbyNxTDNbo+IR/Zub8lU0CxgL+Mz1bACz1w1FH3VeQF977pPiDg1en51YT98NOVcXHRaRtAN1fUMVENUBJuInPv7C/HuHdF3nmuVnoYv77kQ//yF8cb0sYyTnZBom3POOqUD/nPXeXGxV4IVxHyU4Jx1SqMtddbto3yeH8FYcP8YXPHSUkfKf2rSELRr2Qz3zs72SzczkwTSOj0Nn/zmPKQQIaNNOjq3Tsf6fSUY
1L0tbhjVC3+et803gWtksw9GSgr5Vr3O/f1oHCoNHQPrr9cOw03n9EaP9v6jiwX3j8FJqqLQj2IiYcH9YzDhxSV+ad/cNwYVNXV+prBFD16EtBSl7sE2nI9lnHBL1hR1f5Nw225ydp/wvaZiDVEKCcj0a87A2r3H8J+1B9A6PQ3L/ncsSipqMbRHY4Px+k0jkZqSgvqGBtz13joAwKs3jsSI3u3RvV3LsHaK+9ctmfjVrCy/tG5tW2BYz/ZNrrXTmzvrFP8f3HnqRiWaDTclhfD53ReYegh9c98YpKcFVxidW6db8jpp2TzVMHzCABe2aNTy1IemNjKHme05EA98e/8Yx1at9+3cCu/eMcqvIyTYJymVQkmFsUdLojB5VG9MGt4DEwZ39SmCngG/k8uHKstCVun2gg4V1TIUgf72r944EhMGd/XzIOrcOh2Hj1ejiwMhhDW9RRTchh9sXiHWmXPXeeje3vk5j1jB6f2ORw+IPMpBspOUSkG/i1q80iyVUFtv3ptv2TzVLxSDGdoE5k+GWVMIix+6GGP/9r2lazUl00nXA3/26qGoqKnDT4bZC9FshLYBS8eTmk6SLnrwIhwxcWeNJzJj1Cyx8MGLcCwBvl+hKUmpFOobvC3/DxMH4Zm5W/3SrhnRA5+uz7ech7JgRlEK944bgJcW7gxLljN6tsML153ZRIG0ap6KEzX1+GjKuSgqr8bvPlyP0QM6o2/nVvj87gvQwIw9h0/ggY83GOb75i2Zfp/HD+qC77YWgQBcPcKZiJDXn90LRMD/GISR6JfRGv2k0+ga/TNaAx58vwsfvAgFJbL/iZskpVKoa/BWK4wf1BUDu7XFTW8pwWEfmHAa7h57Kj5dn+83AvjugTHYe6QCd7yj2Ombp6Wgpq6p7PdPOC1spQAA1xisCJ5372hsLSjz2c8ra+oxQTUPaaaaEb07oFlqCjqc1BxVAWsaJliMPhkJqSmEG0b1Dn2hkDD0z2jtyURyMpGUSiGYPfuMHu2wMb/Ucl5/ufYMtE5vhtO6tsb0r7dh4bYinN+/E35xTm/cOzvbb9ctPRfqIk1qrmwvTR6OYT3b+8wzp3Zpg1O7tMGbt2Ti5PYtcFLzNGTlHcXDc3LQoVUzvH7TWb79ov/9y7Ox+/AJDHLIfn5Kp1Z+oYqvM9lS8qcGoQqM0FZ/BrqYCoIQWySlUuhjEJf9usye+HrTIfznrvNQWFaFj9bsx6vf7wKgLGmfv+kQBnZri3X7jqFnh5Z4+qutOHy8Gj8982SfXX7GLZl4/Ydd+OX5fdAqPQ0Du7XFrBV5OFRahV9f1B/XvrYcQOMio3/dkum3QYvZ7kv6Xnffzq1QU9+A0adm+K0AHjuwC8ZG+L04wWd3X4CCkqa7mk372RAM6NoaF8lEoCDENBQL8evDJTMzk7OyskJfaECfqXMBAF/97kJsPliK689uaobQrjHapnDvkRNYsKUQvxptPYDX6L8uwv6jlch+YgLaG0yOany8Zj+G9GjrmL+7IAiCHiJay8yZhueSVSnsLCzH99uLcWeQqIzzNx1Cs1TCuEHO2Md3Fx/H/M2H8NuLT3UkP0EQhHAQpSAIgiD4CKYUZNZPEARB8CFKQRAEQfAhSkEQBEHwEXNKgYguJ6LtRJRLRFO9lkcQBCGZiCmlQESpAP4J4AoAgwHcQESDvZVKEAQheYgppQBgFIBcZt7NzDUAZgOY5LFMgiAISUOsKYUeAPbrPh9Q03wQ0RQiyiKirOLi4qgKJwiCkOjEmlIICTPPYOZMZs7MyJCQCYIgCE4Sa7GP8gHoI6/1VNMMWbt27WEi2htBeZ0BHI7g/lghUeoBSF1ilUSpS6LUA4isLqeYnYipFc1ElAZgB4BxUJTBGgC/YObNLpWXZbaqL55IlHoAUpdYJVHqkij1ANyrS0yNFJi5jojuAfANgFQAM91SCIIgCEJTYkopAAAzzwMwz2s5BEEQkpG4m2h2mBleC+AQiVIPQOoSqyRKXRKlHoBLdYmpOQVBEATBW5J9pCAIgiDoEKUgCIIg+EhKpRCPQfeIKI+INhJRNhFlqWkdiWgBEe1U/3dQ04mIXlbrl0NEIz2WfSYRFRHRJl2abdmJ6Fb1+p1EdGuM1GMaEeWrzyWbiK7UnXtUrcd2IrpMl+75+0dEvYhoMRFtIaLNRHSvmh5XzyVIPeLuuRBRCyJaTUQb1Lo8qab3JaJVqlwfEVFzNT1d/Zyrnu8Tqo6WYOak+oPi6roLQD8AzQFsADDYa7ksyJ0HoHNA2l8BTFWPpwL4i3p8JYCvARCAcwGs8lj2MQBGAtgUruwAOgLYrf7voB53iIF6TAPwkMG1g9V3Kx1AX/WdS42V9w9AdwAj1eM2UNYHDY635xKkHnH3XNTvtrV63AzAKvW7/hjAZDX9dQC/UY9/C+B19XgygI+C1dGqHMk4UkikoHuTALyjHr8D4Cpd+ixWWAmgPRF190A+AAAzLwFwNCDZruyXAVjAzEeZ+RiABQAud114HSb1MGMSgNnMXM3MewDkQnn3YuL9Y+YCZl6nHpcD2AolzlhcPZcg9TAjZp+L+t0eVz82U/8YwCUA5qjpgc9Ee1ZzAIwjIoJ5HS2RjEohZNC9GIUBfEtEa4loiprWlZkL1ONDALqqx/FQR7uyx3Kd7lFNKjM1cwviqB6q2WEElJ5p3D6XgHoAcfhciCiViLIBFEFRsLsAlDBznYFcPpnV86UAOiHCuiSjUohXLmTmkVD2mribiMboT7IyboxL/+J4lh3AawD6AxgOoADA855KYxMiag3gEwD3MXOZ/lw8PReDesTlc2HmemYeDiXu2ygAA6MtQzIqBVtB92IFZs5X/xcB+C+UF6ZQMwup/4vUy+OhjnZlj8k6MXOh+kNuAPAmGofpMV8PImoGpSF9n5k/VZPj7rkY1SOenwsAMHMJgMUAzoNiqtOiT+jl8smsnm8H4AgirEsyKoU1AAaoM/rNoUzQfOGxTEEholZE1EY7BnApgE1Q5Na8PW4F8Ll6/AWAW1SPkXMBlOpMArGCXdm/AXApEXVQTQGXqmmeEjBXczWU5wIo9Ziseoj0BTAAwGrEyPun2p7fArCVmV/QnYqr52JWj3h8LkSUQUTt1eOWACZAmSNZDODn6mWBz0R7Vj8HsEgd3ZnV0RrRnF2PlT8onhQ7oNjrHvdaHgvy9oPiTbABwGZNZij2w4UAdgL4DkBHbvRi+Kdav40AMj2W/0MoQ/haKPbNO8KRHcDtUCbNcgHcFiP1eFeVM0f9MXbXXf+4Wo/tAK6IpfcPwIVQTEM5ALLVvyvj7bkEqUfcPRcAwwCsV2XeBOAJNb0flEY9F8B/AKSr6S3Uz7nq+X6h6mjlT8JcCIIgCD6S0XwkCIIgmCBKQRAEQfAhSkEQBEHwIUpBEARB8CFKQRAEQfAhSkEQdBBRvS6yZnaoaJlEdBcR3eJAuXlE1DnSfAQhUsQlVRB0ENFxZm7tQbl5UHz/D0e7bEHQIyMFQbCA2pP/Kyl7WqwmolPV9GlE9JB6/HtS4vrnENFsNa0jEX2mpq0komFqeici+la
Nm/8vKIvDtLJuUsvIJqI3iCjVgyoLSYooBUHwp2WA+eh63blSZj4DwCsA/m5w71QAI5h5GIC71LQnAaxX0x4DMEtN/xOAZcw8BEosq94AQESDAFwP4AJWAqPVA7jRyQoKQjDSQl8iCElFpdoYG/Gh7v+LBudzALxPRJ8B+ExNuxDAtQDAzIvUEUJbKBv2XKOmzyWiY+r14wCcBWCNEtYHLdEYlE4QXEeUgiBYh02ONSZCaex/CuBxIjojjDIIwDvM/GgY9wpCxIj5SBCsc73u/wr9CSJKAdCLmRcD+F8oYYxbA1gK1fxDRBcDOMxKvP8lAH6hpl8BZStLQAlG93Mi6qKe60hEp7hXJUHwR0YKguBPS3XnK435zKy5pXYgohwA1QBuCLgvFcB7RNQOSm//ZWYuIaJpAGaq91WgMdTxkwA+JKLNAJYD2AcAzLyFiP4AZZe9FCgRWe8GsNfhegqCIeKSKggWEJdRIVkQ85EgCILgQ0YKgiAIgg8ZKQiCIAg+RCkIgiAIPkQpCIIgCD5EKQiCIAg+RCkIgiAIPv4f8FOBVzpIQfkAAAAASUVORK5CYII=\n",
92 | "text/plain": [
93 | ""
94 | ]
95 | },
96 | "metadata": {
97 | "needs_background": "light"
98 | },
99 | "output_type": "display_data"
100 | }
101 | ],
102 | "source": [
103 | "import numpy as np\n",
104 | "import gym\n",
105 | "from dezero import Model\n",
106 | "from dezero import optimizers\n",
107 | "import dezero.functions as F\n",
108 | "import dezero.layers as L\n",
109 | "\n",
110 | "\n",
111 | "class Policy(Model):\n",
112 | " def __init__(self, action_size):\n",
113 | " super().__init__()\n",
114 | " self.l1 = L.Linear(128)\n",
115 | " self.l2 = L.Linear(action_size)\n",
116 | "\n",
117 | " def forward(self, x):\n",
118 | " x = F.relu(self.l1(x))\n",
119 | " x = F.softmax(self.l2(x))\n",
120 | " return x\n",
121 | "\n",
122 | "\n",
123 | "class Agent:\n",
124 | " def __init__(self):\n",
125 | " self.gamma = 0.98\n",
126 | " self.lr = 0.0002\n",
127 | " self.action_size = 2\n",
128 | "\n",
129 | " self.memory = []\n",
130 | " self.pi = Policy(self.action_size)\n",
131 | " self.optimizer = optimizers.Adam(self.lr)\n",
132 | " self.optimizer.setup(self.pi)\n",
133 | "\n",
134 | " def get_action(self, state):\n",
135 | " state = state[np.newaxis, :]\n",
136 | " probs = self.pi(state)\n",
137 | " probs = probs[0]\n",
138 | " action = np.random.choice(len(probs), p=probs.data)\n",
139 | " return action, probs[action]\n",
140 | "\n",
141 | " def add(self, reward, prob):\n",
142 | " data = (reward, prob)\n",
143 | " self.memory.append(data)\n",
144 | "\n",
145 | " def update(self):\n",
146 | " self.pi.cleargrads()\n",
147 | "\n",
148 | " G, loss = 0, 0\n",
149 | " for reward, prob in reversed(self.memory):\n",
150 | "            G = reward + self.gamma * G  # discounted return of the whole episode\n",
151 | "\n",
152 | " for reward, prob in self.memory:\n",
153 | "            loss += -F.log(prob) * G  # every action shares the same weight G\n",
154 | "\n",
155 | " loss.backward()\n",
156 | " self.optimizer.update()\n",
157 | " self.memory = []\n",
158 | "\n",
159 | "\n",
160 | "episodes = 3000\n",
161 | "env = gym.make('CartPole-v0')\n",
162 | "agent = Agent()\n",
163 | "reward_history = []\n",
164 | "\n",
165 | "for episode in range(episodes):\n",
166 | " state = env.reset()\n",
167 | " done = False\n",
168 | " total_reward = 0\n",
169 | "\n",
170 | " while not done:\n",
171 | " action, prob = agent.get_action(state)\n",
172 | " next_state, reward, done, info = env.step(action)\n",
173 | "\n",
174 | " agent.add(reward, prob)\n",
175 | " state = next_state\n",
176 | " total_reward += reward\n",
177 | "\n",
178 | " agent.update()\n",
179 | "\n",
180 | " reward_history.append(total_reward)\n",
181 | " if episode % 100 == 0:\n",
182 | " print(\"episode :{}, total reward : {:.1f}\".format(episode, total_reward))\n",
183 | "\n",
184 | "\n",
185 | "# plot\n",
186 | "plot_total_reward(reward_history)"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "id": "2acaed03",
192 | "metadata": {},
193 | "source": [
194 | "## ch09/reinforce.py"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 4,
200 | "id": "7d956534",
201 | "metadata": {},
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "episode :0, total reward : 13.0\n",
208 | "episode :100, total reward : 48.0\n",
209 | "episode :200, total reward : 36.0\n",
210 | "episode :300, total reward : 26.0\n",
211 | "episode :400, total reward : 48.0\n",
212 | "episode :500, total reward : 33.0\n",
213 | "episode :600, total reward : 193.0\n",
214 | "episode :700, total reward : 58.0\n",
215 | "episode :800, total reward : 124.0\n",
216 | "episode :900, total reward : 200.0\n",
217 | "episode :1000, total reward : 108.0\n",
218 | "episode :1100, total reward : 200.0\n",
219 | "episode :1200, total reward : 200.0\n",
220 | "episode :1300, total reward : 176.0\n",
221 | "episode :1400, total reward : 122.0\n",
222 | "episode :1500, total reward : 200.0\n",
223 | "episode :1600, total reward : 196.0\n",
224 | "episode :1700, total reward : 200.0\n",
225 | "episode :1800, total reward : 200.0\n",
226 | "episode :1900, total reward : 170.0\n",
227 | "episode :2000, total reward : 200.0\n",
228 | "episode :2100, total reward : 200.0\n",
229 | "episode :2200, total reward : 200.0\n",
230 | "episode :2300, total reward : 200.0\n",
231 | "episode :2400, total reward : 200.0\n",
232 | "episode :2500, total reward : 200.0\n",
233 | "episode :2600, total reward : 200.0\n",
234 | "episode :2700, total reward : 200.0\n",
235 | "episode :2800, total reward : 133.0\n",
236 | "episode :2900, total reward : 200.0\n"
237 | ]
238 | },
239 | {
240 | "data": {
241 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA5TUlEQVR4nO2deZwU1bWAv9OzMTAzDDADDJvsAsqmI6vggiLgnhiV5EXjRjS4JMYkbknM85mY/WleXgw+fWqiJsb9GbMYNUETN1BEVBBQEJBV2WSHOe+Prh56ZnqpXqqruvt88+vfdN+qunVuLefce+6954qqYhiGYRgAIb8FMAzDMIKDGQXDMAyjGTMKhmEYRjNmFAzDMIxmzCgYhmEYzZT6LUAm1NXVad++ff0WwzAMI6+YP3/+JlWtj7Utr41C3759mTdvnt9iGIZh5BUisjLeNnMfGYZhGM2YUTAMwzCaMaNgGIZhNGNGwTAMw2jGjIJhGIbRjGdGQUR6i8jzIvKOiLwtIlc56Z1F5BkRWer87+Ski4jcLiLLRGShiBzhlWyGYRhGbLxsKewHvq6qw4BxwGwRGQZcCzyrqoOAZ53fANOBQc5nFvArD2UzDMMwYuDZPAVVXQusdb5vF5F3gZ7A6cCxzm73An8HvuWk36fhWN4vi0itiDQ4+Rhpoqo8vmANJx3Wnfblpbz+4WaWrNvOoK5VPPL6ai6Y2I8Fq7awcPUWTh/Vk7KSEKUh4fCeHVm+8VMemb+aDzbt4LUVm2lS5Q+Xjuf+lz+kS1U5Y/t15id/XUKPjpX87d31fLpnP6P7dOKovp3ZtXc/T775EZt37muWZeaY3jz46qrm3x3KS6htX87arbuoLCuhf30VQxuqeWje6uZ9yktCVJSG2L5nf4tyjezVkb0HlKYmZcn67dRVVbBt9z727m9qcw26dCjn4x17OfKQTsxfuTnutappV8q23fspKxH2HdDm/8koCQnty0vYvjssY8/aStZs2dVin4rSEHv2N3H8kK68uGwT9VUVbfaJvk6rN+/ihaWb6NWpktWbD+43pHs1i9dtB+CwHjU0dKxk5cc7WLrhU7pWV7Bh+x4AqitK2bF3P5dM7s+v//F+i/wnDOjCv5Z/DMCgrlWs27qb7Xv209CxHScO68ZD81ZRFgpxeM+OvLbiE8YP6MILSzcB0NCxHRu372F/U8vrUllWgqJMHdadNVt28caHm2lSGN+/Cx0qSnlu8XoGdq3ipMO6s2jNVp5fshEI38djBtfz6BtrqKuqYMGqLW3KXNOulHZlJWzYvofSkDDt8O70r+vA39/byMbte1i7dTd9u7Tn2EO78od5q9ix9wAA5aWh5uehrESYMKCO5Rs/ZfXmXYzv34WX3g9fg+417ehf34E9+5tYu2UXDbWVzF+5mXZlIXbvO/g8RZ4jgP71HVi7ZTczhjfwyOur6d25klWf7GJEr45MGFDHHf9Y3uL6DKjvwPKNOwA4qm8nXluxmZNHNPDHhWupblfa/OxEPy9nN/Zm06d7mp/vVz/4pHn7aSN7cPWJg+lb1yHmM5QJkov1FESkLzAXOBz4UFVrnXQBNqtqrYg8Bdyqqi86254FvqWq81rlNYtwS4I+ffocuXJl3DkYBjBvxSecdcdLfO7IXvz4cyPpe+0fXR234taTXe9rGEbuuWRSP244eVhax4rIfFVtjLXN845mEakCHgG+qqrborc5rYKUrJKqzlHVRlVtrK+POUvbiOJTp4a9bttunyUxDLhgYt82aQtvmppyPvddOCYL0uQ3rVtr2cJToyAiZYQNwv2q+qiTvF5EGpztDcAGJ30N0Dvq8F5OmpEBIRG/RTAMwwO8cvJ4OfpIgLuAd1X1Z1GbngTOd76fDzwRlX6eMwppHLDV+hMyJ2IUmmzZVSOgWLUlPbx6p70MiDcR+CLwlogscNKuB24FHhKRi4CVwNnOtqeBGcAyYCdwgYeyFQ2RhkJT2/5Xw8hb0mkAl4SEAx65XPzAq7J4OfroReJXAqbE2F+B2V7JU6xEXh5NrevGMDxBYqgEyZGLs9BaJF7ZN5vRXOA0u4+aYOXHO3yWxjDaUmjK2gvOG39ImzSvRo6aUShwIi/cqys+4Zgf/91PUQwja8RqcRQyleUlbdK86lMwo1Dg5KppbhhuiPU42iOanFhG0NxHRlqE7IUzjLwnluG0loKRFtZSMIJOOq4ge6zzcJ6CEQzs5TGCTjrPqBcKsZ8HcYSyRaxL5NWQVDMKBY7NaDZygdvHLMhP47CGGr9FiIu5j4ysYX0KRi4oscqHp8RysZ3d2DvGnpljRqHAKbahe4Y/hAqg9hHkCZ6xbO7kwd4EBDWjUOBYBc7IBW5tQraGpAZZgXtBLl9jMwpGTGwtBSMVMum7CkprNtAxI3NYuzOjYBhGxnStrvBbhILGWgpG1jD3kZELnr5qEtdOH5J0v1jzZoIyJDXILYVcvsdmFAzDyJj25aVcesyAtI61ektyculi83I9BcMw8oyQeBdTJ6icPqoHEwZ04bnFG5LvXARYS8EwjGb8CIuSzjmzabdG967lnKP6ZDHH7GPuI8MwXPHgJeOymp/Xuidb+XuxlkCg+xRyeC4v12i+W0Q2iMiiqLTfi8gC57MiskyniPQVkV1R2+7wSq5iIyjD/QxvGD+gS1bz82Nggj2hycnlffGyT+Ee4L+A+yIJqnpO5LuI/BTYGrX/clUd5aE8hlH0nDyigT8uXBt3e7gSkdsqs98j5CLuqwA3FHLq1vOspaCqc4FPYm2TcAnPBh706vxGGL9fOCNYTBjQhaGJAr/lwH9053mNGWcTZAWeKT1rK309v199CpOA9aq6NCqtn4i8ISL/EJFJ8Q4UkVkiMk9E5m3cuNF7SQ2jwIin9xsP6cQvP3+E5+c/cVi3lvIEpOaSTp/CF8bmpoO6GDqaZ9KylbAW6KOqo4GrgQdEJGZ1RlXnqGqjqjbW13sTEKqQCMj7ZgSIeM/Ew5dNaKOwjcScN75vTs6Ty77BnBsFESkFPgP8PpKmqntU9WPn+3xgOTA417IVIkEeUWH4g58VhSAOfDh4PVJ/WXIVHLbQWwonAItVdXUkQUTqRaTE+d4fGAS874NshlHQiPOXLl06lGdRmgwISGUnV8q6UIakPgi8BBwqIqtF5CJn07m07WCeDCx0hqg+DFyqqjE7qY3UMPeR0ZovH9PfbxECSTqtai/6Q/x+Zz0bkqqqM+OkfylG2iPAI17JYhjGQU4Z0YPLH3gjrWMDUkHPKol0cE27Urbt3h93e66Wuy1095GRQ4LowzXyAy9W9sqWcvNikZ10csxZn0IhdzQbhuEfmSjlqcO6eRJeIp/xoqWQrdXp0sWMQoHjt3/SyF+iH533vz+DX3/xSN9kyQWxDF4yE+jl+3XayB7eZZ4AC51tGEZSQqHshILIXkC8LGUEGWl2rybeLb55GmUlIZ5a+BFNWiBhLgzDMLzCk5XX0jjGC1UtCO3KSigJCaUlIc/OEw8zCoaR5/zgM8Nd75upcim6LoUclvd/YsS
Eiri0rE/BMAqc2celt3RlLKoqvPECe6GIgtLHdeyhB0dWRURK1eC9cv2UrJanprKsTdoBZxm80lwNc8KMQsGye98BwGLVB5XykhK/RQCgvrqiTVqilkdQRh9lKsWcLzYycWBma1F0q2mXoRQtiWVgIkujhswoGJnwt3fWM+Tbf2bBqi1+i2LkId2zpOyC0iqIRXlpiEO6dGiRFsvQJDM+2bSREb0f67q1bimUeGgkbPRRAfLC0nBI8QUfbmbiwDqfpTHyjcjEsFhqJxUdmPvlelIjII2eKOIr+uj5EM9fc6xnLkOwlkJB8dzi9fS99o+8v2lHc1rgnnsD8GZGbizKS1q+4l7X3v/xjWOjztX2ZPFm5j51xdFeiZSUiJjpuMZiHXHZsen1FyWq/EcbhX51HWK6/bKFGYUC4v/eDC+zuODDLUBwFi8x/KOuKstRTZPozWiXTCpP3+E9O6Ynj8ekYygqStNTq5H3Nfq6RVZhy2XlzoxCAWKtg+ATPNdF9vGyThJLWd/xb8GYcZ1unKJYRz02ewIPXDI2M4FSxPoUChhrKASXspLg3pyIvo3V0kytT6Ftr4KXz2SvTsnXNn7ma5Ojop5mbpljGaey0jSNQozDula3o2t1O/4wbxUAwxKtr50lrKWQ59z81Dv82/+80iLt0z3hh74YaqP5Ssf25dw+c7Tny1+2fgRyGjU3y6eKdoWl+2gP6lbNkYd0apEWuSax3pdUz3PbuaPSHr0VkSOR23eoGQUjGXe9+AEvLtvktxhGqqhy2sge1FV512HoBan42LNtfnp3bp9wu9eVoG41be9V63OePqpn2vknakVZn4KRFcx9lA8ErzmXLeUaMwR0drKOSapj91uXM9mIMLetLC+NUy7eaS+X47xbRDaIyKKotJtEZI2ILHA+M6K2XSciy0RkiYic5JVcxYa5kLwjln/X7VDBfLgtmc9TyK4Gi36WYz3XQxuq08r34JDUtA7POjGvWg5l87KlcA8wLUb6z1V1lPN5GkBEhhFeu/kw55j/FpFgxAHII1o/TNZQ8JYvjOvTJi3w1zxDAVNRnLluqYoIl0zql/bxpSVt1WEuDYWb65WLS+qZUVDVucAnLnc/Hfidqu5R1Q+AZcAYr2QzDK9IVRGmo3SSRUXt3fngKJxsK7VUyhd4A9mKH581IuF2t2VP95IHZelcP/oULheRhY57KTIMoCewKmqf1U5aG0RklojME5F5Gzdu9FpWw0iJXPidZ45p20KJ5oYZw9LPnINKLZYS/MOl4zPKO3vNh2wMJ235u1tNu5RnCmfT6DZfmhiXKFcz4CH3RuFXwABgFLAW+GmqGajqHFVtVNXG+vrsLyxuGJkQtM79bMtzWA/3M4+9nFEfTxmno6QTSZmOMk43kmyiy3XqyB7MGN6db5x0aFp5p0JOJ6+p6vrIdxG5E3jK+bkG6B21ay8nzciEoGmoIiDVK+61z7p1/l48EbeceXicc2e3cAHpB25BNmvwiVqZ7ctL+e8v5GbGdk5bCiLSEPXzTCAyMulJ4FwRqRCRfsAg4NVcymbkP9ELp/iF29pxUNYliEWqsn1h7CEx00tLQkwa1DJKr5urU14SYkzfzinJECEVyWMp9ERF97qKFXl0/K7KeTkk9UHgJeBQEVktIhcBPxKRt0RkIXAc8DUAVX0beAh4B/gzMFtVD3glW7EQDjIQXOWTba6dPsRvEQqscZZZYX76uZH85qI04vbkfNRS/G3J7GM2ZkE3y5HmcdnGM/eRqs6MkXxXgv1vAW7xSp5ipHjMQXBIdYRKPhrtwd2qeG/9p0n3OyHNEB5BUY7Fis1oNowsko1hhRdM7Ju5IB6SVu3fwdVY/Fb7jO5Ty4UTW84/iGdKM/fKxc8gnY7zId1Tn1Dnd8h7MwoFjNW4ck823ufvnnpY5pnEwY3CSTQkNRcI0qIF9dhXJvKdU4cl1PjHDE69P8mNAUm6HGfU96tPHNwmcWDXKt78ztRURfMVC52dp7y0/GMObV0LMSvgOyHXHc0t/7sl1Q7YfHRPpcPMMb2T7xSHdFp3nTuEI7ZGguTd+pnhnBtj/oiIuL4HQblT1lLIQ1SVmXe+zOfvfLnVhpY/C6vTMz/4fAzF0K4se6/ZQ5lOHssCXg+ccvPcxp2nkKFq/fJkd0tpvv7tE4HwUNEVt54c0yCki9+vrRmFPGbxuu1+i1DwnDc+9nDLeJwxuu1E/MU3T2+TFpRaYSw8jfLp2Yzv1FVprFNcMrl/yvm0zbdlzgEefRwTMwoFTr49kEHjhpOHpn3sqSN7JN0nG7dnWA/3C6+kojrj7RsEl1Q8GdJ63hNPac6IVK53UN5VMwqFhN/tzjwinY5JN0S7Pm47ZxTLbmnbSsg2A+qrWHrLdKZ6vIpbrohvjBIck4Nn38tzRI8489vta0YhD3FbowhK1MUgkuJ6LHG54viBLX5HZxsKScxwzJD9Gc1lcc4TNLyONBoE2i7ek5zSbD2QWSA/niTDyDLZGgte3koZu803skBPrmMfuTzKgzyzSzoyzBje0OK3uyGpmRU2lccsCG45sCGpBY3fzdAgk62aejq5vHTd8TR0rEy+owektB5CnH07VHirNpIZ1gcuGcvHn+5tkda7U+L1m1+74QQ6VpbFPl9q4nlCdJn9buFbS8Ew0qRflw5t0ty8zrk0CF5UDDpWlvH5sekNwXQrTiyjHUnqUN7WKCXrbK+vrqC8NDN1F7mWyYYYt5Y8XgXk+hlDUh7dlguspZCHBKORGTxq2sWuCWZCvFrbuP6dmTCwjnkrN7dILylJcfF4j++mV66enrVhw+aHUhNJFOYiy+G6W2X38nVTUpp3IkBFWeyVhctKQvSo9afFmIi4RkFErk50oKr+LPviGNkmCP7fXJHLF+zQbrFj2pSFQvzr2uMD1XGYKpFnxo0bwwtXUiZXrn99let90zHI3Tu2S/mYqopS/vLVyZz0n3Pj7iME511NZPKqnU8jcBnh5TF7ApcCR3gvmmEEn9YvcklI6FFbSdea1JWHF3itZ4Jm+s4bfwi/nzWuRdrfrj4m4TFuBwdkMvGuTUgaoCGOgfG7LzCuUVDV76nq9wivgnaEqn5dVb8OHAlkb0634RlBe2G95MpWQ0P9IuUWQo5rh34onBe+eRwnHdYtpfMnDUQXp1otIozt36VF2sCu7lsPAB3KD7p7vLo9dVUVnHRY98C0DqJx0/brBkR39e910gyfcOs3vfbRt+K6OQqNVBdc94pQwNxG6Ujj5ulKxXffu3N7Bnat4i9vr0+4X3lJiL0HmuIKHe3uqW1f7vr8cYlThGyGro6X1+TBdS23SX64jyLcB7wqIjeJyE3AK8A9XgplpEes5u2S9RYfKZ/o7ridOrVv2Wn+8KXjmfuN41LOLxM9k87aB3HlcCHIU1ce7SovQZg8qI5fzBzt7uRJ83O5n+tWjvurHpS5CdEkNAoSNmX3ARcAm53PBar6g2QZi8jdIrJBRBZFpf1YRBaLyEIReUxEap30viKyS0QWOJ87MilUsRLEByzfcasIzjqyF3ed35hy/q3v2GOzJw
DwxysncfeXDubX2LczfbokHoufDySqhXerPuhjT2ZERMRVbCk/qPJ4HofXJJReVVVEnlbV4cDrKeZ9D/BfhI1KhGeA61R1v4j8ELgO+JazbbmqjkrxHIbhKxFD3KNjO6YMTd2rWtMu/ApeM3UwQxtqmucw9Kit9GQ0VbYmRiVS2n+6ahIbt+9JOc+QU0WtrwqGKxDSG+J66oge/O8/V7Bg1Za4VzvWfQhKpc6N++h1ETkq1YxVdS7wSau0v6rqfufny4Q7sY0Uiffo+D0T0ldi1EBvzCDCKcBvLhqT0fFu+Nb0Idx48lC+cuzAtIxKtnjgkoNLbKaiB2M9c0MbapjcKuCgmyyr25Xxo7NG8NuL01/uMxVSVcFu365QSPjiuNTmb0RfR7+X43TTzhkLfEFEVgI7iAypVR2R4bkvBH4f9bufiLwBbANuVNUXYh0kIrOAWQB9+tggKCM+w3t2zOj4SYPq2XegKUvSxKZ9eSkXT8o8hn+mDIwxvl8EHp89kY+27MqZHGc3hldQi6WwveqI9UIHB6POnx5ujMJJ2T6piNwA7Afud5LWAn1U9WMRORJ4XEQOU9VtrY9V1TnAHIDGxsZ8vvaGx+SixtWsqPweXO4Ro3rXMqp3bZt0r1+8igRRX3N5qdMtZ0T8ZNFrow1d3ow+UtWVqroS2EX4GkU+aSEiXwJOAb6gjsNOVfeo6sfO9/nAcmBwuucoVFZv3sllv53P7n0H/BYlL0ikPNyupZzvtFY0rpa6dPF6R0beejUC97aZo7zJOIpsh8SI5pQRPbj46H5cPyO2C7P1fRA5mOb3bPikRkFEThORpcAHwD+AFcCf0jmZiEwDvgmcpqo7o9LrRaTE+d4fGAS8n845Cpn/eOpd/rRoHc8t3uC3KL6Qqh4/ok+nuNu+/5nh7s6Z2ikLikR9VBdM7McXxvZh1jHu1jROVf8GMWhgKi3PspIQN54yjI7t3cfjGtq9houP7scvP+9vwAg3Hc03A+OA91S1HzCFcCdxQkTkQeAl4FARWS0iFxEejVQNPNNq6OlkYKGILAAeBi5V1U9i5WsYbhjXvzMlIeHsxthjGboFJAxFMu69cAzfnHZo2sd71SDqUFHKLWcOT3n4Za4baFdOGZTR8bl06YRCwo2nDPN96LGbO7rP8fWHRCSkqs+LyH8mO0hVZ8ZIvivOvo8Aj7iQxTBi0lrXXDixny9yZJtjBtdntHRoUPzU6fDjs0awIWpoazplufpE80KnipuWwhYRqQLmAveLyG2ERyEZAaNI3OQpccXxmdUUk9Hcz+zpWXKLF4YknTH4n2vszezjvItp5Uai+y4cw+Bu4ZFZ2bzHrfMK0vPjxiicDuwEvgb8mXAn8KleCmWkRz7XCt2QzovTu3P+zwL2DQ80VRDn0iSSafLgep66YhIje9dyy5nu+qHyHTdG4VxggKruV9V7VfX2yEghwygmenVKv/Nz1mT/5yL4ycyj+lBXVcFpo4IZmiIR5aUhnpg9kfEDuiTfOUW8HAGVLm6MQh/g1yLygYj8QUSuEJFRHstlGHlFMtddvKGJxULfug7Mu/GE5hXb0iFX6rNDRTh09jdOSr+DP1WC5Pp1M0/hu6p6PDAMeAH4BjDfa8GMtiTzywbpwfKbWMMHpx/ePSv5RHPqiAYATnH+J2LCgC58ZnTPlGXINUHvJ8nWcx6vkl7qBGE6PQetmunDw8/NaSOD81wkHX0kIjcCE4Eq4A3gGsLGwQgQW3ft4/kinb/gJ4O6VbPi1pNd7fvAJeOS75R1wprvyEM6MX/lZt/j6gQRPy/JgPoq189PrnDjPvoM0AX4G/Ao8ISqrvVUKiMmkQ6xWDWcyx94nY937G27oYBIpNBM1yXGLo//RK8OGOTn1Y376AjgBOBV4ETgLRF50WvBjNR4f6ONEjayQxA7P70g16W8euqh3OpyJr2fuHEfHQ5MAo4BGoFVmPvIF4ISbz1fybR2FuTanRcEzdVULMbKb9y4j24FaoDbgaGqepyqfsdbsQwjM7xQZ4Wgk1pfl0gYkKp2B+uHh/WoAeCEoV1zJVZKZGuuw4nDwutXDGuoyUp+hYIb99EphA3Cx6q6z3uRDID31m9vUzNq7lOwFkNWue3cUXG3BauunDrxDNnEgeEx99dMPZQl/zGN9uUHjcLArtUsvnkap4/yfkTMmH6dPT9HPE4b2YPFN09jULfqnJ87iJP4IriJknoqsIDwbGZEZJSIPOmxXEXN35dsYOrP5/Lw/NV+ixIovHqNSlyGKg6YNyUlWst+zwVjeOffT0JEqCgtabN/u7K2aV5wf45WWYtHrsoZIR+qc27cRzcBY4AtAKq6ACiMaGMZsn7bbk/yXbbhUwDeXbu9RXqiFsKaHK6OZeQfrVsMZSWhFq0Dv0i2CI0bxvfP/kzjYsbNHdmnqltbpeWDwfOUZ95Zz9jvP8vfl3g3N8DcRIk5qu/B9RJy0RwviD6FPG7txOK1G07gfy9IeQn5hBw9sA4g5bDghYIbo/C2iHweKBGRQSLyC+BfHssVeBas2gzAojWt7aV3JJqnUIzcd+HYjMJKRyiG61lIxiC6LPXVFVl3Ad18xuHM/cZx1LYvz2q+0QT5frgxClcAhwF7gAeBrcBVXgqVT6SiUFSV/SksBN+69msth5ZUlpfQozb2YjmxXrpMWxNBfpGN7FFeGvJ9oRs/cTP6aKeq3qCqR6lqI/AbwiuoGSnyu9dWMfCGP7F2qzv/fzwjUKzKyY9y5/u1tmqEkSpxjYKIjBCRv4rIIhH5DxFpEJFHgGeBd9xkLiJ3i8gGEVkUldZZRJ4RkaXO/05OuojI7SKyTEQWioi/C5V6wBML1gDwwaaWs49P/+U/Oew7f3adTzG4OwyjEMmHdzdRS+FO4AHgs8AmwsNSlwMDVfXnLvO/B5jWKu1a4FlVHUTYwFzrpE8HBjmfWcCvXJ7DFw7OGcicN1dtYcfeA+7PmQcPViYc0afW9b6Ffi0ype0KX/nb9Cmkex3kFmgio1Chqveo6hJV/U9gh6p+U1Vdj8NU1bnAJ62STwfudb7fC5wRlX6fhnkZqBWR5PGIC5B44QWKpU/hjDTCS2fykhXyVS3EsgVZoRYCicZctROR0RysbOyJ/q2qr6d5zm5RUVbXAd2c7z0Jx1WKsNpJaxGRVURmEW5J0KdPnzRFyA+ia0bvrd/OX95eD9hL4Qa7Ri3J5xaCkVsSGYW1wM+ifq+L+q3A8ZmeXFVVRFKqzKjqHGAOQGNjo28VoWwpnR179rva7+m3DtrGIDejH7xkHDPvfNmTvMOKzb/C57NiLZZWZtDJh/sQ1yio6nEenXO9iDSo6lrHPRSZ/bUG6B21Xy8nraBp3ekMcPNT4X78eIYnyI/V+AFd6F7TjnUZzPb2TPWmkXG0Ky8fXuhk5HMLqhCuf6UzpyLIE+P8kOxJ4HzC0VfPB56ISr9cRH4HjAW2FvtiPtEtgnyupaZKrl/9YgjJXEjPTz6X5fRRPdmwfQ/nj+/rtyhx8dQoiMiDwLFAnYisBr5L2Bg8JCIXASuBs53dnwZmAMuAncAFX
sqWLVKbvNY2LVHNbe7SjTHTg/5K5FuNzm38nXxWRkYwKAkJlx4zwG8xEuKpUVDVmXE2TYmxrwKzvZQniCRSNO9v3MGBJuXK373Bnn0HZ0L/c9mmXIjmG3GvSIwNrQ1teWmIvfubUlLgU4d147JjB/Crvy93fUy+UAytICO7xDUKySaPZTD6qGhJx5/70ZZd/HFhSy/ao28Eu6vFDz0UubQTBnTh70tit7DiUVoS4lvThiQ1CvnWAoqFtXWMZCRqKfw0wbasjD7KZ9J5uVJ1H0E4vk/Rkc+9oQEjaEtqZoI1enKDH6OPjBTIx1c6Yz3k0dufqVj52KcQcR9959Rh3PnC+xw3JJhLbKZCAdm5QOKqh01EDheRs0XkvMjHa8HyhXguhd37DvCDp99lV1T4imJ5mAu1RveLmaP9FiFtetRWctu5o3O+0piRf7hZjvO7wC+cz3HAj4DTPJYr+CTR8P/7zxX8eu77zJn7fptt23bt4/Znl9LUpC2y2bJzL794dmmLfQtUvyYmzrV1Y1O9NEjDe3bkplOHeXcCDyjK58fICDejj84CRgJvqOoFItIN+K23YgWTnXv3c/2jb3H80G5J993nrJuwv6nt+gk3PfkO67bt5rAeNfTsVNmcfsPji9p0KucjuVREcTt/PWiVFUtLzyhu3BiFXaraJCL7RaSG8Azk3skOKkR+8pf3eHzBRzy+4COunDIo4b4rPm47UznCjr3h0Bb7Wi24szNGyIs1m23tZTeYwo5NIV2WVCobxx5az+jenZLvaLTBjVGYJyK1hENpzwc+BV7yUqigsn33vjZpqvDK+x9TU1nG0Iaa5vRHX48/bFRafI8Oo9CWs+4o+pVPjQwoRPeRG0N3zwVjPJejUHGz8tpXVHWLqt4BnAicr6p5Mds428R7wc6Z8zLTb3shrTyT1XD3HSjE1zqYdKws45qpg+Nuz+fhnfkruZFr3HQ0Pxv5rqorVHVhdJrRkg827WDRmq2u9i3UUTqZEk+BudHJiS5pMqX+5nencvnxid2ChlHoJJrR3A5oTzhuUScOvqs1hNc5KGriqZfjfvL35MdGKaemArQMQSiS1YwNIz0S9Sl8Gfgq0AOIDmmxDfgvD2UKLNlSdlt3HeybuHPuB9nJ1DAKHIvjlBsSzWi+DbhNRK5Q1V/kUKa8IpXHtPUz/e7abSxZvy2r8gSD3L28pieKjzzu2skL3Mxo/rWIXCkiDzufy0WkzHPJAk42Hszbn1vGojX5axSumz7EbxGayccQFLmgkIzmGaPCXusuHSp8lqSwcTMk9b+BMuc/wBeBXwEXeyVUUMk0Smah1XDilyezgsbL143iNxdDbArh2bv8+IFcPKl/cQaJzCGJOppLVXU/cJSqjoza9JyIvOm9aIVH8egr/wuaz8NHvaAQnj0RMYOQAxK5j151/h8QkealgkSkP3Ag9iHJEZFDRWRB1GebiHxVRG4SkTVR6TPSPUdOSeFt27677YzlNLMKBLl22ZwwLHl4kUQUmplobwrS8IBE7qPIO3QN8LyIRCK79SWDpTJVdQkwCkBESoA1wGNOnj9X1Z+km7fnZKi031mbuP+gUIanelWMn3xuBP/35kfeZJ5nvP7tEykrcW/mrOFkuCWRUagXkaud778GItWSA8Bo4PksnH8KsFxVV1pzH15YWtjLbGZKRanVjCN07lDutwhGgZLIfVQCVAHVhI2HOJ9SJy0bnAs8GPX7chFZKCJ3OxPmAksqrhNVLcgO0CDY8cK7qtmlEJ87w1sStRTWquq/e3ViESknvC7DdU7Sr4CbCb/nNxNeDvTCGMfNAmYB9OnTxyvxYpLO67Xp0z30u+5pbj7j8KzLY0QhCX8ahuGSRC0Fr9+r6cDrqroeQFXXq+oBVW0iHJE1ZphDVZ2jqo2q2lhfX++xiMlJZihWbNoJwKOvr/ZemALB5hxkD3PLGqmSyChM8fjcM4lyHYlIQ9S2M4FFHp8/I1J914rp1TSHRXAw95GRKnGNgqp+4tVJRaQD4TDcj0Yl/0hE3hKRhYSX/fyaV+dPxEX3vMbpv/xnzG3pvGCFMqIolyQyuL+9aKwn+RY61voy3OJmRnPWUdUdQJdWaV/0Q5bWPLt4Q1bzi9iEYmrGZ1o7TXT40YPqMsrbMIzEuIl9ZDjE0lXJ9F8kNEYhmoQgGLrW198aZoaRGWYUPKYYlVSmxiKdw1sfEgB7ZRh5iRkFj4n0KRSTkvKzczMSG6ckVEQXPAFFWCcxMsSMQgpE6zq3Kqcp0qdQgA6kbJbooqP7UV+deUjkH352BF87YTDj+nVJvnMxUXiPn+ERvnQ0FxPFWFNLpcwXTuxHRVmIa6Yeyo49+/nda6syOnfnDuVcdULsdZaLWi8W44NopIUZhQxJtsZCsyulqDVSfOqqy/nKsQOB/Op/ySNRDSMlzCikQLQicNtHEOlTWLJue/YF8pl41yBT5R5k+3nOUX14+6NtXH3iYL9FSY0gX1QjUJhR8JiIgty6a5+/ggSU6L6WdFa2y3Q1vFSpLC/hx58bmXxHw8hTrKPZY5qK0M+QykirbI3KCsKciUBShM+fkRlmFFIg1lDLpJPX8slRHkVXFyOB4qnhdItciCO0DCPfMKOQAunoujy1CZwyokfOz5lrV1BRYHbWSBEzCimwcPWWhNvXbt3VJq0YFV0qraNYOssrT1BRupiK7/EzMsSMQgqs+uSg0o+l98b/4Lk2aYXcp5ANJVuMetoP7Dobbilqo/CXt9exfXd6o4Lc6vp87VMoTWFReF/Jz8trGIGlaI3Cik07+PJv5vP1h95M63i3uj5PbUJasYOuOH6gB5K4I09MWM6JzO6uLCvxWRIjXyjaeQo79x4A4MNPdmaUTzKdn6+L7JS48De03uXrUw9N+Twt5ink56UKNBdP6s/Fk/r7LYaRRxRtSyFTCr0DORSjpTC+v7sgc7GuzNmNvWLuG8v22NBUw/AP34yCiKxwlt9cICLznLTOIvKMiCx1/nfyS75ska8dzSUiDKjvkHCfVFT3j84ayYpbT27+feHEfmlKZhiGl/jdUjhOVUepaqPz+1rgWVUdBDzr/A40yVwembqn/CJWR7PrESwuDKEfLS1rfxhGcvw2Cq05HbjX+X4vcEauTrxl514279jbIm3v/ibWbGk79wAK3/89qndtTs5TlHMHDCPA+NnRrMBfRUSBX6vqHKCbqq51tq8DurU+SERmAbMA+vTpkzVhRv37M23Svvnwmzy+4CPe/fdpzSt6RQsPsGvv/qzJECQmDqxLXpePp9Bd6PlYRjWtGeNpHGMYRnz8NApHq+oaEekKPCMii6M3qqo6BoNW6XOAOQCNjY2e6YTNO/by7OINAOzed6CNUYhotXtfWumVCIFnYH1V7A0p3JVstROswWEY2cE395GqrnH+bwAeA8YA60WkAcD5v8Ev+Ubf/Azbd4dbAT/407t+iRFoxg9Ib8nLaAVuytwwgoUvRkFEOohIdeQ7MBVYBDwJnO/sdj7whB/ytebpt9a1STO3RXwSzc14+bopvH7jiYlnepuhMAzf8Mt91A14
zOlkLAUeUNU/i8hrwEMichGwEjjbJ/mMDNi570Dcbd07tgOgX114uGvP2sqcyGQYhjt8MQqq+j7QZvkqVf0YmJJ7iYxsMrR7De+s3ZZwn/PG92VIQw3joibEeT6iK0YLpKqiaCf1G0ZM7I1Ik0Ifkgqk7SO7/+KxPLd4A1//Q/y4UqGQtDAIfvDUFUfTtSb5YkKGUUwUrVHItIOz0MNcxMLtNevUoZyROZrnkEkU2sN7dsyiJIZRGARt8lrOSEWXxFI8v3x+eRalKUTSV9bp2OtEBuvLx1hAOMNwS9EahVTYsfcAmz7d47cYGdO+9VyLVsw+bkDWzhUk99qgrtV+i2AYeUPRGoVU3UfzVmz2RpAA8dkjWkYyzUSvB8gmGIaRAkXbp5Aq+5ua/BbBUy49ZgD9481QznOi3X//c14j3Wra+SiNYQQbMwou2X8g/+u+8RpHg7tVce30ITmVJRbRnfe/vWgs/3bXKzH3+8XM0fTp3D7l/AXhhGFtwmm14YnZE1mZp9FtDSNTzCi4ZO+Bwm4pZJtM+hREhN6d409qO3Vkj4PnSf80cRnZuzZno6cMI2gUbZ9CqmQy9DGI1LYv8zT/dIbshjIYJ2yrtRlGdij6lsLiddv52TPvJd3v+08vTrpP0Ileu8BrFZqODb1+xlAqSkOcOrKBdVt3Z1+oNHnksvG8+kHhDzQwDLCWAgC3P7s06T5bd+3LgSTecubons3fow1EPAWeSesonUM7dyjnljOHU1GaeOhsrjnykM5cdmz2husaRpAxo1BEXDllUPP3u85vTLBn5uRqxvc3pw1hypCuTD0sfgdyYTn+DMNbzCgUCVdNGUR99cE4P6P7dEp6zE/PHsWkQXVeihUXt9FTe9ZWcteXjqJ9eXJPaKGv3fDNaUNoPKSTb/fMKAzMKBhxa9JHHtKJ31w0tvl3Kp25mfbLl5bYo5kqA7tW8fBlE+hgkV+NDLA3z8hKp/NFR/fLQi6GYfhN0RqFRWu2+i2CJwzu5s+s5G+fMqzF7yCN4D1mcD0lIeH88X39FsUwAk/RGoVvPLzQbxE84a9fOyZmeiId7VZ/z5qcn9FGu9W0Y/n3ZzC8l4XKNoxk5NwoiEhvEXleRN4RkbdF5Con/SYRWSMiC5zPDK9kaGoKUDXWA35+zsFF7UIR31CCqrvboaeTB9ez4taTXe1bjOtNGEYh4EdLYT/wdVUdBowDZotIxPfwc1Ud5Xye9kyAPDMK3zvtMH702RGu94/W8VdNGRx3v1LHYnx+7CHNaccP6Ur3LASMC5L7yAgePWsrOWZwvd9iGDHI+TAFVV0LrHW+bxeRd4GeiY/KLk15qLGmDO3a4veZo3vy2BtrXB8fq8T11RWs3bqb6Yd3b067+0tHpStiC3qnEbDOyB4je9eyOsBB/f557fF+i2DEwdc+BRHpC4wGIuEwLxeRhSJyt4jEHEgvIrNEZJ6IzNu4cWNa5823lkIsfn7OqLjbom1e67H5Q7pXU1bi/YD9zh3KXbua4jHKgtKlzROzJzL/2yf6LYaRh/hmFESkCngE+KqqbgN+BQwARhFuSfw01nGqOkdVG1W1sb4+vebngTwzCplMuhrbrzMA4/p3AeBPV03ivf+YDsBZR4YX1amp9DY4Xro89pUJfPADz7qWDMOIgS9GQUTKCBuE+1X1UQBVXa+qB1S1CbgTGOPV+YPS0dy7cyU3njzU1b6pSBwxIqeO7MHY/l1YfPM0Jg6sc7ZJc9yjq08czOKbp1GVwWSnMY7R8YKIrB2SLCNqGEb2yHmfgoQ10l3Au6r6s6j0Bqe/AeBMYJFXMgTFfVQWCiVUyEcPrOPFZZtSzjfiPop0JLcri61URSTuNrfcf/FY9u73dq0Jc4MYRu7wYz78ROCLwFsissBJux6YKSKjCFeKVwBf9kqAXHU0j+nbmVdXfJJwn0SuofLScEMukfeoojTEnjhKORehfspKQpR5HJIiU8NlGIZ7/Bh99CKx9ZVnQ1Bbk6s+hYqyJMpSEg/dbJ5jINJiv6ENNQDMv/EENu/cywk/m9viOC9K16+ugwe5GoYRNIpyRnNQOprj1eTLnZr34T3DM3AP6dyedo6BOaJPLQ9fOh6ALlUViaODZqmp8NJ1x/N/Vxzd/HuEzQw2jIKlKMMp5sooRNfuZ47pzYOvrmLSoDpeWHqwnyCW+yiSdt74vpwwtFuzcfjb1ZPp1al9C3dKrJL0qA1PPhvcrTrjMgA0dGwZxvqBS8axafseV8e+cv2UjJbZNAwjtxRnS8HDPoX/veAo2scYLdO5QzkAndqXN68VMLJXLd2jFO4PPzucZ742mX0Hwn0EFaWhZoMAMLBrtSv/+oQBdTz6lQnMmuRNrKKqilL6unQndatp12IdB8Mwgo21FLLI2Y29OO7QrhzRp1ObUUPRduiZqydz30sruXBiP8pLQzxw8ViaFCYO7IKIEBGvojS5zY4Xt+gIF4voGIZhtMaMQhYZ0y88QSziLamrKm+zjwi0Ly/l0mMOrvk7YWDLlbL6dmnPio93ulpoJrKecV1VBY/PnpCu6IZhGIAZhaxS22pm8Bmje/L4go8AUlqM/qFLx7N8ww5X+9ZXV3DPBUcxuk8nOgZ0ZrJhGPlDURoFr+YptA5aB/Dmd6fyr2Wb2LXvAOBuQFDX6nZ0rXYfqfTYQ9ue1zAMIx2KsqO5rir9js+K0hAPXDKWJy+f2GZbJHyERI226VhZxvThDc2dz7Xt27qUDMMwgkJRGoUetZXcf/HY5DvGYOaYPkwYUMeIXrWUhBLX+6PbI1OHdeemU4fxrWlD0jqvYRhGLihK9xHAxIF13HvhGM6/+9WE+10/Ywiq0KTwwz8vbjEk9C9fncRba7YyqGs1H0bFrq+qCO9TFjpoc0Mh4UsTbXF7wzCCTdEaBQgv6D73G8dxw+NvcdHR/ehZW8mrKz7hhsfCsfh+8rmRzeGl9+5vYte+A1x6zMGx/wO7VjOwa3iCWPR8glvOGM7Q7jVMGNAlh6UxDMPIHHG7Pm8QaWxs1Hnz5mU933+8t5Htu/dxyogeWc/bMAzDb0Rkvqo2xtpW1C2FeNjasYZhFCtF2dFsGIZhxMaMgmEYhtGMGQXDMAyjGTMKhmEYRjOBMwoiMk1ElojIMhG51m95DMMwiolAGQURKQF+CUwHhhFet3mYv1IZhmEUD4EyCsAYYJmqvq+qe4HfAaf7LJNhGEbREDSj0BNYFfV7tZPWjIjMEpF5IjJv48aNORXOMAyj0Mm7yWuqOgeYAyAiG0VkZQbZ1QGbku4VfAqlHGBlCSqFUpZCKQdkVpZD4m0ImlFYA/SO+t3LSYuJqmY09VhE5sWb6p1PFEo5wMoSVAqlLIVSDvCuLEFzH70GDBKRfiJSDpwLPOmzTIZhGEVDoFoKqrpfRC4H/gKUAHer6ts+i2UYhlE0BMooAKjq08DTOTrdnBydx2sKpRxgZQkqhVKWQikHeFSWvA6dbRiGYWSXoPUpGIZhGD5iRsEwDMNopiiNQj7GVxKRFSLylogsEJF5TlpnEXlGRJY6/zs56SIitzvlWygiR/g
s+90iskFEFkWlpSy7iJzv7L9URM4PSDluEpE1zn1ZICIzorZd55RjiYicFJXu+/MnIr1F5HkReUdE3haRq5z0vLovCcqRd/dFRNqJyKsi8qZTlu856f1E5BVHrt87IzMRkQrn9zJne99kZXSFqhbVh/CopuVAf6AceBMY5rdcLuReAdS1SvsRcK3z/Vrgh873GcCfAAHGAa/4LPtk4AhgUbqyA52B953/nZzvnQJQjpuAa2LsO8x5tiqAfs4zVxKU5w9oAI5wvlcD7zky59V9SVCOvLsvzrWtcr6XAa841/oh4Fwn/Q7gMuf7V4A7nO/nAr9PVEa3chRjS6GQ4iudDtzrfL8XOCMq/T4N8zJQKyINPsgHgKrOBT5plZyq7CcBz6jqJ6q6GXgGmOa58FHEKUc8Tgd+p6p7VPUDYBnhZy8Qz5+qrlXV153v24F3CYeUyav7kqAc8QjsfXGu7afOzzLno8DxwMNOeut7ErlXDwNTRESIX0ZXFKNRSBpfKaAo8FcRmS8is5y0bqq61vm+DujmfM+HMqYqe5DLdLnjUrk74m4hj8rhuB1GE66Z5u19aVUOyMP7IiIlIrIA2EDYwC4Htqjq/hhyNcvsbN8KdCHDshSjUchXjlbVIwiHFZ8tIpOjN2q43ZiX44vzWXbgV8AAYBSwFvipr9KkiIhUAY8AX1XVbdHb8um+xChHXt4XVT2gqqMIh/gZAwzJtQzFaBRSiq8UFFR1jfN/A/AY4QdmfcQt5Pzf4OyeD2VMVfZAlklV1zsvchNwJweb6YEvh4iUEVak96vqo05y3t2XWOXI5/sCoKpbgOeB8YRddZGJxtFyNcvsbO8IfEyGZSlGo5B38ZVEpIOIVEe+A1OBRYTljoz2OB94wvn+JHCeM2JkHLA1yiUQFFKV/S/AVBHp5LgCpjppvtKqr+ZMwvcFwuU41xkh0g8YBLxKQJ4/x/d8F/Cuqv4salNe3Zd45cjH+yIi9SJS63yvBE4k3EfyPHCWs1vrexK5V2cBzzmtu3hldEcue9eD8iE8kuI9wv66G/yWx4W8/QmPJngTeDsiM2H/4bPAUuBvQGc9OIrhl0753gIafZb/QcJN+H2E/ZsXpSM7cCHhTrNlwAUBKcdvHDkXOi9jQ9T+NzjlWAJMD9LzBxxN2DW0EFjgfGbk231JUI68uy/ACOANR+ZFwHec9P6Elfoy4A9AhZPezvm9zNneP1kZ3XwszIVhGIbRTDG6jwzDMIw4mFEwDMMwmjGjYBiGYTRjRsEwDMNoxoyCYRiG0YwZBcOIQkQOREXWXJAsWqaIXCoi52XhvCtEpC7TfAwjU2xIqmFEISKfqmqVD+ddQXjs/6Zcn9sworGWgmG4wKnJ/0jCa1q8KiIDnfSbROQa5/uVEo7rv1BEfuekdRaRx520l0VkhJPeRUT+6sTN/x/Ck8Mi5/o35xwLROTXIlLiQ5GNIsWMgmG0pLKV++icqG1bVXU48F/Af8Y49lpgtKqOAC510r4HvOGkXQ/c56R/F3hRVQ8jHMuqD4CIDAXOASZqODDaAeAL2SygYSSiNPkuhlFU7HKUcSwejPr/8xjbFwL3i8jjwONO2tHAZwFU9TmnhVBDeMGezzjpfxSRzc7+U4AjgdfCYX2o5GBQOsPwHDMKhuEejfM9wsmElf2pwA0iMjyNcwhwr6pel8axhpEx5j4yDPecE/X/pegNIhICeqvq88C3CIcxrgJewHH/iMixwCYNx/ufC3zeSZ9OeClLCAejO0tEujrbOovIId4VyTBaYi0Fw2hJpbPyVYQ/q2pkWGonEVkI7AFmtjquBPitiHQkXNu/XVW3iMhNwN3OcTs5GOr4e8CDIvI28C/gQwBVfUdEbiS8yl6IcETW2cDKLJfTMGJiQ1INwwU2ZNQoFsx9ZBiGYTRjLQXDMAyjGWspGIZhGM2YUTAMwzCaMaNgGIZhNGNGwTAMw2jGjIJhGIbRzP8Do9u/judwoa4AAAAASUVORK5CYII=\n",
242 | "text/plain": [
243 | ""
244 | ]
245 | },
246 | "metadata": {
247 | "needs_background": "light"
248 | },
249 | "output_type": "display_data"
250 | }
251 | ],
252 | "source": [
253 | "class Policy(Model):\n",
254 | " def __init__(self, action_size):\n",
255 | " super().__init__()\n",
256 | " self.l1 = L.Linear(128)\n",
257 | " self.l2 = L.Linear(action_size)\n",
258 | "\n",
259 | " def forward(self, x):\n",
260 | " x = F.relu(self.l1(x))\n",
261 | " x = F.softmax(self.l2(x))\n",
262 | " return x\n",
263 | "\n",
264 | "\n",
265 | "class Agent:\n",
266 | " def __init__(self):\n",
267 | " self.gamma = 0.98\n",
268 | " self.lr = 0.0002\n",
269 | " self.action_size = 2\n",
270 | "\n",
271 | " self.memory = []\n",
272 | " self.pi = Policy(self.action_size)\n",
273 | " self.optimizer = optimizers.Adam(self.lr)\n",
274 | " self.optimizer.setup(self.pi)\n",
275 | "\n",
276 | " def get_action(self, state):\n",
277 | " state = state[np.newaxis, :]\n",
278 | " probs = self.pi(state)\n",
279 | " probs = probs[0]\n",
280 | " action = np.random.choice(len(probs), p=probs.data)\n",
281 | " return action, probs[action]\n",
282 | "\n",
283 | " def add(self, reward, prob):\n",
284 | " data = (reward, prob)\n",
285 | " self.memory.append(data)\n",
286 | "\n",
287 | " def update(self):\n",
288 | " self.pi.cleargrads()\n",
289 | "\n",
290 | " G, loss = 0, 0\n",
291 | " for reward, prob in reversed(self.memory):\n",
292 | "            G = reward + self.gamma * G  # return from this time step onward\n",
293 | " loss += -F.log(prob) * G\n",
294 | "\n",
295 | " loss.backward()\n",
296 | " self.optimizer.update()\n",
297 | " self.memory = []\n",
298 | "\n",
299 | "\n",
300 | "episodes = 3000\n",
301 | "env = gym.make('CartPole-v0')\n",
302 | "agent = Agent()\n",
303 | "reward_history = []\n",
304 | "\n",
305 | "for episode in range(episodes):\n",
306 | " state = env.reset()\n",
307 | " done = False\n",
308 | " sum_reward = 0\n",
309 | "\n",
310 | " while not done:\n",
311 | " action, prob = agent.get_action(state)\n",
312 | " next_state, reward, done, info = env.step(action)\n",
313 | "\n",
314 | " agent.add(reward, prob)\n",
315 | " state = next_state\n",
316 | " sum_reward += reward\n",
317 | "\n",
318 | " agent.update()\n",
319 | "\n",
320 | " reward_history.append(sum_reward)\n",
321 | " if episode % 100 == 0:\n",
322 | " print(\"episode :{}, total reward : {:.1f}\".format(episode, sum_reward))\n",
323 | "\n",
324 | "\n",
325 | "# plot\n",
326 | "plot_total_reward(reward_history)"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "id": "3372fddf",
332 | "metadata": {},
333 | "source": [
334 | "## ch09/actor_critic.py"
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": null,
340 | "id": "59a22521",
341 | "metadata": {},
342 | "outputs": [
343 | {
344 | "name": "stdout",
345 | "output_type": "stream",
346 | "text": [
347 | "episode :0, total reward : 41.0\n",
348 | "episode :100, total reward : 12.0\n",
349 | "episode :200, total reward : 10.0\n",
350 | "episode :300, total reward : 15.0\n",
351 | "episode :400, total reward : 67.0\n",
352 | "episode :500, total reward : 54.0\n",
353 | "episode :600, total reward : 166.0\n",
354 | "episode :700, total reward : 200.0\n",
355 | "episode :800, total reward : 151.0\n",
356 | "episode :900, total reward : 200.0\n",
357 | "episode :1000, total reward : 200.0\n"
358 | ]
359 | }
360 | ],
361 | "source": [
362 | "class PolicyNet(Model):\n",
363 | " def __init__(self, action_size=2):\n",
364 | " super().__init__()\n",
365 | " self.l1 = L.Linear(128)\n",
366 | " self.l2 = L.Linear(action_size)\n",
367 | "\n",
368 | " def forward(self, x):\n",
369 | " x = F.relu(self.l1(x))\n",
370 | " x = self.l2(x)\n",
371 | " x = F.softmax(x)\n",
372 | " return x\n",
373 | "\n",
374 | "\n",
375 | "class ValueNet(Model):\n",
376 | " def __init__(self):\n",
377 | " super().__init__()\n",
378 | " self.l1 = L.Linear(128)\n",
379 | " self.l2 = L.Linear(1)\n",
380 | "\n",
381 | " def forward(self, x):\n",
382 | " x = F.relu(self.l1(x))\n",
383 | " x = self.l2(x)\n",
384 | " return x\n",
385 | "\n",
386 | "\n",
387 | "class Agent:\n",
388 | " def __init__(self):\n",
389 | " self.gamma = 0.98\n",
390 | " self.lr_pi = 0.0002\n",
391 | " self.lr_v = 0.0005\n",
392 | " self.action_size = 2\n",
393 | "\n",
394 | " self.pi = PolicyNet()\n",
395 | " self.v = ValueNet()\n",
396 | " self.optimizer_pi = optimizers.Adam(self.lr_pi).setup(self.pi)\n",
397 | " self.optimizer_v = optimizers.Adam(self.lr_v).setup(self.v)\n",
398 | "\n",
399 | " def get_action(self, state):\n",
400 | " state = state[np.newaxis, :] # add batch axis\n",
401 | " probs = self.pi(state)\n",
402 | " probs = probs[0]\n",
403 | " action = np.random.choice(len(probs), p=probs.data)\n",
404 | " return action, probs[action]\n",
405 | "\n",
406 | " def update(self, state, action_prob, reward, next_state, done):\n",
407 | " state = state[np.newaxis, :] # add batch axis\n",
408 | " next_state = next_state[np.newaxis, :]\n",
409 | "\n",
410 | " # ========== (1) Update V network ===========\n",
411 | " target = reward + self.gamma * self.v(next_state) * (1 - done)\n",
412 | " target.unchain()\n",
413 | " v = self.v(state)\n",
414 | " loss_v = F.mean_squared_error(v, target)\n",
415 | "\n",
416 | " # ========== (2) Update pi network ===========\n",
417 | " delta = target - v\n",
418 | " delta.unchain()\n",
419 | " loss_pi = -F.log(action_prob) * delta\n",
420 | "\n",
421 | " self.v.cleargrads()\n",
422 | " self.pi.cleargrads()\n",
423 | " loss_v.backward()\n",
424 | " loss_pi.backward()\n",
425 | " self.optimizer_v.update()\n",
426 | " self.optimizer_pi.update()\n",
427 | "\n",
428 | "\n",
429 | "episodes = 3000\n",
430 | "env = gym.make('CartPole-v0')\n",
431 | "agent = Agent()\n",
432 | "reward_history = []\n",
433 | "\n",
434 | "for episode in range(episodes):\n",
435 | " state = env.reset()\n",
436 | " done = False\n",
437 | " total_reward = 0\n",
438 | "\n",
439 | " while not done:\n",
440 | " action, prob = agent.get_action(state)\n",
441 | " next_state, reward, done, info = env.step(action)\n",
442 | "\n",
443 | " agent.update(state, prob, reward, next_state, done)\n",
444 | "\n",
445 | " state = next_state\n",
446 | " total_reward += reward\n",
447 | "\n",
448 | " reward_history.append(total_reward)\n",
449 | " if episode % 100 == 0:\n",
450 | " print(\"episode :{}, total reward : {:.1f}\".format(episode, total_reward))\n",
451 | "\n",
452 | "\n",
453 | "# plot\n",
454 | "plot_total_reward(reward_history)"
455 | ]
456 | }
457 | ],
458 | "metadata": {
459 | "kernelspec": {
460 | "display_name": "Python 3 (ipykernel)",
461 | "language": "python",
462 | "name": "python3"
463 | },
464 | "language_info": {
465 | "codemirror_mode": {
466 | "name": "ipython",
467 | "version": 3
468 | },
469 | "file_extension": ".py",
470 | "mimetype": "text/x-python",
471 | "name": "python",
472 | "nbconvert_exporter": "python",
473 | "pygments_lexer": "ipython3",
474 | "version": "3.9.4"
475 | }
476 | },
477 | "nbformat": 4,
478 | "nbformat_minor": 5
479 | }
480 |
--------------------------------------------------------------------------------
/pytorch/actor_critic.py:
--------------------------------------------------------------------------------
1 | if '__file__' in globals():
2 | import os, sys
3 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
4 | import numpy as np
5 | import gym
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import torch.optim as optim
10 | from torch.distributions import Categorical
11 | from common.utils import plot_total_reward
12 |
13 |
14 | class PolicyNet(nn.Module):
15 | def __init__(self, action_size):
16 | super().__init__()
17 | self.l1 = nn.Linear(4, 128)
18 | self.l2 = nn.Linear(128, action_size)
19 |
20 | def forward(self, x):
21 | x = F.relu(self.l1(x))
22 | x = F.softmax(self.l2(x), dim=1)
23 | return x
24 |
25 |
26 | class ValueNet(nn.Module):
27 | def __init__(self):
28 | super().__init__()
29 | self.l1 = nn.Linear(4, 128)
30 | self.l2 = nn.Linear(128, 1)
31 |
32 | def forward(self, x):
33 | x = F.relu(self.l1(x))
34 | x = self.l2(x)
35 | return x
36 |
37 |
38 | class Agent:
39 | def __init__(self):
40 | self.gamma = 0.98
41 | self.lr_pi = 0.0002
42 | self.lr_v = 0.0005
43 | self.action_size = 2
44 |
45 | self.pi = PolicyNet(self.action_size)
46 | self.v = ValueNet()
47 |
48 | self.optimizer_pi = optim.Adam(self.pi.parameters(), lr=self.lr_pi)
49 | self.optimizer_v = optim.Adam(self.v.parameters(), lr=self.lr_v)
50 |
51 | def get_action(self, state):
52 |         state = torch.tensor(state[np.newaxis, :])  # add batch axis
53 | probs = self.pi(state)
54 | probs = probs[0]
55 | m = Categorical(probs)
56 | action = m.sample().item()
57 | return action, probs[action]
58 |
59 | def update(self, state, action_prob, reward, next_state, done):
60 | state = torch.tensor(state[np.newaxis, :])
61 | next_state = torch.tensor(next_state[np.newaxis, :])
62 |
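             # ========== (1) Update V network ===========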
63 | target = reward + self.gamma * self.v(next_state) * (1 - done)
64 |         target = target.detach()  # detach() is not in-place; keep the TD target out of the graph
65 | v = self.v(state)
66 | loss_fn = nn.MSELoss()
67 | loss_v = loss_fn(v, target)
68 |
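             # ========== (2) Update pi network ===========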
69 | delta = target - v
70 | loss_pi = -torch.log(action_prob) * delta.item()
71 |
72 | self.optimizer_v.zero_grad()
73 | self.optimizer_pi.zero_grad()
74 | loss_v.backward()
75 | loss_pi.backward()
76 | self.optimizer_v.step()
77 | self.optimizer_pi.step()
78 |
79 |
80 | env = gym.make('CartPole-v0')
81 | agent = Agent()
82 | reward_history = []
83 |
84 | for episode in range(2000):
85 | state = env.reset()
86 | done = False
87 | total_reward = 0
88 |
89 | while not done:
90 | action, prob = agent.get_action(state)
91 | next_state, reward, done, info = env.step(action)
92 |
93 | agent.update(state, prob, reward, next_state, done)
94 |
95 | state = next_state
96 | total_reward += reward
97 |
98 | reward_history.append(total_reward)
99 | if episode % 100 == 0:
100 | print("episode :{}, total reward : {:.1f}".format(episode, total_reward))
101 |
102 | plot_total_reward(reward_history)
--------------------------------------------------------------------------------
/pytorch/dqn.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from collections import deque
3 | import random
4 | import numpy as np
5 | import gym
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import torch.optim as optim
10 |
11 |
12 | class ReplayBuffer:
13 | def __init__(self, buffer_size, batch_size):
14 | self.buffer = deque(maxlen=buffer_size)
15 | self.batch_size = batch_size
16 |
17 | def add(self, state, action, reward, next_state, done):
18 | data = (state, action, reward, next_state, done)
19 | self.buffer.append(data)
20 |
21 | def __len__(self):
22 | return len(self.buffer)
23 |
24 | def get_batch(self):
25 | data = random.sample(self.buffer, self.batch_size)
26 |
27 | state = torch.tensor(np.stack([x[0] for x in data]))
28 |         action = torch.tensor(np.array([x[1] for x in data]).astype(np.int64))  # int64: np.long was removed from NumPy, and index tensors must be long
29 | reward = torch.tensor(np.array([x[2] for x in data]).astype(np.float32))
30 | next_state = torch.tensor(np.stack([x[3] for x in data]))
31 | done = torch.tensor(np.array([x[4] for x in data]).astype(np.int32))
32 | return state, action, reward, next_state, done
33 |
34 |
35 | class QNet(nn.Module):
36 | def __init__(self, action_size):
37 | super().__init__()
38 | self.l1 = nn.Linear(4, 128)
39 | self.l2 = nn.Linear(128, 128)
40 | self.l3 = nn.Linear(128, action_size)
41 |
42 | def forward(self, x):
43 | x = F.relu(self.l1(x))
44 | x = F.relu(self.l2(x))
45 | x = self.l3(x)
46 | return x
47 |
48 |
49 | class DQNAgent:
50 | def __init__(self):
51 | self.gamma = 0.98
52 | self.lr = 0.0005
53 | self.epsilon = 0.1
54 | self.buffer_size = 10000
55 | self.batch_size = 32
56 | self.action_size = 2
57 |
58 | self.replay_buffer = ReplayBuffer(self.buffer_size, self.batch_size)
59 | self.qnet = QNet(self.action_size)
60 | self.qnet_target = QNet(self.action_size)
61 | self.optimizer = optim.Adam(self.qnet.parameters(), lr=self.lr)
62 |
63 | def get_action(self, state):
64 | if np.random.rand() < self.epsilon:
65 | return np.random.choice(self.action_size)
66 | else:
67 | state = torch.tensor(state[np.newaxis, :])
68 | qs = self.qnet(state)
69 | return qs.argmax().item()
70 |
71 | def update(self, state, action, reward, next_state, done):
72 | self.replay_buffer.add(state, action, reward, next_state, done)
73 | if len(self.replay_buffer) < self.batch_size:
74 | return
75 |
76 | state, action, reward, next_state, done = self.replay_buffer.get_batch()
77 | qs = self.qnet(state)
78 | q = qs[np.arange(len(action)), action]
79 |
80 | next_qs = self.qnet_target(next_state)
81 | next_q = next_qs.max(1)[0]
82 |
83 |         next_q = next_q.detach()  # detach() is not in-place; assign so no gradients flow through the target network
84 | target = reward + (1 - done) * self.gamma * next_q
85 |
86 | loss_fn = nn.MSELoss()
87 | loss = loss_fn(q, target)
88 |
89 | self.optimizer.zero_grad()
90 | loss.backward()
91 | self.optimizer.step()
92 |
93 | def sync_qnet(self):
94 | self.qnet_target.load_state_dict(self.qnet.state_dict())
95 |
96 |
97 | episodes = 300
98 | sync_interval = 20
99 | env = gym.make('CartPole-v0')
100 | agent = DQNAgent()
101 | reward_history = []
102 |
103 | for episode in range(episodes):
104 | state = env.reset()
105 | done = False
106 | total_reward = 0
107 |
108 | while not done:
109 | action = agent.get_action(state)
110 | next_state, reward, done, info = env.step(action)
111 |
112 | agent.update(state, action, reward, next_state, done)
113 | state = next_state
114 | total_reward += reward
115 |
116 | if episode % sync_interval == 0:
117 | agent.sync_qnet()
118 |
119 | reward_history.append(total_reward)
120 | if episode % 10 == 0:
121 | print("episode :{}, total reward : {}".format(episode, total_reward))
--------------------------------------------------------------------------------
/pytorch/reinforce.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.optim as optim
7 | from torch.distributions import Categorical
8 |
9 |
10 | class Policy(nn.Module):
11 | def __init__(self, action_size):
12 | super().__init__()
13 | self.l1 = nn.Linear(4, 128)
14 | self.l2 = nn.Linear(128, action_size)
15 |
16 | def forward(self, x):
17 | x = F.relu(self.l1(x))
18 | x = F.softmax(self.l2(x), dim=1)
19 | return x
20 |
21 |
22 | class Agent:
23 | def __init__(self):
24 | self.gamma = 0.98
25 | self.lr = 0.0002
26 | self.action_size = 2
27 |
28 | self.memory = []
29 | self.pi = Policy(self.action_size)
30 | self.optimizer = optim.Adam(self.pi.parameters(), lr=self.lr)
31 |
32 | def get_action(self, state):
33 | state = torch.tensor(state[np.newaxis, :])
34 | probs = self.pi(state)
35 | probs = probs[0]
36 | m = Categorical(probs)
37 | action = m.sample().item()
38 | return action, probs[action]
39 |
40 | def add(self, reward, prob):
41 | data = (reward, prob)
42 | self.memory.append(data)
43 |
44 | def update(self):
45 | G, loss = 0, 0
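             # iterate backwards so G holds the discounted return G_t from each time step t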
46 | for reward, prob in reversed(self.memory):
47 | G = reward + self.gamma * G
48 | loss += - torch.log(prob) * G
49 |
50 | self.optimizer.zero_grad()
51 | loss.backward()
52 | self.optimizer.step()
53 | self.memory = []
54 |
55 |
56 | env = gym.make('CartPole-v0')
57 | agent = Agent()
58 | reward_history = []
59 |
60 | for episode in range(3000):
61 | state = env.reset()
62 | done = False
63 | sum_reward = 0
64 |
65 | while not done:
66 | action, prob = agent.get_action(state)
67 | next_state, reward, done, info = env.step(action)
68 |
69 | agent.add(reward, prob)
70 | state = next_state
71 | sum_reward += reward
72 |
73 | agent.update()
74 |
75 | reward_history.append(sum_reward)
76 | if episode % 100 == 0:
77 | print("episode :{}, total reward : {:.1f}".format(episode, sum_reward))
--------------------------------------------------------------------------------
/pytorch/simple_pg.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import gym
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torch.optim as optim
7 | from torch.distributions import Categorical
8 |
9 |
10 | class Policy(nn.Module):
11 | def __init__(self, action_size):
12 | super().__init__()
13 | self.l1 = nn.Linear(4, 128)
14 | self.l2 = nn.Linear(128, action_size)
15 |
16 | def forward(self, x):
17 | x = F.relu(self.l1(x))
18 | x = F.softmax(self.l2(x), dim=1)
19 | return x
20 |
21 |
22 | class Agent:
23 | def __init__(self):
24 | self.gamma = 0.98
25 | self.lr = 0.0002
26 | self.action_size = 2
27 |
28 | self.memory = []
29 | self.pi = Policy(self.action_size)
30 | self.optimizer = optim.Adam(self.pi.parameters(), lr=self.lr)
31 |
32 | def get_action(self, state):
33 | state = torch.tensor(state[np.newaxis, :])
34 | probs = self.pi(state)
35 | probs = probs[0]
36 | m = Categorical(probs)
37 | action = m.sample().item()
38 | return action, probs[action]
39 |
40 | def add(self, reward, prob):
41 | data = (reward, prob)
42 | self.memory.append(data)
43 |
44 | def update(self):
45 | G, loss = 0, 0
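             # first pass: compute the discounted return G of the whole episode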
46 | for reward, prob in reversed(self.memory):
47 | G = reward + self.gamma * G
48 |
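             # second pass: weight every log-probability by the same episode return G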
49 | for reward, prob in self.memory:
50 | loss += - torch.log(prob) * G
51 |
52 | self.optimizer.zero_grad()
53 | loss.backward()
54 | self.optimizer.step()
55 | self.memory = []
56 |
57 |
58 | env = gym.make('CartPole-v0')
59 | agent = Agent()
60 | reward_history = []
61 |
62 | for episode in range(3000):
63 | state = env.reset()
64 | done = False
65 | total_reward = 0
66 |
67 | while not done:
68 | action, prob = agent.get_action(state)
69 | next_state, reward, done, info = env.step(action)
70 |
71 | agent.add(reward, prob)
72 | state = next_state
73 | total_reward += reward
74 |
75 | agent.update()
76 |
77 | reward_history.append(total_reward)
78 | if episode % 100 == 0:
79 | print("episode :{}, total reward : {:.1f}".format(episode, total_reward))
--------------------------------------------------------------------------------
/series overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WegraLee/deep-learning-from-scratch-4/b82cd6432b4e63ce6a4ab2b925fc74a1227fb06a/series overview.png
--------------------------------------------------------------------------------