├── .gitignore
├── .idea
│   └── vcs.xml
├── Appendix
│   └── Appendix.ipynb
├── Chapter 1
│   └── Ch1_Introduction.ipynb
├── Chapter 10
│   └── Ch10_book.ipynb
├── Chapter 2
│   └── Ch2_book.ipynb
├── Chapter 3
│   ├── Ch3_book.ipynb
│   ├── GridBoard.py
│   └── Gridworld.py
├── Chapter 4
│   └── Ch4_book.ipynb
├── Chapter 5
│   └── Ch5_book.ipynb
├── Chapter 6
│   ├── Ch6_book.ipynb
│   ├── MNIST Genetic Algorithm.ipynb
│   ├── String Genetic Algorithm.ipynb
│   ├── buffer.py
│   ├── main.py
│   └── simulator.py
├── Chapter 7
│   └── Ch7_book.ipynb
├── Chapter 8
│   ├── Ch8_book.ipynb
│   └── script_8.py
├── Chapter 9
│   ├── Ch9_book.ipynb
│   └── MAgent
│       └── build
│           └── render
│               └── README.txt
├── Environments
│   ├── GridBoard.py
│   └── Gridworld.py
├── Errata.md
├── Errata
│   ├── Chapter 2.ipynb
│   ├── Chapter 3.ipynb
│   ├── Chapter 4.ipynb
│   ├── Chapter 5.ipynb
│   ├── GridBoard.py
│   └── Gridworld.py
├── LICENSE
├── README.md
├── old_but_more_detailed
│   ├── Cartpole A3C N step.ipynb
│   ├── Cartpole A3C.ipynb
│   ├── Ch10_Relational DRL.ipynb
│   ├── Ch2_N Armed Bandits.ipynb
│   ├── Ch3_DQN.ipynb
│   ├── Ch3_Gridworld.ipynb
│   ├── Ch3_Gridworld_exp.ipynb
│   ├── Ch4_PolicyGradients.ipynb
│   ├── Ch4_PolicyGradients_.ipynb
│   ├── Ch6_Evolutionary.ipynb
│   ├── Ch6_book_dev.ipynb
│   ├── Ch7_DistDQN.ipynb
│   └── Curiosity-Driven Exploration Super Mario.ipynb
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .idea/
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 |
29 | # Unit test / coverage reports
30 | htmlcov/
31 | .tox/
32 | .coverage
33 | .coverage.*
34 | .cache
35 | nosetests.xml
36 | coverage.xml
37 | *.cover
38 | .hypothesis/
39 | .pytest_cache/
40 |
41 | # Jupyter Notebook
42 | **/.ipynb_checkpoints
43 |
44 | # pyenv
45 | .python-version
46 |
47 | # Environments
48 | .env
49 | .venv
50 | env/
51 | venv/
52 | ENV/
53 | env.bak/
54 | venv.bak/
55 |
56 | # Scrap notebooks and scripts
57 | archive/
58 | Chapter\ 2/Ch2_Bandits_TensorFlow.ipynb
59 | Chapter\ 2/Ch2_MDPs_old.ipynb
60 | Chapter\ 2/.ipynb_checkpoints
61 | Chapter\ 3/.ipynb_checkpoints/
62 | Chapter\ 3/.DS_Store
63 | Chapter\ 3/Ch3_DQN_old.ipynb
64 | Chapter\ 3/Ch3_Gridworld_exp.ipynb
65 | Chapter\ 3/Ch3_Gridworld.ipynb
66 | Chapter\ 3/Sokoban.py
67 | Chapter\ 4/Ch4_PolicyGradients_.ipynb
68 | Chapter\ 4/.ipynb_checkpoints
69 | Sandbox
70 | .idea
71 | **/.DS_Store
72 |
73 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>
--------------------------------------------------------------------------------
/Chapter 1/Ch1_Introduction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true,
7 | "jupyter": {
8 | "outputs_hidden": true
9 | }
10 | },
11 | "source": [
12 | "# Deep Reinforcement Learning in Action \n",
13 | "## Ch 1. Introduction\n",
14 | "### Dynamic Programming"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "def fib(n):\n",
24 | " if n <= 1:\n",
25 | " return n\n",
26 | " else:\n",
27 | " return fib(n - 1) + fib(n - 2)"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/plain": [
38 | "13"
39 | ]
40 | },
41 | "execution_count": 2,
42 | "metadata": {},
43 | "output_type": "execute_result"
44 | }
45 | ],
46 | "source": [
47 | "fib(7)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 3,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "mem = {0:0, 1:1}\n",
57 | "\n",
58 | "def fib_mem(n):\n",
59 | " if n not in mem:\n",
60 | "        mem[n] = fib_mem(n - 1) + fib_mem(n - 2)\n",
61 | " return mem[n]"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 4,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "13"
73 | ]
74 | },
75 | "execution_count": 4,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | }
79 | ],
80 | "source": [
81 | "fib_mem(7)"
82 | ]
83 | },
84 | {
85 | "cell_type": "markdown",
86 | "metadata": {},
87 | "source": [
88 | "### Time Performance"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 5,
94 | "metadata": {},
95 | "outputs": [
96 | {
97 | "name": "stdout",
98 | "output_type": "stream",
99 | "text": [
100 | "4.4 s ± 844 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "%timeit fib(35)\n",
106 | "# We get 5.54 seconds to run with n=35"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 6,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "319 ns ± 274 ns per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "%timeit fib_mem(35)\n",
124 | "# We get 412 ns to run with n=35"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": null,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": []
133 | }
134 | ],
135 | "metadata": {
136 | "kernelspec": {
137 | "display_name": "Python 3 (ipykernel)",
138 | "language": "python",
139 | "name": "python3"
140 | },
141 | "language_info": {
142 | "codemirror_mode": {
143 | "name": "ipython",
144 | "version": 3
145 | },
146 | "file_extension": ".py",
147 | "mimetype": "text/x-python",
148 | "name": "python",
149 | "nbconvert_exporter": "python",
150 | "pygments_lexer": "ipython3",
151 | "version": "3.10.12"
152 | }
153 | },
154 | "nbformat": 4,
155 | "nbformat_minor": 4
156 | }
157 |
--------------------------------------------------------------------------------
/Chapter 3/GridBoard.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import sys
4 |
5 | def randPair(s,e):
6 | return np.random.randint(s,e), np.random.randint(s,e)
7 |
8 | class BoardPiece:
9 |
10 | def __init__(self, name, code, pos):
11 | self.name = name #name of the piece
12 | self.code = code #an ASCII character to display on the board
13 | self.pos = pos #2-tuple e.g. (1,4)
14 |
15 | class BoardMask:
16 |
17 | def __init__(self, name, mask, code):
18 | self.name = name
19 | self.mask = mask
20 | self.code = code
21 |
22 | def get_positions(self): #returns tuple of arrays
23 | return np.nonzero(self.mask)
24 |
25 | def zip_positions2d(positions): #positions is tuple of two arrays
26 | x,y = positions
27 | return list(zip(x,y))
28 |
29 | class GridBoard:
30 |
31 | def __init__(self, size=4):
32 | self.size = size #Board dimensions, e.g. 4 x 4
33 | self.components = {} #name : board piece
34 | self.masks = {}
35 |
36 | def addPiece(self, name, code, pos=(0,0)):
37 | newPiece = BoardPiece(name, code, pos)
38 | self.components[name] = newPiece
39 |
40 | #basically a set of boundary elements
41 | def addMask(self, name, mask, code):
42 | #mask is a 2D-numpy array with 1s where the boundary elements are
43 | newMask = BoardMask(name, mask, code)
44 | self.masks[name] = newMask
45 |
46 | def movePiece(self, name, pos):
47 | move = True
48 | for _, mask in self.masks.items():
49 | if pos in zip_positions2d(mask.get_positions()):
50 | move = False
51 | if move:
52 | self.components[name].pos = pos
53 |
54 | def delPiece(self, name):
55 |         del self.components[name]
56 |
57 | def render(self):
58 |         dtype = '<U2'
59 |         displ_board = np.zeros((self.size, self.size), dtype=dtype)
60 |         displ_board[:] = ' '
61 | 
62 |         for name, piece in self.components.items():
63 |             displ_board[piece.pos] = piece.code
64 | 
65 |         for name, mask in self.masks.items():
66 |             displ_board[mask.get_positions()] = mask.code
67 | 
68 |         return displ_board
69 | 
70 |     def render_np(self):
71 |         num_pieces = len(self.components) + len(self.masks)
72 |         displ_board = np.zeros((num_pieces, self.size, self.size), dtype=np.uint8)
73 |         layer = 0
74 |         for name, piece in self.components.items():
75 |             pos = (layer,) + piece.pos
76 |             displ_board[pos] = 1
77 |             layer += 1
78 | 
79 |         for name, mask in self.masks.items():
80 |             x,y = mask.get_positions()
81 |             z = np.repeat(layer, len(x))
82 |             displ_board[(z,x,y)] = 1
83 |             layer += 1
84 |         return displ_board
85 | 
86 | def addTuple(a,b):
87 |     return tuple([sum(x) for x in zip(a,b)])
88 | 
--------------------------------------------------------------------------------
/Chapter 3/Gridworld.py:
--------------------------------------------------------------------------------
1 | from GridBoard import *
2 | 
3 | class Gridworld:
4 | 
5 |     def __init__(self, size=4, mode='static'):
6 |         if size >= 4:
7 | self.board = GridBoard(size=size)
8 | else:
9 | print("Minimum board size is 4. Initialized to size 4.")
10 | self.board = GridBoard(size=4)
11 |
12 | #Add pieces, positions will be updated later
13 | self.board.addPiece('Player','P',(0,0))
14 | self.board.addPiece('Goal','+',(1,0))
15 | self.board.addPiece('Pit','-',(2,0))
16 | self.board.addPiece('Wall','W',(3,0))
17 |
18 | if mode == 'static':
19 | self.initGridStatic()
20 | elif mode == 'player':
21 | self.initGridPlayer()
22 | else:
23 | self.initGridRand()
24 |
25 | #Initialize stationary grid, all items are placed deterministically
26 | def initGridStatic(self):
27 | #Setup static pieces
28 | self.board.components['Player'].pos = (0,3) #Row, Column
29 | self.board.components['Goal'].pos = (0,0)
30 | self.board.components['Pit'].pos = (0,1)
31 | self.board.components['Wall'].pos = (1,1)
32 |
33 | #Check if board is initialized appropriately (no overlapping pieces)
34 | #also remove impossible-to-win boards
35 | def validateBoard(self):
36 | valid = True
37 |
38 | player = self.board.components['Player']
39 | goal = self.board.components['Goal']
40 | wall = self.board.components['Wall']
41 | pit = self.board.components['Pit']
42 |
43 |         #gather all piece positions; any duplicates mean pieces overlap
44 |         all_positions = [player.pos, goal.pos, wall.pos, pit.pos]
45 | if len(all_positions) > len(set(all_positions)):
46 | return False
47 |
48 |         corners = [(0,0), (0,self.board.size-1), (self.board.size-1,0), (self.board.size-1,self.board.size-1)]
49 | #if player is in corner, can it move? if goal is in corner, is it blocked?
50 | if player.pos in corners or goal.pos in corners:
51 | val_move_pl = [self.validateMove('Player', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
52 | val_move_go = [self.validateMove('Goal', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
53 | if 0 not in val_move_pl or 0 not in val_move_go:
54 | #print(self.display())
55 | #print("Invalid board. Re-initializing...")
56 | valid = False
57 |
58 | return valid
59 |
60 | #Initialize player in random location, but keep wall, goal and pit stationary
61 | def initGridPlayer(self):
62 | #height x width x depth (number of pieces)
63 | self.initGridStatic()
64 | #place player
65 | self.board.components['Player'].pos = randPair(0,self.board.size)
66 |
67 | if (not self.validateBoard()):
68 | #print('Invalid grid. Rebuilding..')
69 | self.initGridPlayer()
70 |
71 | #Initialize grid so that goal, pit, wall, player are all randomly placed
72 | def initGridRand(self):
73 | #height x width x depth (number of pieces)
74 | self.board.components['Player'].pos = randPair(0,self.board.size)
75 | self.board.components['Goal'].pos = randPair(0,self.board.size)
76 | self.board.components['Pit'].pos = randPair(0,self.board.size)
77 | self.board.components['Wall'].pos = randPair(0,self.board.size)
78 |
79 | if (not self.validateBoard()):
80 | #print('Invalid grid. Rebuilding..')
81 | self.initGridRand()
82 |
83 | def validateMove(self, piece, addpos=(0,0)):
84 | outcome = 0 #0 is valid, 1 invalid, 2 lost game
85 | pit = self.board.components['Pit'].pos
86 | wall = self.board.components['Wall'].pos
87 | new_pos = addTuple(self.board.components[piece].pos, addpos)
88 | if new_pos == wall:
89 | outcome = 1 #block move, player can't move to wall
90 | elif max(new_pos) > (self.board.size-1): #if outside bounds of board
91 | outcome = 1
92 | elif min(new_pos) < 0: #if outside bounds
93 | outcome = 1
94 | elif new_pos == pit:
95 | outcome = 2
96 |
97 | return outcome
98 |
99 | def makeMove(self, action):
100 | #need to determine what object (if any) is in the new grid spot the player is moving to
101 | #actions in {u,d,l,r}
102 | def checkMove(addpos):
103 | if self.validateMove('Player', addpos) in [0,2]:
104 | new_pos = addTuple(self.board.components['Player'].pos, addpos)
105 | self.board.movePiece('Player', new_pos)
106 |
107 | if action == 'u': #up
108 | checkMove((-1,0))
109 | elif action == 'd': #down
110 | checkMove((1,0))
111 | elif action == 'l': #left
112 | checkMove((0,-1))
113 | elif action == 'r': #right
114 | checkMove((0,1))
115 | else:
116 | pass
117 |
118 | def reward(self):
119 | if (self.board.components['Player'].pos == self.board.components['Pit'].pos):
120 | return -10
121 | elif (self.board.components['Player'].pos == self.board.components['Goal'].pos):
122 | return 10
123 | else:
124 | return -1
125 |
126 | def display(self):
127 | return self.board.render()
128 |
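129 | 
130 | if __name__ == '__main__':
131 |     #Usage sketch (editorial illustration, not a book listing): play a couple
132 |     #of moves on the static 4x4 board and inspect the reward signal.
133 |     game = Gridworld(size=4, mode='static')
134 |     print(game.display())   #'P' player, '+' goal, '-' pit, 'W' wall
135 |     game.makeMove('d')      #move down one row
136 |     game.makeMove('l')      #then left one column
137 |     print(game.display())
138 |     print(game.reward())    #-1 per step, +10 at the goal, -10 in the pit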
--------------------------------------------------------------------------------
/Chapter 5/Ch5_book.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Ch 5 - Actor-Critic Models\n",
8 | "### Deep Reinforcement Learning in Action"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "##### Listing 5.1"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 1,
21 | "metadata": {},
22 | "outputs": [
23 | {
24 | "name": "stdout",
25 | "output_type": "stream",
26 | "text": [
27 | "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n",
28 | " 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47\n",
29 | " 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63]\n",
30 | "8\n",
31 | "[array([ 0, 1, 4, 9, 16, 25, 36, 49]), array([ 64, 81, 100, 121, 144, 169, 196, 225]), array([256, 289, 324, 361, 400, 441, 484, 529]), array([576, 625, 676, 729, 784, 841, 900, 961]), array([1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521]), array([1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209]), array([2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025]), array([3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969])]\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "import multiprocessing as mp\n",
37 | "import numpy as np\n",
38 | "def square(x): #A\n",
39 | " return np.square(x)\n",
40 | "x = np.arange(64) #B\n",
41 | "print(x)\n",
42 | "print(mp.cpu_count())\n",
43 | "pool = mp.Pool(8) #C\n",
44 | "squared = pool.map(square, [x[8*i:8*i+8] for i in range(8)])\n",
45 | "print(squared)"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "##### Listing 5.2"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 2,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "name": "stdout",
62 | "output_type": "stream",
63 | "text": [
64 | "In process 0\n",
65 | "In process 1\n",
66 | "In process 2\n",
67 | "In process 3\n",
68 | "In process 4\n",
69 | "In process 5\n",
70 | "In process 6\n",
71 | "In process 7\n",
72 | "[array([ 0, 1, 4, 9, 16, 25, 36, 49]), array([ 64, 81, 100, 121, 144, 169, 196, 225]), array([256, 289, 324, 361, 400, 441, 484, 529]), array([576, 625, 676, 729, 784, 841, 900, 961]), array([1024, 1089, 1156, 1225, 1296, 1369, 1444, 1521]), array([1600, 1681, 1764, 1849, 1936, 2025, 2116, 2209]), array([2304, 2401, 2500, 2601, 2704, 2809, 2916, 3025]), array([3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969])]\n"
73 | ]
74 | }
75 | ],
76 | "source": [
77 | "def square(i, x, queue):\n",
78 | " print(\"In process {}\".format(i,))\n",
79 | " queue.put(np.square(x))\n",
80 | "processes = [] #A\n",
81 | "queue = mp.Queue() #B\n",
82 | "x = np.arange(64) #C\n",
83 | "for i in range(8): #D\n",
84 | " start_index = 8*i\n",
85 | " proc = mp.Process(target=square,args=(i,x[start_index:start_index+8], queue)) \n",
86 | " proc.start()\n",
87 | " processes.append(proc)\n",
88 | " \n",
89 | "for proc in processes: #E\n",
90 | " proc.join()\n",
91 | " \n",
92 | "for proc in processes: #F\n",
93 | " proc.terminate()\n",
94 | "results = []\n",
95 | "while not queue.empty(): #G\n",
96 | " results.append(queue.get())\n",
97 | "\n",
98 | "print(results)"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "##### Listing 5.4"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 3,
111 | "metadata": {},
112 | "outputs": [
113 | {
114 | "name": "stderr",
115 | "output_type": "stream",
116 | "text": [
117 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/envs/registration.py:307: DeprecationWarning: The package name gym_minigrid has been deprecated in favor of minigrid. Please uninstall gym_minigrid and install minigrid with `pip install minigrid`. Future releases will be maintained under the new package name minigrid.\n",
118 | " fn()\n"
119 | ]
120 | }
121 | ],
122 | "source": [
123 | "import torch\n",
124 | "from torch import nn\n",
125 | "from torch import optim\n",
126 | "import numpy as np\n",
127 | "from torch.nn import functional as F\n",
128 | "import gym\n",
129 | "import torch.multiprocessing as mp #A\n",
130 | "\n",
131 | "class ActorCritic(nn.Module): #B\n",
132 | " def __init__(self):\n",
133 | " super(ActorCritic, self).__init__()\n",
134 | " self.l1 = nn.Linear(4,25)\n",
135 | " self.l2 = nn.Linear(25,50)\n",
136 | " self.actor_lin1 = nn.Linear(50,2)\n",
137 | " self.l3 = nn.Linear(50,25)\n",
138 | " self.critic_lin1 = nn.Linear(25,1)\n",
139 | " def forward(self,x):\n",
140 | " x = F.normalize(x,dim=0)\n",
141 | " y = F.relu(self.l1(x))\n",
142 | " y = F.relu(self.l2(y))\n",
143 | " actor = F.log_softmax(self.actor_lin1(y),dim=0) #C\n",
144 | " c = F.relu(self.l3(y.detach()))\n",
145 | " critic = torch.tanh(self.critic_lin1(c)) #D\n",
146 | " return actor, critic #E"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "##### Listing 5.6"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 4,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "def worker(t, worker_model, counter, params):\n",
163 | " worker_env = gym.make(\"CartPole-v1\")\n",
164 | " worker_env.reset()\n",
165 | " worker_opt = optim.Adam(lr=1e-4,params=worker_model.parameters()) #A\n",
166 | " worker_opt.zero_grad()\n",
167 | " for i in range(params['epochs']):\n",
168 | " worker_opt.zero_grad()\n",
169 | " values, logprobs, rewards = run_episode(worker_env,worker_model) #B \n",
170 | " actor_loss,critic_loss,eplen = update_params(worker_opt,values,logprobs,rewards) #C\n",
171 | " counter.value = counter.value + 1 #D"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "##### Listing 5.7"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 5,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "def run_episode(worker_env, worker_model):\n",
188 | " state = torch.from_numpy(worker_env.env.state).float() #A\n",
189 | " values, logprobs, rewards = [],[],[] #B\n",
190 | " done = False\n",
191 | " j=0\n",
192 | " while (done == False): #C\n",
193 | " j+=1\n",
194 | " policy, value = worker_model(state) #D\n",
195 | " values.append(value)\n",
196 | " logits = policy.view(-1)\n",
197 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
198 | " action = action_dist.sample() #E\n",
199 | " logprob_ = policy.view(-1)[action]\n",
200 | " logprobs.append(logprob_)\n",
201 | " state_, _, done, _, info = worker_env.step(action.detach().numpy())\n",
202 | " state = torch.from_numpy(state_).float()\n",
203 | " if done: #F\n",
204 | " reward = -10\n",
205 | " worker_env.reset()\n",
206 | " else:\n",
207 | " reward = 1.0\n",
208 | " rewards.append(reward)\n",
209 | " return values, logprobs, rewards"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "metadata": {},
215 | "source": [
216 | "##### Listing 5.8"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 6,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "def update_params(worker_opt,values,logprobs,rewards,clc=0.1,gamma=0.95):\n",
226 | " rewards = torch.Tensor(rewards).flip(dims=(0,)).view(-1) #A\n",
227 | " logprobs = torch.stack(logprobs).flip(dims=(0,)).view(-1)\n",
228 | " values = torch.stack(values).flip(dims=(0,)).view(-1)\n",
229 | " Returns = []\n",
230 | " ret_ = torch.Tensor([0])\n",
231 | " for r in range(rewards.shape[0]): #B\n",
232 | " ret_ = rewards[r] + gamma * ret_\n",
233 | " Returns.append(ret_)\n",
234 | " Returns = torch.stack(Returns).view(-1)\n",
235 | " Returns = F.normalize(Returns,dim=0)\n",
236 | " actor_loss = -1*logprobs * (Returns - values.detach()) #C\n",
237 | " critic_loss = torch.pow(values - Returns,2) #D\n",
238 | " loss = actor_loss.sum() + clc*critic_loss.sum() #E\n",
239 | " loss.backward()\n",
240 | " worker_opt.step()\n",
241 | " return actor_loss, critic_loss, len(rewards)"
242 | ]
243 | },
244 | {
245 | "cell_type": "markdown",
246 | "metadata": {},
247 | "source": [
248 | "##### Listing 5.5"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "name": "stderr",
258 | "output_type": "stream",
259 | "text": [
260 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
261 | " if not isinstance(terminated, (bool, np.bool8)):\n",
262 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
263 | " if not isinstance(terminated, (bool, np.bool8)):\n",
264 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
265 | " if not isinstance(terminated, (bool, np.bool8)):\n",
266 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
267 | " if not isinstance(terminated, (bool, np.bool8)):\n",
268 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
269 | " if not isinstance(terminated, (bool, np.bool8)):\n",
270 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
271 | " if not isinstance(terminated, (bool, np.bool8)):\n",
272 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/utils/passive_env_checker.py:233: DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n",
273 | " if not isinstance(terminated, (bool, np.bool8)):\n"
274 | ]
275 | }
276 | ],
277 | "source": [
278 | "MasterNode = ActorCritic() #A\n",
279 | "MasterNode.share_memory() #B\n",
280 | "processes = [] #C\n",
281 | "params = {\n",
282 | " 'epochs':1000,\n",
283 | " 'n_workers':7,\n",
284 | "}\n",
285 | "counter = mp.Value('i',0) #D\n",
286 | "for i in range(params['n_workers']):\n",
287 | " p = mp.Process(target=worker, args=(i,MasterNode,counter,params)) #E\n",
288 | " p.start() \n",
289 | " processes.append(p)\n",
290 | "for p in processes: #F\n",
291 | " p.join()\n",
292 | "for p in processes: #G\n",
293 | " p.terminate()\n",
294 | " \n",
295 | "print(counter.value,processes[1].exitcode) #H"
296 | ]
297 | },
298 | {
299 | "cell_type": "markdown",
300 | "metadata": {},
301 | "source": [
302 | "##### Test the trained agent"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 9,
308 | "metadata": {},
309 | "outputs": [
310 | {
311 | "name": "stderr",
312 | "output_type": "stream",
313 | "text": [
314 | "/home/don/git/DeepReinforcementLearningInAction/venv/lib/python3.10/site-packages/gym/envs/classic_control/cartpole.py:211: UserWarning: \u001b[33mWARN: You are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym(\"CartPole-v1\", render_mode=\"rgb_array\")\u001b[0m\n",
315 | " gym.logger.warn(\n"
316 | ]
317 | },
318 | {
319 | "name": "stdout",
320 | "output_type": "stream",
321 | "text": [
322 | "Lost\n",
323 | "Lost\n",
324 | "Lost\n",
325 | "Lost\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "env = gym.make(\"CartPole-v1\")\n",
331 | "env.reset()\n",
332 | "\n",
333 | "for i in range(100):\n",
334 | " state_ = np.array(env.env.state)\n",
335 | " state = torch.from_numpy(state_).float()\n",
336 | " logits,value = MasterNode(state)\n",
337 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
338 | " action = action_dist.sample()\n",
339 | "    state2, reward, done, _, info = env.step(action.detach().numpy())\n",
340 | " if done:\n",
341 | " print(\"Lost\")\n",
342 | " env.reset()\n",
343 | " state_ = np.array(env.env.state)\n",
344 | " state = torch.from_numpy(state_).float()\n",
345 | " env.render()"
346 | ]
347 | },
348 | {
349 | "cell_type": "markdown",
350 | "metadata": {},
351 | "source": [
352 | "##### Listing 5.9"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 10,
358 | "metadata": {},
359 | "outputs": [],
360 | "source": [
361 | "def run_episode(worker_env, worker_model, N_steps=10):\n",
362 | " raw_state = np.array(worker_env.env.state)\n",
363 | " state = torch.from_numpy(raw_state).float()\n",
364 | " values, logprobs, rewards = [],[],[]\n",
365 | " done = False\n",
366 | " j=0\n",
367 | " G=torch.Tensor([0]) #A\n",
368 | " while (j < N_steps and done == False): #B\n",
369 | " j+=1\n",
370 | " policy, value = worker_model(state)\n",
371 | " values.append(value)\n",
372 | " logits = policy.view(-1)\n",
373 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
374 | " action = action_dist.sample()\n",
375 | " logprob_ = policy.view(-1)[action]\n",
376 | " logprobs.append(logprob_)\n",
377 | "        state_, _, done, _, info = worker_env.step(action.detach().numpy())\n",
378 | " state = torch.from_numpy(state_).float()\n",
379 | " if done:\n",
380 | " reward = -10\n",
381 | " worker_env.reset()\n",
382 | " else: #C\n",
383 | " reward = 1.0\n",
384 | " G = value.detach()\n",
385 | " rewards.append(reward)\n",
386 | " return values, logprobs, rewards, G"
387 | ]
388 | },
389 | {
390 | "cell_type": "markdown",
391 | "metadata": {},
392 | "source": [
393 | "##### Listing 5.10"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": 11,
399 | "metadata": {},
400 | "outputs": [
401 | {
402 | "name": "stdout",
403 | "output_type": "stream",
404 | "text": [
405 | "No bootstrapping\n",
406 | "0.010000000000000009 1.99\n",
407 | "With bootstrapping\n",
408 | "0.9901 2.9701\n"
409 | ]
410 | }
411 | ],
412 | "source": [
413 | "#Simulated rewards for 3 steps\n",
414 | "r1 = [1,1,-1]\n",
415 | "r2 = [1,1,1]\n",
416 | "R1,R2 = 0.0,0.0\n",
417 | "#No bootstrapping\n",
418 | "for i in range(len(r1)-1,0,-1): \n",
419 | " R1 = r1[i] + 0.99*R1\n",
420 | "for i in range(len(r2)-1,0,-1):\n",
421 | " R2 = r2[i] + 0.99*R2\n",
422 | "print(\"No bootstrapping\")\n",
423 | "print(R1,R2)\n",
424 | "#With bootstrapping\n",
425 | "R1,R2 = 1.0,1.0\n",
426 | "for i in range(len(r1)-1,0,-1):\n",
427 | " R1 = r1[i] + 0.99*R1\n",
428 | "for i in range(len(r2)-1,0,-1):\n",
429 | " R2 = r2[i] + 0.99*R2\n",
430 | "print(\"With bootstrapping\")\n",
431 | "print(R1,R2)"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": null,
437 | "metadata": {},
438 | "outputs": [],
439 | "source": []
440 | }
441 | ],
442 | "metadata": {
443 | "kernelspec": {
444 | "display_name": "Python 3 (ipykernel)",
445 | "language": "python",
446 | "name": "python3"
447 | },
448 | "language_info": {
449 | "codemirror_mode": {
450 | "name": "ipython",
451 | "version": 3
452 | },
453 | "file_extension": ".py",
454 | "mimetype": "text/x-python",
455 | "name": "python",
456 | "nbconvert_exporter": "python",
457 | "pygments_lexer": "ipython3",
458 | "version": "3.10.12"
459 | }
460 | },
461 | "nbformat": 4,
462 | "nbformat_minor": 4
463 | }
464 |
--------------------------------------------------------------------------------
/Chapter 6/buffer.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | class ExperienceReplay():
4 | def __init__(self):
5 | self.buffer = []
6 | self.buffer_size = 1000
7 |
8 | def add(self, data):
9 | self.buffer.extend(data)
10 | if len(self.buffer) > self.buffer_size: self.buffer = self.buffer[-self.buffer_size:]
11 |
12 | def sample(self, size):
13 | return random.sample(self.buffer, size)
14 |
15 | # buffer = sorted(self.buffer, key=lambda replay: abs(replay[3]) > 0, reverse=True)
16 | # p = np.array([0.99 ** i for i in range(len(buffer))])
17 | # p = p / sum(p)
18 | # sample_idxs = np.random.choice(np.arange(len(buffer)),size=size, p=p)
19 | # sample_output = [buffer[idx] for idx in sample_idxs]
20 | # # print(sample_output)
21 | # # sample_output = np.reshape(sample_output,(size,-1))
22 | # return sample_output
23 |
24 | def __len__(self):
25 | return len(self.buffer)
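26 | 
27 | if __name__ == '__main__':
28 |     #Usage sketch (editorial illustration, not a book listing): overfill the
29 |     #buffer with dummy (state, action, reward, done) tuples, then sample.
30 |     replay = ExperienceReplay()
31 |     replay.add([(s, 0, 1.0, False) for s in range(1200)])  #only the last 1000 are kept
32 |     print(len(replay))             #-> 1000
33 |     print(len(replay.sample(32)))  #-> 32 distinct transitions, drawn uniformly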
--------------------------------------------------------------------------------
/Chapter 6/main.py:
--------------------------------------------------------------------------------
1 | """
2 | python main.py
3 | """
4 |
5 | from argparse import ArgumentParser
6 | import torch
7 | from torch import nn
8 | import torch.nn.functional as F
9 | import numpy as np
10 | import gym
11 |
12 | class Agent(object):
13 | def __init__(self, env, state_space, action_space, weights=[], max_eps_length=500, trials=5):
14 | self.max_eps_length = max_eps_length
15 | self.trials = trials
16 |         state_space = state_space[0] # length of the observation vector (e.g. 4 for CartPole)
17 | self.state_space = state_space
18 | self.action_space = action_space
19 |
20 | self.weights = weights if weights else self._get_random_weights()
21 | self.fitness = self._get_fitness(env)
22 |
23 | def model(self, x):
24 | x = F.relu(torch.add(torch.mm(x, self.weights[0]),self.weights[1]))
25 | x = F.relu(torch.add(torch.mm(x, self.weights[2]), self.weights[3]))
26 |         x = F.softmax(torch.add(torch.mm(x, self.weights[4]), self.weights[5]), dim=1)
27 | return x
28 |
29 | def _get_random_weights(self):
30 | return [
31 | torch.rand(self.state_space, 10), # fc1 weights
32 | torch.rand(10), # fc1 bias
33 | torch.rand(10, 10), # fc2 weights
34 | torch.rand(10), # fc2 bias
35 | torch.rand(10, self.action_space), # fc3 weights
36 | torch.rand(self.action_space), # fc3 bias
37 | ]
38 |
39 | def _get_fitness(self, env):
40 | total_reward = 0
41 | for _ in range(self.trials):
42 | observation = env.reset()
43 | for i in range(self.max_eps_length):
44 | action = self.get_action(observation)
45 | observation, reward, done, info = env.step(action)
46 | total_reward += reward
47 | if done: break
48 | return total_reward / self.trials
49 |
50 | def get_action(self, state):
51 | act_prob = self.model(torch.Tensor(state.reshape(1,-1))).detach().numpy()[0] # use predict api when merged
52 | action = np.random.choice(range(len(act_prob)), p=act_prob)
53 | return action
54 |
55 | def save(self, save_file):
56 |         torch.save(self.weights, save_file)
57 |
58 |
59 | def cross(agent1, agent2, agent_config):
60 | num_params = len(agent1.weights)
61 | crossover_idx = np.random.randint(0, num_params)
62 | new_weights = agent1.weights[:crossover_idx] + agent2.weights[crossover_idx:]
63 | new_weights = mutate(new_weights)
64 | return Agent(weights=new_weights, **agent_config)
65 |
66 |
67 | def mutate(new_weights):
68 |     num_params = len(new_weights)
69 |     num_params_to_update = np.random.randint(0, num_params) # num of params to change
70 |     for i in range(num_params_to_update):
71 |         n = np.random.randint(0, num_params)
72 |         new_weights[n] = new_weights[n] + torch.rand(new_weights[n].size())
73 |     return new_weights
74 |
75 |
76 | def breed(agent1, agent2, agent_config, generation_size=10):
77 | next_generation = [agent1, agent2]
78 |
79 | for _ in range(generation_size - 2):
80 | next_generation.append(cross(agent1, agent2, agent_config))
81 |
82 | return next_generation
83 |
84 | def reproduce(agents, agent_config, generation_size):
85 | top_agents = sorted(agents, reverse=True, key=lambda a: a.fitness)[:2]
86 | new_agents = breed(top_agents[0], top_agents[1], agent_config, generation_size)
87 | return new_agents
88 |
89 |
90 | def run(n_generations, generation_size, agent_config, save_file=None, render=False):
91 | agents = [Agent(**agent_config), Agent(**agent_config)]
92 | max_fitness = 0
93 | for i in range(n_generations):
94 | next_generation = reproduce(agents, agent_config, generation_size)
95 | ranked_generation = sorted(next_generation, reverse=True, key=lambda a : a.fitness)
96 | avg_fitness = (ranked_generation[0].fitness + ranked_generation[1].fitness) / 2
97 | print(i, avg_fitness)
98 | agents = next_generation
99 | if ranked_generation[0].fitness > max_fitness:
100 | max_fitness = ranked_generation[0].fitness
101 | # ranked_generation[0].save(args.save_file)
102 | test_agent(ranked_generation[0], agent_config, render)
103 |
104 |
105 | def test_agent(agent, agent_config, render):
106 | env = agent_config['env']
107 | obs = env.reset()
108 | total_reward = 0
109 | for i in range(agent_config['max_eps_length']):
110 | if render: env.render()
111 | action = agent.get_action(obs)
112 | obs, reward, done, info = env.step(action)
113 | total_reward += reward
114 | if done: break
115 | print('test', total_reward)
116 | env.close()
117 |
118 |
119 |
120 | if __name__ == '__main__':
121 | env_names = [e.id for e in gym.envs.registry.all()]
122 |
123 | parser = ArgumentParser()
124 |     parser.add_argument('--n_generations', type=int, default=10000)
125 |     parser.add_argument('--render', action='store_true')
126 |     parser.add_argument('--generation_size', type=int, default=20)
127 |     parser.add_argument('--max_eps_length', type=int, default=500)
128 |     parser.add_argument('--trials', type=int, default=5)
129 | parser.add_argument('--env', default='CartPole-v1', choices=env_names)
130 | parser.add_argument('--save_file')
131 |
132 | args = parser.parse_args()
133 | env = gym.make(args.env)
134 |
135 | agent_config = {
136 | 'state_space' : env.observation_space.shape,
137 | 'action_space' : env.action_space.n,
138 | 'max_eps_length' : args.max_eps_length,
139 | 'trials' : args.trials,
140 | 'env': env,
141 | }
142 |
143 | run(args.n_generations, args.generation_size, agent_config, args.save_file, args.render)
144 |
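145 | #Editorial usage sketch (not from the book): a typical invocation with the
146 | #arguments defined above, e.g.
147 | #  python main.py --env CartPole-v1 --generation_size 20 --render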
--------------------------------------------------------------------------------
/Chapter 6/simulator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import functional as F
3 |
4 | class SimulatorState(torch.nn.Module):
5 | def __init__(self):
6 | super(SimulatorState, self).__init__()
7 | self.conv1 = torch.nn.Conv2d(4, 8, kernel_size=3, padding=1)
8 | self.conv2 = torch.nn.Conv2d(8, 16, kernel_size=3, padding=1)
9 | self.conv3 = torch.nn.Conv2d(16, 32, kernel_size=3, padding=1)
10 |
11 | self.s_fc1 = torch.nn.Linear(512, 99)
12 |
13 | self.action_fc1 = torch.nn.Linear(4, 1)
14 |
15 | self.fc1 = torch.nn.Linear(100, 50)
16 | self.fc2 = torch.nn.Linear(50, 16)
17 |
18 | # self.s_fc1 = torch.nn.Linear(50, 16)
19 |
20 | self.reward_fc1 = torch.nn.Linear(50, 30)
21 | self.reward_fc2 = torch.nn.Linear(30, 20)
22 | self.reward_fc3 = torch.nn.Linear(20, 3)
23 |
24 | def forward(self, x):
25 | state = x[:, :64]
26 | a_x = x[:, 64:]
27 |
28 |         num_batch = state.shape[0] #batch size (only used by the commented-out reward head below)
29 |
30 | s_x = state.reshape(-1, 4, 4, 4)
31 |
32 | s_x = F.relu(self.conv1(s_x))
33 | s_x = F.relu(self.conv2(s_x))
34 | s_x = F.relu(self.conv3(s_x))
35 | s_x = s_x.view(-1, 8 * 64)
36 | s_x = self.s_fc1(s_x)
37 |
38 | a_x = self.action_fc1(a_x)
39 |
40 | x = torch.cat((s_x, a_x), dim=1)
41 |
42 | x = F.relu(self.fc1(x))
43 | x = self.fc2(x)
44 |
45 | # state_copy = state.reshape(-1,4,16).clone()
46 | # state_copy[np.arange(num_batch)][0][:] = 0
47 | # state_copy[np.arange(num_batch),s_x] = 1
48 |
49 | # r_x = F.relu(self.reward_fc1(state_copy))
50 | # r_x = F.relu(self.reward_fc2(r_x))
51 | # r_x = self.reward_fc3(r_x)
52 |
53 |         return F.softmax(x, dim=1)
54 |
55 |
56 | class SimulatorReward(torch.nn.Module):
57 | def __init__(self):
58 | super(SimulatorReward, self).__init__()
59 | self.conv1 = torch.nn.Conv2d(4, 8, kernel_size=3, padding=1)
60 | self.conv2 = torch.nn.Conv2d(8, 16, kernel_size=3, padding=1)
61 | self.conv3 = torch.nn.Conv2d(16, 32, kernel_size=3, padding=1)
62 |
63 | self.fc1 = torch.nn.Linear(512, 200)
64 | self.fc2 = torch.nn.Linear(200, 100)
65 | self.fc3 = torch.nn.Linear(100, 3)
66 |
67 | def forward(self, x):
68 | x = x.reshape(-1, 4, 4, 4)
69 | x = F.relu(self.conv1(x))
70 | x = F.relu(self.conv2(x))
71 | x = self.conv3(x)
72 |
73 | x = x.view(-1, 512)
74 |
75 | x = F.relu(self.fc1(x))
76 | x = F.relu(self.fc2(x))
77 | x = self.fc3(x)
78 |
79 |         return F.softmax(x, dim=1)
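80 | 
81 | if __name__ == '__main__':
82 |     #Shape check (editorial sketch; the 64-state + 4-action input split is
83 |     #inferred from forward() above): SimulatorState consumes a flattened
84 |     #4x4x4 Gridworld state plus a 4-value action encoding, SimulatorReward
85 |     #the 64-value state alone.
86 |     x = torch.rand(2, 68)
87 |     print(SimulatorState()(x).shape)           #torch.Size([2, 16])
88 |     print(SimulatorReward()(x[:, :64]).shape)  #torch.Size([2, 3])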
--------------------------------------------------------------------------------
/Chapter 8/script_8.py:
--------------------------------------------------------------------------------
1 | from nes_py.wrappers import JoypadSpace #A
2 | import gym_super_mario_bros
3 | from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT #B
4 | env = gym_super_mario_bros.make('SuperMarioBros-v3', apply_api_compatibility=True, render_mode="rgb_array")
5 | env = JoypadSpace(env, COMPLEX_MOVEMENT) #C
6 |
7 |
8 | done = True
9 | for step in range(2500): #D
10 | if done:
11 | state = env.reset()
12 | state, reward, done, _, info = env.step(env.action_space.sample())
13 | env.render()
14 | env.close()
15 |
16 |
17 | import matplotlib.pyplot as plt
18 | from skimage.transform import resize #A
19 | import numpy as np
20 |
21 | def downscale_obs(obs, new_size=(42,42), to_gray=True):
22 | if to_gray:
23 | return resize(obs, new_size, anti_aliasing=True).max(axis=2) #B
24 | else:
25 | return resize(obs, new_size, anti_aliasing=True)
26 |
27 |
28 | plt.imshow(env.render())
29 | plt.imshow(downscale_obs(env.render()))
30 |
31 |
32 | import torch
33 | from torch import nn
34 | from torch import optim
35 | import torch.nn.functional as F
36 | from collections import deque
37 |
38 | def prepare_state(state): #A
39 | return torch.from_numpy(downscale_obs(state, to_gray=True)).float().unsqueeze(dim=0)
40 |
41 |
42 | def prepare_multi_state(state1, state2): #B
43 | state1 = state1.clone()
44 | tmp = torch.from_numpy(downscale_obs(state2, to_gray=True)).float()
45 | state1[0][0] = state1[0][1]
46 | state1[0][1] = state1[0][2]
47 | state1[0][2] = tmp
48 | return state1
49 |
50 |
51 | def prepare_initial_state(state,N=3): #C
52 | state_ = torch.from_numpy(downscale_obs(state, to_gray=True)).float()
53 | tmp = state_.repeat((N,1,1))
54 | return tmp.unsqueeze(dim=0)
55 |
56 |
57 | def policy(qvalues, eps=None): #A
58 | if eps is not None:
59 | if torch.rand(1) < eps:
60 |             return torch.randint(low=0,high=12,size=(1,)) #sample over all 12 COMPLEX_MOVEMENT actions
61 | else:
62 | return torch.argmax(qvalues)
63 | else:
64 |         return torch.multinomial(F.softmax(F.normalize(qvalues), dim=1), num_samples=1) #B
65 |
66 |
67 | from random import shuffle
68 | import torch
69 | from torch import nn
70 | from torch import optim
71 | import torch.nn.functional as F
72 |
73 | class ExperienceReplay:
74 | def __init__(self, N=500, batch_size=100):
75 | self.N = N #A
76 | self.batch_size = batch_size #B
77 | self.memory = []
78 | self.counter = 0
79 |
80 | def add_memory(self, state1, action, reward, state2):
81 | self.counter +=1
82 | if self.counter % 500 == 0: #C
83 | self.shuffle_memory()
84 |
85 | if len(self.memory) < self.N: #D
86 | self.memory.append( (state1, action, reward, state2) )
87 | else:
88 | rand_index = np.random.randint(0,self.N-1)
89 | self.memory[rand_index] = (state1, action, reward, state2)
90 |
91 | def shuffle_memory(self): #E
92 | shuffle(self.memory)
93 |
94 | def get_batch(self): #F
95 | if len(self.memory) < self.batch_size:
96 | batch_size = len(self.memory)
97 | else:
98 | batch_size = self.batch_size
99 | if len(self.memory) < 1:
100 | print("Error: No data in memory.")
101 | return None
102 | #G
103 | ind = np.random.choice(np.arange(len(self.memory)),batch_size,replace=False)
104 | batch = [self.memory[i] for i in ind] #batch is a list of tuples
105 | state1_batch = torch.stack([x[0].squeeze(dim=0) for x in batch],dim=0)
106 | action_batch = torch.Tensor([x[1] for x in batch]).long()
107 | reward_batch = torch.Tensor([x[2] for x in batch])
108 | state2_batch = torch.stack([x[3].squeeze(dim=0) for x in batch],dim=0)
109 | return state1_batch, action_batch, reward_batch, state2_batch
110 |
111 |
112 | class Phi(nn.Module): #A
113 | def __init__(self):
114 | super(Phi, self).__init__()
115 | self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3), stride=2, padding=1)
116 | self.conv2 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
117 | self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
118 | self.conv4 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
119 |
120 | def forward(self,x):
121 | x = F.normalize(x)
122 | y = F.elu(self.conv1(x))
123 | y = F.elu(self.conv2(y))
124 | y = F.elu(self.conv3(y))
125 | y = F.elu(self.conv4(y)) #size [1, 32, 3, 3] batch, channels, 3 x 3
126 | y = y.flatten(start_dim=1) #size N, 288
127 | return y
128 |
129 | class Gnet(nn.Module): #B
130 | def __init__(self):
131 | super(Gnet, self).__init__()
132 | self.linear1 = nn.Linear(576,256)
133 | self.linear2 = nn.Linear(256,12)
134 |
135 | def forward(self, state1,state2):
136 | x = torch.cat( (state1, state2) ,dim=1)
137 | y = F.relu(self.linear1(x))
138 | y = self.linear2(y)
139 | y = F.softmax(y,dim=1)
140 | return y
141 |
142 | class Fnet(nn.Module): #C
143 | def __init__(self):
144 | super(Fnet, self).__init__()
145 | self.linear1 = nn.Linear(300,256)
146 | self.linear2 = nn.Linear(256,288)
147 |
148 | def forward(self,state,action):
149 | action_ = torch.zeros(action.shape[0],12) #D
150 | indices = torch.stack( (torch.arange(action.shape[0]), action.squeeze()), dim=0)
151 | indices = indices.tolist()
152 | action_[indices] = 1.
153 | x = torch.cat( (state,action_) ,dim=1)
154 | y = F.relu(self.linear1(x))
155 | y = self.linear2(y)
156 | return y
157 |
158 |
159 | class Qnetwork(nn.Module):
160 | def __init__(self):
161 | super(Qnetwork, self).__init__()
162 | #in_channels, out_channels, kernel_size, stride=1, padding=0
163 | self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=(3,3), stride=2, padding=1)
164 | self.conv2 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
165 | self.conv3 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
166 | self.conv4 = nn.Conv2d(32, 32, kernel_size=(3,3), stride=2, padding=1)
167 | self.linear1 = nn.Linear(288,100)
168 | self.linear2 = nn.Linear(100,12)
169 |
170 | def forward(self,x):
171 | x = F.normalize(x)
172 | y = F.elu(self.conv1(x))
173 | y = F.elu(self.conv2(y))
174 | y = F.elu(self.conv3(y))
175 | y = F.elu(self.conv4(y))
176 | y = y.flatten(start_dim=2)
177 | y = y.view(y.shape[0], -1, 32)
178 | y = y.flatten(start_dim=1)
179 | y = F.elu(self.linear1(y))
180 | y = self.linear2(y) #size N, 12
181 | return y
182 |
183 |
184 | params = {
185 | 'batch_size':150,
186 | 'beta':0.2,
187 | 'lambda':0.1,
188 | 'eta': 1.0,
189 | 'gamma':0.2,
190 | 'max_episode_len':100,
191 | 'min_progress':15,
192 | 'action_repeats':6,
193 | 'frames_per_state':3
194 | }
195 |
196 | replay = ExperienceReplay(N=1000, batch_size=params['batch_size'])
197 | Qmodel = Qnetwork()
198 | encoder = Phi()
199 | forward_model = Fnet()
200 | inverse_model = Gnet()
201 | forward_loss = nn.MSELoss(reduction='none')
202 | inverse_loss = nn.CrossEntropyLoss(reduction='none')
203 | qloss = nn.MSELoss()
204 | all_model_params = list(Qmodel.parameters()) + list(encoder.parameters()) #A
205 | all_model_params += list(forward_model.parameters()) + list(inverse_model.parameters())
206 | opt = optim.Adam(lr=0.001, params=all_model_params)
207 |
208 |
209 | def loss_fn(q_loss, inverse_loss, forward_loss):
210 | loss_ = (1 - params['beta']) * inverse_loss
211 | loss_ += params['beta'] * forward_loss
212 | loss_ = loss_.sum() / loss_.flatten().shape[0]
213 | loss = loss_ + params['lambda'] * q_loss
214 | return loss
215 |
216 | def reset_env():
217 | """
218 | Reset the environment and return a new initial state
219 | """
220 | env.reset()
221 |     state1 = prepare_initial_state(env.render())
222 | return state1
223 |
224 |
225 | def ICM(state1, action, state2, forward_scale=1., inverse_scale=1e4):
226 | state1_hat = encoder(state1) #A
227 | state2_hat = encoder(state2)
228 | state2_hat_pred = forward_model(state1_hat.detach(), action.detach()) #B
229 | forward_pred_err = forward_scale * forward_loss(state2_hat_pred, \
230 | state2_hat.detach()).sum(dim=1).unsqueeze(dim=1)
231 | pred_action = inverse_model(state1_hat, state2_hat) #C
232 | inverse_pred_err = inverse_scale * inverse_loss(pred_action, \
233 | action.detach().flatten()).unsqueeze(dim=1)
234 | return forward_pred_err, inverse_pred_err
235 |
236 |
237 | def minibatch_train(use_extrinsic=True):
238 | state1_batch, action_batch, reward_batch, state2_batch = replay.get_batch()
239 | action_batch = action_batch.view(action_batch.shape[0],1) #A
240 | reward_batch = reward_batch.view(reward_batch.shape[0],1)
241 |
242 | forward_pred_err, inverse_pred_err = ICM(state1_batch, action_batch, state2_batch) #B
243 | i_reward = (1. / params['eta']) * forward_pred_err #C
244 | reward = i_reward.detach() #D
245 |     if use_extrinsic: #E
246 | reward += reward_batch
247 | qvals = Qmodel(state2_batch) #F
248 | reward += params['gamma'] * torch.max(qvals)
249 | reward_pred = Qmodel(state1_batch)
250 | reward_target = reward_pred.clone()
251 | indices = torch.stack( (torch.arange(action_batch.shape[0]), \
252 | action_batch.squeeze()), dim=0)
253 | indices = indices.tolist()
254 | reward_target[indices] = reward.squeeze()
255 | q_loss = 1e5 * qloss(F.normalize(reward_pred), F.normalize(reward_target.detach()))
256 | return forward_pred_err, inverse_pred_err, q_loss
257 |
258 |
259 | epochs = 5000
260 | env.reset()
261 | state1 = prepare_initial_state(env.render())
262 | eps=0.15
263 | losses = []
264 | episode_length = 0
265 | switch_to_eps_greedy = 1000
266 | state_deque = deque(maxlen=params['frames_per_state'])
267 | e_reward = 0.
268 | last_x_pos = env.env.env._x_position #A
269 | ep_lengths = []
270 | #the extrinsic (game) reward is toggled via minibatch_train's use_extrinsic flag
271 | for i in range(epochs):
272 | opt.zero_grad()
273 | episode_length += 1
274 | q_val_pred = Qmodel(state1) #B
275 | if i > switch_to_eps_greedy: #C
276 | action = int(policy(q_val_pred,eps))
277 | else:
278 | action = int(policy(q_val_pred))
279 | for j in range(params['action_repeats']): #D
280 |         state2, e_reward_, done, _, info = env.step(action)
281 | last_x_pos = info['x_pos']
282 | if done:
283 | state1 = reset_env()
284 | break
285 | e_reward += e_reward_
286 | state_deque.append(prepare_state(state2))
287 | state2 = torch.stack(list(state_deque),dim=1) #E
288 | replay.add_memory(state1, action, e_reward, state2) #F
289 | e_reward = 0
290 | if episode_length > params['max_episode_len']: #G
291 | if (info['x_pos'] - last_x_pos) < params['min_progress']:
292 | done = True
293 | else:
294 | last_x_pos = info['x_pos']
295 | if done:
296 | ep_lengths.append(info['x_pos'])
297 | state1 = reset_env()
298 | last_x_pos = env.env.env._x_position
299 | episode_length = 0
300 | else:
301 | state1 = state2
302 | if len(replay.memory) < params['batch_size']:
303 | continue
304 | forward_pred_err, inverse_pred_err, q_loss = minibatch_train(use_extrinsic=False) #H
305 |     loss = loss_fn(q_loss, inverse_pred_err, forward_pred_err) #I
306 | loss_list = (q_loss.mean(), forward_pred_err.flatten().mean(),\
307 | inverse_pred_err.flatten().mean())
308 | losses.append(loss_list)
309 | loss.backward()
310 | opt.step()
311 |
312 |
313 | done = True
314 | state_deque = deque(maxlen=params['frames_per_state'])
315 | for step in range(5000):
316 | if done:
317 | env.reset()
318 |         state1 = prepare_initial_state(env.render())
319 | q_val_pred = Qmodel(state1)
320 | action = int(policy(q_val_pred,eps))
321 |     state2, reward, done, _, info = env.step(action)
322 | state2 = prepare_multi_state(state1,state2)
323 | state1=state2
324 | env.render()
325 |
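326 | 
327 | #Editorial sketch (not a book listing): the preprocessing pipeline above turns
328 | #a raw 240x256x3 Mario frame into the 1x3x42x42 tensor the Q-network expects:
329 | #  frame = np.random.rand(240, 256, 3)  #stand-in for a rendered frame
330 | #  s1 = prepare_initial_state(frame)    #torch.Size([1, 3, 42, 42])
331 | #  s2 = prepare_multi_state(s1, frame)  #shifts the newest frame into the stack
332 | #  q = Qnetwork()(s1)                   #torch.Size([1, 12]), one Q-value per action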
--------------------------------------------------------------------------------
/Chapter 9/MAgent/build/render/README.txt:
--------------------------------------------------------------------------------
1 | Folder needed to run the 15th cell of the notebook.
2 |
--------------------------------------------------------------------------------
/Environments/GridBoard.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import sys
4 |
5 | def randPair(s,e):
6 | return np.random.randint(s,e), np.random.randint(s,e)
7 |
8 | class BoardPiece:
9 |
10 | def __init__(self, name, code, pos):
11 | self.name = name #name of the piece
12 | self.code = code #an ASCII character to display on the board
13 | self.pos = pos #2-tuple e.g. (1,4)
14 |
15 | class BoardMask:
16 |
17 | def __init__(self, name, mask, code):
18 | self.name = name
19 | self.mask = mask
20 | self.code = code
21 |
22 | def get_positions(self): #returns tuple of arrays
23 | return np.nonzero(self.mask)
24 |
25 | def zip_positions2d(positions): #positions is tuple of two arrays
26 | x,y = positions
27 | return list(zip(x,y))
28 |
29 | class GridBoard:
30 |
31 | def __init__(self, size=4):
32 | self.size = size #Board dimensions, e.g. 4 x 4
33 | self.components = {} #name : board piece
34 | self.masks = {}
35 |
36 | def addPiece(self, name, code, pos=(0,0)):
37 | newPiece = BoardPiece(name, code, pos)
38 | self.components[name] = newPiece
39 |
40 | #basically a set of boundary elements
41 | def addMask(self, name, mask, code):
42 | #mask is a 2D-numpy array with 1s where the boundary elements are
43 | newMask = BoardMask(name, mask, code)
44 | self.masks[name] = newMask
45 |
46 | def movePiece(self, name, pos):
47 | move = True
48 | for _, mask in self.masks.items():
49 | if pos in zip_positions2d(mask.get_positions()):
50 | move = False
51 | if move:
52 | self.components[name].pos = pos
53 |
54 | def delPiece(self, name):
55 |         del self.components[name]
56 |
57 | def render(self):
58 |         dtype = '<U2'
59 |         displ_board = np.zeros((self.size, self.size), dtype=dtype)
60 |         displ_board[:] = ' '
61 | 
62 |         for name, piece in self.components.items():
63 |             displ_board[piece.pos] = piece.code
64 | 
65 |         for name, mask in self.masks.items():
66 |             displ_board[mask.get_positions()] = mask.code
67 | 
68 |         return displ_board
69 | 
70 |     def render_np(self):
71 |         num_pieces = len(self.components) + len(self.masks)
72 |         displ_board = np.zeros((num_pieces, self.size, self.size), dtype=np.uint8)
73 |         layer = 0
74 |         for name, piece in self.components.items():
75 |             pos = (layer,) + piece.pos
76 |             displ_board[pos] = 1
77 |             layer += 1
78 | 
79 |         for name, mask in self.masks.items():
80 |             x,y = mask.get_positions()
81 |             z = np.repeat(layer, len(x))
82 |             displ_board[(z,x,y)] = 1
83 |             layer += 1
84 |         return displ_board
85 | 
86 | def addTuple(a,b):
87 |     return tuple([sum(x) for x in zip(a,b)])
88 | 
--------------------------------------------------------------------------------
/Environments/Gridworld.py:
--------------------------------------------------------------------------------
1 | from GridBoard import *
2 | 
3 | class Gridworld:
4 | 
5 |     def __init__(self, size=4, mode='static'):
6 |         if size >= 4:
7 | self.board = GridBoard(size=size)
8 | else:
9 | print("Minimum board size is 4. Initialized to size 4.")
10 | self.board = GridBoard(size=4)
11 |
12 | #Add pieces, positions will be updated later
13 | self.board.addPiece('Player','P',(0,0))
14 | self.board.addPiece('Goal','+',(1,0))
15 | self.board.addPiece('Pit','-',(2,0))
16 | self.board.addPiece('Wall','W',(3,0))
17 |
18 | if mode == 'static':
19 | self.initGridStatic()
20 | elif mode == 'player':
21 | self.initGridPlayer()
22 | else:
23 | self.initGridRand()
24 |
25 | #Initialize stationary grid, all items are placed deterministically
26 | def initGridStatic(self):
27 | #Setup static pieces
28 | self.board.components['Player'].pos = (0,3) #Row, Column
29 | self.board.components['Goal'].pos = (0,0)
30 | self.board.components['Pit'].pos = (0,1)
31 | self.board.components['Wall'].pos = (1,1)
32 |
33 | #Check if board is initialized appropriately (no overlapping pieces)
34 | #also remove impossible-to-win boards
35 | def validateBoard(self):
36 | valid = True
37 |
38 | player = self.board.components['Player']
39 | goal = self.board.components['Goal']
40 | wall = self.board.components['Wall']
41 | pit = self.board.components['Pit']
42 |
43 |         #gather all piece positions; any duplicates mean pieces overlap
44 |         all_positions = [player.pos, goal.pos, wall.pos, pit.pos]
45 | if len(all_positions) > len(set(all_positions)):
46 | return False
47 |
48 |         corners = [(0,0), (0,self.board.size-1), (self.board.size-1,0), (self.board.size-1,self.board.size-1)]
49 | #if player is in corner, can it move? if goal is in corner, is it blocked?
50 | if player.pos in corners or goal.pos in corners:
51 | val_move_pl = [self.validateMove('Player', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
52 | val_move_go = [self.validateMove('Goal', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
53 | if 0 not in val_move_pl or 0 not in val_move_go:
54 | #print(self.display())
55 | #print("Invalid board. Re-initializing...")
56 | valid = False
57 |
58 | return valid
59 |
60 | #Initialize player in random location, but keep wall, goal and pit stationary
61 | def initGridPlayer(self):
62 | #height x width x depth (number of pieces)
63 | self.initGridStatic()
64 | #place player
65 | self.board.components['Player'].pos = randPair(0,self.board.size)
66 |
67 | if (not self.validateBoard()):
68 | #print('Invalid grid. Rebuilding..')
69 | self.initGridPlayer()
70 |
71 | #Initialize grid so that goal, pit, wall, player are all randomly placed
72 | def initGridRand(self):
73 | #height x width x depth (number of pieces)
74 | self.board.components['Player'].pos = randPair(0,self.board.size)
75 | self.board.components['Goal'].pos = randPair(0,self.board.size)
76 | self.board.components['Pit'].pos = randPair(0,self.board.size)
77 | self.board.components['Wall'].pos = randPair(0,self.board.size)
78 |
79 | if (not self.validateBoard()):
80 | #print('Invalid grid. Rebuilding..')
81 | self.initGridRand()
82 |
83 | def validateMove(self, piece, addpos=(0,0)):
84 | outcome = 0 #0 is valid, 1 invalid, 2 lost game
85 | pit = self.board.components['Pit'].pos
86 | wall = self.board.components['Wall'].pos
87 | new_pos = addTuple(self.board.components[piece].pos, addpos)
88 | if new_pos == wall:
89 | outcome = 1 #block move, player can't move to wall
90 | elif max(new_pos) > (self.board.size-1): #if outside bounds of board
91 | outcome = 1
92 | elif min(new_pos) < 0: #if outside bounds
93 | outcome = 1
94 | elif new_pos == pit:
95 | outcome = 2
96 |
97 | return outcome
98 |
99 | def makeMove(self, action):
100 | #need to determine what object (if any) is in the new grid spot the player is moving to
101 | #actions in {u,d,l,r}
102 | def checkMove(addpos):
103 | if self.validateMove('Player', addpos) in [0,2]:
104 | new_pos = addTuple(self.board.components['Player'].pos, addpos)
105 | self.board.movePiece('Player', new_pos)
106 |
107 | if action == 'u': #up
108 | checkMove((-1,0))
109 | elif action == 'd': #down
110 | checkMove((1,0))
111 | elif action == 'l': #left
112 | checkMove((0,-1))
113 | elif action == 'r': #right
114 | checkMove((0,1))
115 | else:
116 | pass
117 |
118 | def reward(self):
119 | if (self.board.components['Player'].pos == self.board.components['Pit'].pos):
120 | return -1
121 | elif (self.board.components['Player'].pos == self.board.components['Goal'].pos):
122 | return 1
123 | else:
124 | return 0
125 |
126 | def display(self):
127 | return self.board.render()
128 |
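129 | 
130 | #Editorial note: this file is otherwise identical to Chapter 3/Gridworld.py,
131 | #but reward() above uses the sparse scale -1/+1/0 (pit/goal/step) instead of
132 | #the shaped -10/+10/-1 used in Chapter 3.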
--------------------------------------------------------------------------------
/Errata.md:
--------------------------------------------------------------------------------
1 | # Errata
2 | Here we keep an updated list of book errata, organized by chapter. The errata will be addressed periodically in the eBook and liveBook versions, and less frequently in print. We highly recommend using the code on GitHub when following along with the book, since it is difficult to keep the code embedded in the book text up to date.
3 |
4 | ## Chapter 2
5 |
6 | ### Section 2.4.1
7 | Code should be:
8 | 
9 |     >>> x = torch.Tensor([2,4]) #input data
10 |     >>> m = torch.randn(2, requires_grad=True) #parameter 1
11 |     >>> b = torch.randn(1, requires_grad=True) #parameter 2
12 |     >>> y = m*x+b #linear model
13 |     >>> y_known = torch.Tensor([5,9])
14 |     >>> loss = (torch.sum(y_known - y))**2 #loss function
15 |     >>> loss.backward() #calculate gradients
16 |     >>> m.grad
17 |     tensor([ -51.9402, -103.8803])
18 | ### Listing 2.10
19 | Code should be:
20 | 
21 |     def train(env, epochs=10000, learning_rate=1e-3):
22 | ### Section 2.5
23 | Text should say: “When we train this network for 10,000 epochs”
24 |
25 | ### In Summary, in bullet point 5
26 | “with probability ε – 1” should be “with probability 1 – ε”
27 |
28 | ## Chapter 3
29 | ### Listing 3.7
30 | Code should be: model2 = copy.deepcopy(model)
31 | ### Listing 3.8
32 | First line of code should be: from IPython.display import clear_output
33 |
34 |
35 | ## Chapter 4
36 |
--------------------------------------------------------------------------------
/Errata/Chapter 4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning in Action\n",
8 | "### by Alex Zai and Brandon Brown\n",
9 | "\n",
10 | "#### Chapter 4"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "##### Supplemental"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 28,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "from matplotlib import pyplot as plt\n",
27 | "\n",
28 | "def moving_average(x,step=5,window=50):\n",
29 | " num = (x.shape[0] - window) / step\n",
30 | " num = int(num)\n",
31 | " avg = np.zeros(num)\n",
32 | " slider = np.ones(window) / window\n",
33 | " start = 0\n",
34 | " for i in range(num):\n",
35 | " end = start + window\n",
36 | " avg[i] = slider @ x[start:end]\n",
37 | " start = start + step\n",
38 | " return avg"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "##### Listing 4.1"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 2,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "from gym import envs\n",
55 | "#envs.registry.all()"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "##### Listing 4.2"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 3,
68 | "metadata": {},
69 | "outputs": [],
70 | "source": [
71 | "import gym\n",
72 | "env = gym.make('CartPole-v0')"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "##### Listing 4.3"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 4,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "state1 = env.reset()\n",
89 | "action = env.action_space.sample()\n",
90 | "state, reward, done, info = env.step(action)"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "##### Listing 4.4"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 30,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "import gym\n",
107 | "import numpy as np\n",
108 | "import torch\n",
109 | " \n",
110 | "l1 = 4\n",
111 | "l2 = 150\n",
112 | "l3 = 2\n",
113 | " \n",
114 | "model = torch.nn.Sequential(\n",
115 | " torch.nn.Linear(l1, l2),\n",
116 | " torch.nn.LeakyReLU(),\n",
117 | " torch.nn.Linear(l2, l3),\n",
118 | " torch.nn.Softmax(dim=0)\n",
119 | ")\n",
120 | " \n",
121 | "learning_rate = 0.009\n",
122 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "##### Listing 4.5"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": null,
135 | "metadata": {},
136 | "outputs": [],
137 | "source": [
138 | "pred = model(torch.from_numpy(state1).float())\n",
139 | "action = np.random.choice(np.array([0,1]), p=pred.data.numpy())\n",
140 | "state2, reward, done, info = env.step(action)"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "##### Listing 4.6"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 32,
153 | "metadata": {},
154 | "outputs": [],
155 | "source": [
156 | "def discount_rewards(rewards, gamma=0.99):\n",
157 | " lenr = len(rewards)\n",
158 | " disc_return = torch.pow(gamma,torch.arange(lenr).float()) * rewards\n",
159 | " disc_return /= disc_return.max()\n",
160 | " return disc_return"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "##### Listing 4.7"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 33,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "def loss_fn(preds, r):\n",
177 | " return -1 * torch.sum(r * torch.log(preds))"
178 | ]
179 | },
180 | {
181 | "cell_type": "markdown",
182 | "metadata": {},
183 | "source": [
184 | "##### Listing 4.8"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 34,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "MAX_DUR = 200\n",
194 | "MAX_EPISODES = 500\n",
195 | "gamma = 0.99\n",
196 | "score = []\n",
197 | "for episode in range(MAX_EPISODES):\n",
198 | " curr_state = env.reset()\n",
199 | " done = False\n",
200 | " transitions = []\n",
201 | "\n",
202 | " for t in range(MAX_DUR):\n",
203 | " act_prob = model(torch.from_numpy(curr_state).float())\n",
204 | " action = np.random.choice(np.array([0,1]), p=act_prob.data.numpy())\n",
205 | " prev_state = curr_state\n",
206 | " curr_state, _, done, info = env.step(action)\n",
207 | " transitions.append((prev_state, action, t+1))\n",
208 | " if done:\n",
209 | " break\n",
210 | " \n",
211 | " ep_len = len(transitions)\n",
212 | " score.append(ep_len)\n",
213 | " reward_batch = torch.Tensor([r for (s,a,r) in\n",
214 | " transitions]).flip(dims=(0,))\n",
215 | " disc_rewards = discount_rewards(reward_batch)\n",
216 | " state_batch = torch.Tensor([s for (s,a,r) in transitions])\n",
217 | " action_batch = torch.Tensor([a for (s,a,r) in transitions])\n",
218 | " pred_batch = model(state_batch)\n",
219 | " prob_batch = pred_batch.gather(dim=1,index=action_batch.long().view(-1,1)).squeeze()\n",
220 | " loss = loss_fn(prob_batch, disc_rewards)\n",
221 | " optimizer.zero_grad()\n",
222 | " loss.backward()\n",
223 | " optimizer.step()"
224 | ]
225 | },
226 | {
227 | "cell_type": "code",
228 | "execution_count": 35,
229 | "metadata": {},
230 | "outputs": [
231 | {
232 | "data": {
233 | "text/plain": [
234 | "[]"
235 | ]
236 | },
237 | "execution_count": 35,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | },
241 | {
242 | "data": {
243 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3deXxU1f3/8dcnOwESSAgQCBiWALIJEjZFtC4VV7Raq21dsXRxqdpWbf22tv3W1vp1qa3+bKni0lqUqlXcRWsF2QPIvoWEJYHsELKRbc7vjxltIgkJ2SaZeT8fDx6ZOffOzYd5TN65Offcc8w5h4iIBJYQfxcgIiJtT+EuIhKAFO4iIgFI4S4iEoAU7iIiASjM3wUA9OnTxyUnJ/u7DBGRLmXt2rUFzrmEhrZ1inBPTk4mLS3N32WIiHQpZra3sW3qlhERCUAKdxGRAKRwFxEJQAp3EZEApHAXEQlATYa7mQ0ys4/NbKuZbTGzH/ra48xssZnt8n3t7Ws3M/ujmaWb2UYzO7W9/xMiIlJfc87ca4AfOedGA9OAW8xsNHAv8JFzLgX4yPcc4AIgxfdvLvBUm1ctIiLH1eQ4d+fcQeCg73GJmW0DBgKzgbN8uz0P/Ae4x9f+gvPOJbzSzHqZWaLvOCIiAaGyppajVZ4vnlfVeqioqqWsqoayyhoKy6oo8v2rrK5t9DipyXHMHNHgfUitckI3MZlZMjARWAX0qxPYOUA/3+OBwP46L8vytdULdzObi/fMnsGDB59g2SIi/rN27yFueHY1JUdrmrW/WePbvnfmMP+Gu5n1AF4F7nDOHbE61TrnnJmd0Kofzrl5wDyA1NRUrRgiIl3CjpwSbnpuDXHdI7jj3BF8noRhoUZ0RBjdI0KJjgwjLjqC+B4RxHWPICo8tMPrbFa4m1k43mB/0Tn3mq859/PuFjNLBPJ87dnAoDovT/K1iYh0afsKy7n2mVVEhYfw9zlTGRQX7e+SGtVkuJv3FP0ZYJtz7tE6mxYB1wMP+r6+Uaf9VjN7CZgKFKu/XUS6gsqaWj7cmkdFA33kzjme+DidyhoPC787vVMHOzTvzP104Fpgk5l95mv7Gd5QX2hmc4C9wFW+be8AFwLpQDlwY5tWLCLSDjwexx0vfca7m3Ma3adHZBgvzJnCyP49O7CylmnOaJlPgcYuB5zTwP4OuKWVdYmIdKjfvrONdzfncM+sUVw8PrHBfXpFh9MzKryDK2uZTjHlr4iIPz23LJOnP83khtOS+d6ZQ7HjDW/pIjT9gIgEtQ+25PCrt7Zy3uh+/Pzi0QER7KBwF5EgtmJ3IbctWM/4gbE8fvUEQkMCI9hB4S4iQWrD/sPc/PwaBsdF8+yNU4iOCKxeaoW7iASdnbklXP/sauJ6RPC3OVOJ6x7h75LanMJdRIJKcXk13356FRGhIbw4Zxr9Y6P8XVK7CKy/Q0REmvDeloPklVTyyvemMzi+c9+I1Bo6cxeRoPLOphwGxXVj0km9/V1Ku1K4i0jQKK6oZvnuAi4YmxgwQx4bo3AXkaDx0bZcqmsds8b293cp7U7hLiJB493NOSTGRjEhqZe/S2l3CncRCQqllTV8sjOf88f0JySAblZqjMJdRILCx9vzqKrxcEEQdMmAwl1EgsR7m3Po0yOC1OQ4f5fSIRTuIhLwKqpq+XhHHueP6R9Q88ccj8JdRALeJzvzKa+q5YKxDc/THogU7iIS8N7ceIDe0eFMHRocXTLQjHA3s/lmlmdmm+u0TTCzlWb2mZmlmdkUX7uZ2R/NLN3MNprZqe1ZvIhIU4rKqvhgSw6XTRxIeGjwnM8253/6HDDrS20PAb9yzk0AfuF7DnABkOL7Nxd4qm3KFBFpmdfWZVFd67h68mB/l9Khmgx359wSoOjLzUCM73EscMD3eDbwgvNaCfQys+Dp5BKRTsU5x4LV+5g4uFeXWNS6LbV0Vsg7gPfN7GG8vyBO87UPBPbX2S/L13bwywcws7l4z+4ZPDi4fqOKSMdYu/cQu/PLeOiK8f4upcO1tAPq+8CdzrlBwJ3AMyd6AOfcPOdcqnMuNSEhoYVliIg0bsHq/XSPCOWi8cHXgdDScL8eeM33+J/AFN/jbGBQnf2SfG0iIh3qyNFq3t50gEsnDKR7ZPAtXdHScD8AnOl7fDawy/d4EXCdb9TMNKDYOXdMl4yISHt747MDHK32cM2UQU3vHICa/HVmZguAs4A+ZpYF3A98B3jczMKAo/j6zoF3gAuBdKAcuLEdahYROS7nHC+t3sfJiTGMGxjr73L8oslwd85d08imSQ3s64BbWluUiEhrLN1VwJYDR3jg8rEBvyhHY4JnRL+IBAXnHI8s3snAXt24clKSv8vxG4W7iASUj7blsWH/YW47eziRYaH+LsdvFO4ickIKSiu56s8r+GBLjr9LOYbH43h08U5Oio/miiA+a4eW38QkIkGo1uO4fcF6Vu8pYm9RGTNS+hAd0Xli5P0tOWw9eIRHrzolqOaRaUhw/+9F5IQ8ungHy3cXct30k8g9Uslfl2T6u6Qv1PrO2ocldGf2hIH+LsfvFO4i0iwfbs3lyY93c82UQfx69lguGNufvyzZTd6Ro/4uDYC3Nh5gV14pd5w7ImgW5DgehbuINCk9r4S7Fn7G2IEx3H/JGADuvWAU1bUeHvlgp5+r8/a1P/lxOil9e3DRuOCbaqAhCncRaVRNrYe/fLKbi/74KaEhxlPfmkRUuHcEyknx3bluejIL1+5n28Ejfq1z8bZcduaWcstXhhOis3ZA4S4ijdiec4SvPbWc3727nZkjEnjvjpkMiouut8/tZ6cQ2y2ce1/bRK6fumec8561D46L5uIgnCCsMQp3ETlG9uEKrvrzCrIPVfDENycy79pJ9IuJOma/2Ohw/nf2WLYfPMK5j37CwrT9eG9U7zhLdhWwMauY7581jLAgHyFTl94JEamn1uO46+XPqPU4XvvBaVw8fsBxb+G/5JQBvHfHTE7uH8Pdr2zk+mfXsDKjEI+nY0L+yX+nkxgbxddO1QiZuhTuIlLP00szWJVZxP2XjuGk+O7Nes2QPt15ae40fj17DOv2HuLqeSs546GP+b/3t5NT3H7dNaszi1i9p4i5M4cG9d2oDVG4i8gXNmcX8/AHO7hgbH++foJ3eIaEGNdNT2b1fefwh29MYHjfHjz1n91cP391u5zFezyOxxbvJL57RNCtj9ocCncRAeBodS13vPwZcd0j+O3l41o8m2J0RBiXTRzI8zdN4Q9XT2RHbglvbWr7ZR3+vGQ3KzIKufO8EXSL0Fn7lyncRQSARxfvJD2vlIe/fgq9u0e0yTEvHpfIyH49+cOHO6mp9bTJMQFWZhTy8Ps7uGh8It+aqrP2hijcRYQN+w/z9NIMrpkymDNS2m5N45AQ487zUsjIL+ONzw60yTHzSyq5fcF6kuO78/srxgftfO1N6Twz/kin55zjkQ92criiir49o+jbM5JJJ/UmpV9Pf5cmrVBV4+HuVzbSt2cUP71wVJs
f//wx/RkzIIbHP9rFpRMG1JvQq9bj2FdUzq7cEnLqjJP3eByllTUUV1RzuLwaM7yfuZhI3tp4kOKKal6YM4UeQbg2anM1Z5m9+cDFQJ5zbmyd9tvwrrpUC7ztnLvb1/5TYI6v/Xbn3PvtUbh0vEUbDvDEx+n0iAyjtLIGgIiwEN6+bYYCvgt78uN0duSWMP+GVGKiwtv8+GbGXeeNYM7zaby6NotzR/fjrQ0HWLThAJsPHKGqpvHumm7hofSKDsfjHAWlVdT6Lsw+dOV4RvWPafNaA4k1dcOBmc0ESoEXPg93M/sKcB9wkXOu0sz6OufyzGw0sACYAgwAPgRGOOdqj/c9UlNTXVpaWuv/N9JuyqtqOPvhT+jTM4I3bplBda2HfUXlXD1vJQN7deO1H5wW9FOsdiTnHD95ZSORYSHcf8kYIsIaf+8zC8p447NsiiuqKTlaQ3lVDYmx3RiW0IOYbmHc+fJnXDgukcevntiu9V7+/5azK7eEyhoPNR7H6MQYZqT0YXjfHozo15OBvbpRd+aAHlFh9YY31nochWWVeDzQP/bYG6qCkZmtdc6lNrStOWuoLjGz5C81fx940DlX6dsnz9c+G3jJ155pZul4g35FC2uXTuKp/+wm58hRnvjmREJDjNCQUEb068kDl43l+y+u48mP07nj3BH+LjNovLXxIK+szQJgT2EZT317UoNn3XsLy/j6n1dQUFpJz8gwYrqFExUewr+353G02nvGHN894ovJwNqLmXHfRSfz89c3c+bIBL42MYmR/U/sr73QEKNvT4V6c7W0w2oEcIaZPQAcBX7snFsDDARW1tkvy9d2DDObC8wFGDxYV7s7s/1F5fxlSQaXnjKA1OS4etsuGJfI5RMH8qd/p3P2qL6MT+rlpyqDR2llDb95eytjB8Zw3fRkfvbaJq768wqeu3FKvTPavJKjXPvMamo9Hj6860yG9+3xxTaPx3GguIL0vFKSekcT10ajY45ncnIc790xs92/j3i19O/oMCAOmAb8BFhoJ3jJ2jk3zzmX6pxLTUhou6vz0vZ++842Qs0avdj2y0vHkNAjkrsWbqCi6rg9cNIGHv9wJ3kllfzv7LFclTqIZ2+cTNahCi7+06c8+O52NmUVc+RoNTfMX0NBaSXP3jilXrCDdxRLUu9ozhrZ95htEhhaGu5ZwGvOazXgAfoA2cCgOvsl+dqki1qVUci7m3P4wVnDSIzt1uA+sd3CeejK8aTnlfKNeSvIPlzRwVUGjx05JcxftoerJw9i4uDeAJyRksDC705n9IAYnl6awSVPfMrUBz5iZ24Jf/72JCYM0l9Twail4f468BUAMxsBRAAFwCLgajOLNLMhQAqwui0KFf9YmJZFz6gwvjNz6HH3mzkigXnXTiIzv4xL/vQpy9ILOqjC4FBWWUNmQRk/f30zMVFh3H1+/b+iRg+I4YWbprDmvnN56IrxnJHShz9dM5GZI/RXcbBqzlDIBcBZQB8zywLuB+YD881sM1AFXO+8w262mNlCYCtQA9zS1EgZ6byqajws3prDeaP7fbFAw/F8dUx/3ri1B9/921qufWYVv7x0DNdNT27/QgPUvsJyHnxvG0t2Fnwx9BTg91eMa/QO0t7dI7hq8iCumjyowe0SPJozWuaaRjZ9u5H9HwAeaE1R0jmsyCjkyNEaLhzb/AUQhib04PVbTueHL63nl4u2MKp/DFOGxDX9QvlCaWUNT/w7nfmfZhIWalw+cSBJvaPp2zOS5D7dmXRSb3+XKF2Abu+SRr276SDdI0KZkdLnhF7XPTKMP1w9kYv/uJQ7XlrPOz88g17R7T8aIxDkHjnK7CeWkXPkKFecmsTds0Y2uEiGSFN014k0qKbWwwdbcznn5OZ1yXxZj8gw/nTNqeSXVnLvq5s6fHWersg5xz2vbuRwRRWvfn86j1x1ioJdWkzhLg1anVlEUVkVF47r3+JjjEuK5Sfnj+S9LTn8Y/W+NqwuML20Zj//2ZHPvbNGMekkdWVJ66hbRhr0zuaDdAsP5cwRfVt1nJtnDGXprgJ+/eZWJgzqxZgBsfW2O+fIOlRBfmklhaVVlFXW8JVRfYnt1vZznHRm+4vK+c1bWzltWLwuQkubULjLMWo9jvc25/KVUQmtXgQhJMR49KoJXPKnT5n7wloW3Xo68T0iAW/Xz09e2ci/1te/FWLMgBj+8Z1pQRPwHo/jx//cgJnx0JXjCQnRFLbSeuqWkWOs3XuIgtJKLjiBUTLHk9AzknnXTaKgtJLvv7iO6loPNbUe7ly4gX+tz+a7M4fy7A2TeeOW03nym6eyM7eEG59dTVmd4X+B6sjRau55dSOrMov4xSWjSeod7e+SJEDozF2O8c6mg0SEhfCVUa3rkqlrfFIvHrxiHHe+vIH7F22huLyatzcd5KcXjOK7Zw77Yr9TBvUiNARu+cd6bn4+jWdvnNyiC7qdnXOO97fk8Is3tlBQWsn3zhx2wmuWihyPwl2O8eG2XGam9GnzhRAun5jEtoMlzFuSAcD/XHQyN59x7J2vs8Ym8vDXa7lr4QZu/cd65l07KaC6Kjwexx0vf8aiDQcYnRjD09enasI1aXMKd6nnwOEKsg5VcNPpQ9rl+PfMGkVldS2jEmO4Zkrjs4FePjGJw+XV/OrNrfxlSQbfP2tYo/t2NU99sptFGw5w+9nDue2cFM2DL+1C4S71rNlTBNBud5WGhhi/mj226R2BG05LZu3eQzz8wQ5Sk3szObnrDw9cu/cQjy7eyUXjE7nzvBFa/1PajU4ZpJ5VmUX0jAzj5ET/L2FmZvzua+MYHBfNbf9YT2Fppb9LapXiimpuX7CexNgofve1cQp2aVcKd6lndWYRqcm9Ce0kfdw9o8J54psTKSqv4s6FG/B4Gr7TtbyqhlUZhfz5k93cvmA9b2440OSxq2s9HXbnrHOOn762kZwjR/njNRPbZa1SkbrULSNfKCytJD2vlK+d2uDiWX4zZkAs918ymvv+tZkbn1vDo1ed8sVY+aPVtTzywQ6eXbaHGl/wx3YLZ9GGA+zIKeGu80bUuxhbXFHNf3bk8cHWXD7ZkU+/mEiev2lKi4cg/m3lXlZmFHL/xaPp28hUAc45Hl28k3c25XDPrFGcOlgTf0n7U7jLF9bsOQTA1E44i+M3pwzGOfj1W1u58I9L+dM1pxJicPcrG8koKOPrk5KYNbY/Ewb1omdUOP/z+iae+DidzMIyHrhsLJ+mF/D6+gN8sjOP6lpHnx6RXDC2P+9tyeEbf1nJP74zlZPiu59QTR9uzeUXb2zGOVi5u5BHrjqFs0bWHz7q8Th+/dZWnlu+h2+kDuK7TcyLL9JWrDNM6JSamurS0tL8XUbQ+/WbW3lx1V42/fJ8IsI6Z4/dlgPF3PqP9ewtLMMBA2K78dCV4zl9eP2ZK51zzFuSwYPvbfc9h34xkVwyfgAXjEtk4qBehIQYm7OLufaZVUSEhfDizdOaveTcztwSLn9yGUMTevC7r43jx//cwPacEm6eMYRLJwxgUO9oekaFcferG3ltXTY3zxjCfRedrH52aVNmtt
Y5l9rgNoW7fO7iPy2lZ2Q4C+ZO83cpx1VaWcMDb28lKjyUH3115HHH43+8PY+luwo49+S+TB0a3+C1hB05JXzr6ZVU1XgY3rcHYaEhRISGcOWkJC6beGwX1aGyKmY/uYyK6lrevHUG/WOjOFpdy2/e3srfV/53grTIsBAqazz86LwR3Hr2cAW7tDmFuzSp5Gg1p/zqA249O4W7zhvh73I63O78Uh79YCdHjlZTVeMhr6SSPYVlPPnNU7lw3H+nYSitrGHOc2tYv/8wL8+d9sU6pnWPk55Xyv6icrIOVTBxcC9mT+hc1zAkcBwv3JuzzN584GIgzzk39kvbfgQ8DCQ45wrMe2ryOHAhUA7c4Jxb19r/gLS/tXsP4XGds7+9IwxL6MGT3zr1i+cVVbV86+mV3PHyZ8R3j2Dq0Hj2FJTxnRfSyCgo49GrTjkm2D8/zrCE5nXtiLSn5nSsPgfM+nKjmQ0CvgrUnaj7AryLYqcAc4GnWl+idITVmUWEhRgTB+s2eIBuEaE8c/1kBvXuxs0vpPG3FXu49IlPKSit5IWbpuhsXDq9JsPdObcEKGpg02PA3UDdfp3ZwAvOayXQy8zaZmpBaVdr9hQxLimW6AgNoPpc7+4RPH/TFLqFh/LzN7YwoFc3Ft0645iLtyKdUYt+ks1sNpDtnNvwpYtEA4H9dZ5n+doONnCMuXjP7hk8uPE5RqT9Ha2uZcP+Ym48PdnfpXQ6Sb2jefHmqby7OYebzxiiX37SZZzwJ9XMooGf4e2SaTHn3DxgHngvqLbmWNI6S3cVUFXrabf5ZLq6lH49SenX099liJyQlpyGDAOGAJ+ftScB68xsCpANDKqzb5KvTTopj8fxyAc7OCk+mpkjEvxdjoi0kRO+U8U5t8k519c5l+ycS8bb9XKqcy4HWARcZ17TgGLn3DFdMtJ5vLEhm+05Jfz4qyM19axIAGnyp9nMFgArgJFmlmVmc46z+ztABpAO/BX4QZtUKe2isqaWh9/fydiBMVw0Tte9RQJJk90yzrlrmtieXOexA25pfVnSEf6+ch/Zhyv4/RValFkk0Ojv8CB15Gg1T/x7F2ek9GFGiob2iQQahXuQ+uuSDA6VV3PPrFH+LkVE2oHCPQgdLq/i2WV7uGhcImMHxvq7HBFpBwr3IDR/2R5KK2u47Zzh/i5FRNqJwj3IFFdU8+yyTGaN6c+o/v5fJ1VE2ofCPcg8v3wPJUd11i4S6BTuQaTkaDXPfJrJuSf3Y8wA9bWLBDKFexB5YcVeiiuquV1n7SIBT+EeJI5W1/L00gzOGpnA+CTN2S4S6BTuQSJtzyEOlVdz3fST/F2KiHQAhXuQWL67gNAQY8qQeH+XIiIdQOEeJFZkFHJKUiw9IrXYhEgwULgHgdLKGjZmFTN9mM7aRYKFwj0IrMksotbjOG2YJggTCRYK9yCwfHcBEaEhTDqpt79LEZEOonAPAisyCpk4uBdR4aH+LkVEOkhzVmKab2Z5Zra5Ttv/mdl2M9toZv8ys151tv3UzNLNbIeZnd9ehUvzHC6vYsuBI+qSEQkyzTlzfw6Y9aW2xcBY59x4YCfwUwAzGw1cDYzxveb/mZlOF/1oVWYRzqGLqSJBpslwd84tAYq+1PaBc67G93QlkOR7PBt4yTlX6ZzLxLuW6pQ2rFdO0IrdhUSFhzBhkO5KFQkmbdHnfhPwru/xQGB/nW1Zvjbxk+W7C5icHEdEmC6viASTVv3Em9l9QA3wYgteO9fM0swsLT8/vzVlSCPySyrZmVuqLhmRINTicDezG4CLgW8555yvORsYVGe3JF/bMZxz85xzqc651ISEhJaWIcexMqMQgOlDFe4iwaZF4W5ms4C7gUudc+V1Ni0CrjazSDMbAqQAq1tfprTEioxCekSGMU7rpIoEnSYnGjGzBcBZQB8zywLuxzs6JhJYbGYAK51z33PObTGzhcBWvN01tzjnatureDm+FbsLmTokjrBQ9beLBJsmw905d00Dzc8cZ/8HgAdaU5S03oHDFWQWlPHtaZriVyQY6ZQuQC3f7e1vP00XU0WCksI9QC3fXUBc9whG9uvp71JExA8U7gHIOceK3YVMHxpPSIj5uxwR8QOFewDaU1jOweKjGt8uEsQU7gFoWXoBoP52kWCmcA9AK3YXkhgbxZA+3f1dioj4icI9wHg8jhUZhUwfFo/vHgQRCUIK9wCzI7eEorIqzd8uEuQU7gHm8/HtupgqEtwU7gFmxe4CkuOjGdirm79LERE/UrgHkFqPY1VGkc7aRUThHki2HTxCSWUN0zTFr0jQU7gHkNWZ3tUQJyfH+bkSEfE3hXsAWbOniIG9ujFA/e0iQU/hHiCcc6zOLGLqEJ21i4jCPWBkFJRRWFbFZIW7iKBwDxhrfP3tUxTuIkIzwt3M5ptZnpltrtMWZ2aLzWyX72tvX7uZ2R/NLN3MNprZqe1ZvPzX6swi+vSIYKjmkxERmnfm/hww60tt9wIfOedSgI98zwEuwLsodgowF3iqbcqUpqzeU8Tk5DjNJyMiQDPC3Tm3BCj6UvNs4Hnf4+eBy+q0v+C8VgK9zCyxrYqVhh04XEHWoQoNgRSRL7S0z72fc+6g73EO0M/3eCCwv85+Wb62Y5jZXDNLM7O0/Pz8FpYh4B0CCepvF5H/avUFVeecA1wLXjfPOZfqnEtNSEhobRlBbXVmET0iwzg5McbfpYhIJ9HScM/9vLvF9zXP154NDKqzX5KvTdrR6swiJp3Um1CtlyoiPi0N90XA9b7H1wNv1Gm/zjdqZhpQXKf7RtrBobIqduWVqktGROoJa2oHM1sAnAX0MbMs4H7gQWChmc0B9gJX+XZ/B7gQSAfKgRvboWapY7X620WkAU2Gu3PumkY2ndPAvg64pbVFSfMtSy8gOiKU8Umx/i5FRDoR3aHaxX26q4CpQ+KIDAv1dyki0oko3LuwrEPlZBSUMSNFo41EpD6Fexf26a4CAGamaDFsEalP4d6FLd1VQL+YSIb37eHvUkSkk1G4d1G1Hsey3QWckZKg+WRE5BgK9y5qc3Yxh8urOUNdMiLSAIV7F/Vpure//fThCncROZbCvYtauiuf0Ykx9OkR6e9SRKQTUrh3QWWVNazde4gzRuisXUQapnDvglZnFlFd6zhjuMa3i0jDFO5d0JJd+USGhZCa3NvfpYhIJ6Vw74KWpRcwZUgcUeGackBEGqZw72LySyrZmVuqUTIiclwK9y5m+W7vEMjThsX7uRIR6cwU7l3Mit2FxESFMWaApvgVkcYp3LuY5bsLmTY0XkvqichxKdy7kP1F5ewrKleXjIg0qVXhbmZ3mtkWM9tsZgvMLMrMhpjZKjNLN7OXzSyirYoNdit2FwJwmi6mikgTWhzuZjYQuB1Idc6NBUKBq4HfA48554YDh4A5bVGoeC+m9ukRSYqm+BWRJrS2WyYM6GZmYUA0cBA4G3jFt/154LJWfg8BnHMs313IacPiNcWviDSpxeHunMsGHgb24Q31YmAtcNg5V+PbLQsY2NDrzWyumaWZWVp+fn5Lywgau/NLySupVH+7i
DRLa7plegOzgSHAAKA7MKu5r3fOzXPOpTrnUhMSNEdKU5Z/3t8+TP3tItK01nTLnAtkOufynXPVwGvA6UAvXzcNQBKQ3coaBVieXkhS724Mjo/2dyki0gW0Jtz3AdPMLNq8ncDnAFuBj4ErfftcD7zRuhKl1uNYkVGoLhkRabbW9LmvwnvhdB2wyXesecA9wF1mlg7EA8+0QZ1BbUPWYYorqtUlIyLNFtb0Lo1zzt0P3P+l5gxgSmuOK/X9a102kWEhnH1yX3+XIiJdhO5Q7eSOVteyaMMBZo3tT0xUuL/LEZEuQuHeyX20LY/iimqunJTk71JEpAtRuHdyr67LIjE2Sv3tInJCFO6dWN6Ro3yyM5/LJw7ULJAickIU7p3Y659lU+txXKEuGRE5QQr3Tso5xytrszh1cC+GJWiiMBE5MQr3Tmpz9hF25pZy5aRB/i5FRLoghXsntO3gEf7njc1EhAukZZsAAAlxSURBVIVw0fhEf5cjIl1Qq25ikvpqaj28vekgf12aQXhoCL+5bOxx1zp1zlFcUU1oiBERFkJFVS2PLd7J31buJaZbOP935Xhiu2lsu4icOIV7G8g9cpQPtuby9NIM9haWM7xvD3KPVDL7iWXcdnYKP/jKMMJD6/+RlHWonJ/9azNLdtaf7jjE4FtTT+JHXx1Br2gtYiUiLaNwb6H0vBL+vnIfn6YXkJ5XCsApSbH87NpJnHdyP4orqvnlm1t47MOdvLv5IJdOGMDUIfGMGRDDy2v28/v3tmPA7eekEBMVRmWNh5pax7mj+x73bF9EpDnMOefvGkhNTXVpaWl+raGqxsOiDQd4bnkm1TWO6cPimT4snmlD4omNrt818uraLO57fRMAU4fEc/rweE4f3ofRiTHHrJL03uYcHvlgB7t8vwBCQ4xaj2PmiAR+e/lYknprCl8RaRkzW+ucS21wW7CHe3Wth/mfZjJ/WSa5RyoZ1b8nCT0jSdtziIrqWkJDjDNS+nDZhIGcOSKBh97fzoLV+5k2NI4/XjORvj2jmvV9CkorSdtTxLp9hxkzIIZLTxmg5fJEpFUU7o1wzvGjf27gtXXZzBjeh+/MHMrMlD6YGVU1HjZkHeajbXm8ueEA2YcrvnjdLV8Zxp3njiAsVIONRMR/jhfuQd3n/uB723ltXTZ3nTeC289JqbctIiyEyclxTE6O4+7zR5K29xAfbc/ltGF9OHOElgUUkc4taMP96aUZ/OWTDK6ddhK3nT38uPuGhBhThsQxZUhcB1UnItI6Qdmv8Pr6bH7z9jYuHNefX146Rn3fIhJwWhXuZtbLzF4xs+1mts3MpptZnJktNrNdvq+926rYtvDxjjx+/M8NTB8az2PfmKDZFkUkILX2zP1x4D3n3CjgFGAbcC/wkXMuBfjI97xTWLfvED/4+zpG9u/JvOsmERkW6u+SRETaRYvD3cxigZn4FsB2zlU55w4Ds4Hnfbs9D1zW2iLbwq7cEm56bg39YiJ57sYp9NSSdSISwFpz5j4EyAeeNbP1Zva0mXUH+jnnDvr2yQH6NfRiM5trZmlmlpafn9/QLm2muLya6+avJjw0hL/NmUpCz8h2/X4iIv7WmnAPA04FnnLOTQTK+FIXjPMOom9wIL1zbp5zLtU5l5qQ0L5DCx98bzt5JZXMv34yg+J0R6iIBL7WhHsWkOWcW+V7/gresM81s0QA39e81pXYOmv3FrFg9T5uPC2ZcUmas0VEgkOLw905lwPsN7ORvqZzgK3AIuB6X9v1wButqrAVqms93PevzSTGRnHneSP8VYaISIdr7U1MtwEvmlkEkAHciPcXxkIzmwPsBa5q5fdoseeW7WF7Tgl/uXYS3SOD9n4tEQlCrUo859xnQEPzGpzTmuO2hezDFTy6eCfnntyXr45u8JquiEjACtg7VB9+fwcOpztQRSQoBWS478ot4fXPsrnhtCGaL11EglJAhvtjH+6ke0QY35051N+liIj4RcCF++bsYt7ZlMNNM4bQu7vWIBWR4BRw4f7Y4p3Edgtnzowh/i5FRMRvAirc1+07xEfb85g7cyix3TR3jIgEr4AJ91qP4/fvbie+ewQ3nJbs73JERPwqIMLdOcev39zCqswifnL+SN2wJCJBLyDC/a9LM3h+xV5unjGEq6cM9nc5IiJ+1+XDfdGGA/z2ne1cND6Rn114sr/LERHpFLp0uK/MKOTHCzcwJTmOR75+CiFaMk9EBOji4d47OoKpQ+OYd90kosK1ZJ6IyOe69JXHkf178rc5U/1dhohIp9Olz9xFRKRhCncRkQCkcBcRCUAKdxGRANTqcDezUDNbb2Zv+Z4PMbNVZpZuZi/7luATEZEO1BZn7j8EttV5/nvgMefccOAQMKcNvoeIiJyAVoW7mSUBFwFP+54bcDbwim+X54HLWvM9RETkxLX2zP0PwN2Ax/c8HjjsnKvxPc8CBjb0QjOba2ZpZpaWn5/fyjJERKSuFt/EZGYXA3nOubVmdtaJvt45Nw+Y5ztWvpntbWEpfYCCFr42UOk9qU/vx7H0ntTXVd+Pkxrb0Jo7VE8HLjWzC4EoIAZ4HOhlZmG+s/ckILupAznnElpahJmlOedSW/r6QKT3pD69H8fSe1JfIL4fLe6Wcc791DmX5JxLBq4G/u2c+xbwMXClb7frgTdaXaWIiJyQ9hjnfg9wl5ml4+2Df6YdvoeIiBxHm0wc5pz7D/Af3+MMYEpbHLeZ5nXg9+oq9J7Up/fjWHpP6gu498Occ/6uQURE2pimHxARCUAKdxGRANSlw93MZpnZDt88Nvf6u56OZmaDzOxjM9tqZlvM7Ie+9jgzW2xmu3xfe/u71o6k+Y7qM7NeZvaKmW03s21mNj2YPyNmdqfv52WzmS0ws6hA/Ix02XA3s1DgSeACYDRwjZmN9m9VHa4G+JFzbjQwDbjF9x7cC3zknEsBPvI9Dyaa76i+x4H3nHOjgFPwvjdB+Rkxs4HA7UCqc24sEIp3KHfAfUa6bLjjHZGT7pzLcM5VAS8Bs/1cU4dyzh10zq3zPS7B+0M7EO/78Lxvt6Ca30fzHdVnZrHATHxDkp1zVc65wwTxZwTvKMFuZhYGRAMHCcDPSFcO94HA/jrPG53HJhiYWTIwEVgF9HPOHfRtygH6+aksf2jxfEcBagiQDzzr66p62sy6E6SfEedcNvAwsA9vqBcDawnAz0hXDnfxMbMewKvAHc65I3W3Oe9Y16AY71p3viN/19KJhAGnAk855yYCZXypCybIPiO98f7VMgQYAHQHZvm1qHbSlcM9GxhU53mz5rEJNGYWjjfYX3TOveZrzjWzRN/2RCDPX/V1sM/nO9qDt5vubOrMd+TbJ9g+J1lAlnNule/5K3jDPlg/I+cCmc65fOdcNfAa3s9NwH1GunK4rwFSfFe5I/BeFFnk55o6lK8/+Rlgm3Pu0TqbFuGd1weCaH4fzXd0LOdcDrDfzEb6ms4BthKknxG83THTzCza9/Pz+fsRcJ+RLn2Hqm9Gyj/gveI93zn3gJ9L6lBmNgNYCmziv33MP8Pb774QGAzsBa5yzhX5
pUg/8U1D/WPn3MVmNhTvmXwcsB74tnOu0p/1dSQzm4D3AnMEkAHciPfELig/I2b2K+AbeEebrQduxtvHHlCfkS4d7iIi0rCu3C0jIiKNULiLiAQghbuISABSuIuIBCCFu4hIAFK4i4gEIIW7iEgA+v+ugbCrAuGgIgAAAABJRU5ErkJggg==\n",
244 | "text/plain": [
245 | ""
246 | ]
247 | },
248 | "metadata": {
249 | "needs_background": "light"
250 | },
251 | "output_type": "display_data"
252 | }
253 | ],
254 | "source": [
255 | "plt.plot(moving_average(np.array(score)))"
256 | ]
257 | }
258 | ],
259 | "metadata": {
260 | "kernelspec": {
261 | "display_name": "Python 3",
262 | "language": "python",
263 | "name": "python3"
264 | },
265 | "language_info": {
266 | "codemirror_mode": {
267 | "name": "ipython",
268 | "version": 3
269 | },
270 | "file_extension": ".py",
271 | "mimetype": "text/x-python",
272 | "name": "python",
273 | "nbconvert_exporter": "python",
274 | "pygments_lexer": "ipython3",
275 | "version": "3.7.4"
276 | }
277 | },
278 | "nbformat": 4,
279 | "nbformat_minor": 4
280 | }
281 |
--------------------------------------------------------------------------------
/Errata/Chapter 5.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning in Action\n",
8 | "### by Alex Zai and Brandon Brown\n",
9 | "\n",
10 | "#### Chapter 5"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "##### Listing 5.1"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 1,
23 | "metadata": {},
24 | "outputs": [
25 | {
26 | "name": "stdout",
27 | "output_type": "stream",
28 | "text": [
29 | "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n",
30 | " 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47\n",
31 | " 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63]\n"
32 | ]
33 | }
34 | ],
35 | "source": [
36 | "import multiprocessing as mp\n",
37 | "from multiprocess import queues\n",
38 | "\n",
39 | "import numpy as np\n",
40 | "def square(x):\n",
41 | " return np.square(x)\n",
42 | "x = np.arange(64)\n",
43 | "print(x)"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {},
50 | "outputs": [
51 | {
52 | "data": {
53 | "text/plain": [
54 | "4"
55 | ]
56 | },
57 | "execution_count": 2,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "mp.cpu_count()"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 3,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "if __name__ == '__main__': # added this line for process safety\n",
73 | " pool = mp.Pool(8)\n",
74 | " squared = pool.map(square, [x[8*i:8*i+8] for i in range(8)])\n",
75 | " squared"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "##### Listing 5.2"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 4,
88 | "metadata": {},
89 | "outputs": [
90 | {
91 | "name": "stdout",
92 | "output_type": "stream",
93 | "text": [
94 | "In process 0\n",
95 | "In process 1\n",
96 | "In process 2\n",
97 | "In process 3\n",
98 | "In process 4\n",
99 | "In process 5\n",
100 | "In process 6\n",
101 | "In process 7\n"
102 | ]
103 | }
104 | ],
105 | "source": [
106 | "def square(i, x, queue):\n",
107 | " print(\"In process {}\".format(i,))\n",
108 | "\n",
109 | "queue = mp.Queue()\n",
110 | "queue.put(np.square(x))\n",
111 | "processes = []\n",
112 | "if __name__ == '__main__': #adding this for process safety\n",
113 | " x = np.arange(64)\n",
114 | " for i in range(8):\n",
115 | " start_index = 8*i\n",
116 | " proc = mp.Process(target=square,args=(i,x[start_index:start_index+8],\n",
117 | " queue)) \n",
118 | " proc.start()\n",
119 | " processes.append(proc)\n",
120 | "\n",
121 | " for proc in processes:\n",
122 | " proc.join()\n",
123 | "\n",
124 | " for proc in processes:\n",
125 | " proc.terminate()\n",
126 | "\n",
127 | " results = []\n",
128 | " while not queue.empty():\n",
129 | " results.append(queue.get())"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 5,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/plain": [
140 | "[array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100,\n",
141 | " 121, 144, 169, 196, 225, 256, 289, 324, 361, 400, 441,\n",
142 | " 484, 529, 576, 625, 676, 729, 784, 841, 900, 961, 1024,\n",
143 | " 1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849,\n",
144 | " 1936, 2025, 2116, 2209, 2304, 2401, 2500, 2601, 2704, 2809, 2916,\n",
145 | " 3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969])]"
146 | ]
147 | },
148 | "execution_count": 5,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "results"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "##### Listing 5.3: Pseudocode (not shown)"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {},
167 | "source": [
168 | "##### Listing 5.4"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 6,
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "import torch\n",
178 | "from torch import nn\n",
179 | "from torch import optim\n",
180 | "import numpy as np\n",
181 | "from torch.nn import functional as F\n",
182 | "import gym\n",
183 | "import torch.multiprocessing as mp\n",
184 | "\n",
185 | "class ActorCritic(nn.Module):\n",
186 | " def __init__(self):\n",
187 | " super(ActorCritic, self).__init__()\n",
188 | " self.l1 = nn.Linear(4,25)\n",
189 | " self.l2 = nn.Linear(25,50)\n",
190 | " self.actor_lin1 = nn.Linear(50,2)\n",
191 | " self.l3 = nn.Linear(50,25)\n",
192 | " self.critic_lin1 = nn.Linear(25,1)\n",
193 | " def forward(self,x):\n",
194 | " x = F.normalize(x,dim=0)\n",
195 | " y = F.relu(self.l1(x))\n",
196 | " y = F.relu(self.l2(y))\n",
197 | " actor = F.log_softmax(self.actor_lin1(y),dim=0)\n",
198 | " c = F.relu(self.l3(y.detach()))\n",
199 | " critic = torch.tanh(self.critic_lin1(c))\n",
200 | " return actor, critic"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "##### Listing 5.5 \n",
208 | "##### NOTE 1: This will not run on its own, you need to run listing 5.6 - 5.8 first then come back and run this cell.\n",
209 | "##### NOTE 2: This will not record losses for plotting. If you want to record losses, you'll need to create a multiprocessing shared array and modify the `worker` function to write each loss to it. See < https://docs.python.org/3/library/multiprocessing.html > Alternatively, you could use process locks to safely write to a file."
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 11,
215 | "metadata": {},
216 | "outputs": [
217 | {
218 | "name": "stdout",
219 | "output_type": "stream",
220 | "text": [
221 | "6998 0\n"
222 | ]
223 | }
224 | ],
225 | "source": [
226 | "MasterNode = ActorCritic()\n",
227 | "MasterNode.share_memory()\n",
228 | "processes = []\n",
229 | "params = {\n",
230 | " 'epochs':1000,\n",
231 | " 'n_workers':7,\n",
232 | "}\n",
233 | "counter = mp.Value('i',0)\n",
234 | "if __name__ == '__main__': #adding this for process safety\n",
235 | " for i in range(params['n_workers']):\n",
236 | " p = mp.Process(target=worker, args=(i,MasterNode,counter,params))\n",
237 | " p.start() \n",
238 | " processes.append(p)\n",
239 | " for p in processes:\n",
240 | " p.join()\n",
241 | " for p in processes:\n",
242 | " p.terminate()\n",
243 | " \n",
244 | "print(counter.value,processes[1].exitcode)"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": [
251 | "##### Listing 5.6"
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 7,
257 | "metadata": {},
258 | "outputs": [],
259 | "source": [
260 | "def worker(t, worker_model, counter, params):\n",
261 | " worker_env = gym.make(\"CartPole-v1\")\n",
262 | " worker_env.reset()\n",
263 | " worker_opt = optim.Adam(lr=1e-4,params=worker_model.parameters())\n",
264 | " worker_opt.zero_grad()\n",
265 | " for i in range(params['epochs']):\n",
266 | " worker_opt.zero_grad()\n",
267 | " values, logprobs, rewards = run_episode(worker_env,worker_model)\n",
268 | " actor_loss,critic_loss,eplen = update_params(worker_opt,values,logprobs,rewards)\n",
269 | " counter.value = counter.value + 1"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "##### Listing 5.7"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 8,
282 | "metadata": {},
283 | "outputs": [],
284 | "source": [
285 | "def run_episode(worker_env, worker_model):\n",
286 | " state = torch.from_numpy(worker_env.env.state).float()\n",
287 | " values, logprobs, rewards = [],[],[]\n",
288 | " done = False\n",
289 | " j=0\n",
290 | " while (done == False):\n",
291 | " j+=1\n",
292 | " policy, value = worker_model(state)\n",
293 | " values.append(value)\n",
294 | " logits = policy.view(-1)\n",
295 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
296 | " action = action_dist.sample()\n",
297 | " logprob_ = policy.view(-1)[action]\n",
298 | " logprobs.append(logprob_)\n",
299 | " state_, _, done, info = worker_env.step(action.detach().numpy())\n",
300 | " state = torch.from_numpy(state_).float()\n",
301 | " if done:\n",
302 | " reward = -10\n",
303 | " worker_env.reset()\n",
304 | " else:\n",
305 | " reward = 1.0\n",
306 | " rewards.append(reward)\n",
307 | " return values, logprobs, rewards"
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "metadata": {},
313 | "source": [
314 | "##### Listing 5.8"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 9,
320 | "metadata": {},
321 | "outputs": [],
322 | "source": [
323 | "def update_params(worker_opt,values,logprobs,rewards,clc=0.1,gamma=0.95):\n",
324 | " rewards = torch.Tensor(rewards).flip(dims=(0,)).view(-1)\n",
325 | " logprobs = torch.stack(logprobs).flip(dims=(0,)).view(-1)\n",
326 | " values = torch.stack(values).flip(dims=(0,)).view(-1)\n",
327 | " Returns = []\n",
328 | " ret_ = torch.Tensor([0])\n",
329 | " for r in range(rewards.shape[0]):\n",
330 | " ret_ = rewards[r] + gamma * ret_\n",
331 | " Returns.append(ret_)\n",
332 | " Returns = torch.stack(Returns).view(-1)\n",
333 | " Returns = F.normalize(Returns,dim=0)\n",
334 | " actor_loss = -1*logprobs * (Returns - values.detach())\n",
335 | " critic_loss = torch.pow(values - Returns,2)\n",
336 | " loss = actor_loss.sum() + clc*critic_loss.sum()\n",
337 | " loss.backward()\n",
338 | " worker_opt.step()\n",
339 | " return actor_loss, critic_loss, len(rewards)"
340 | ]
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {},
345 | "source": [
346 | "##### Supplement\n",
347 | "##### Test the trained model"
348 | ]
349 | },
350 | {
351 | "cell_type": "code",
352 | "execution_count": 15,
353 | "metadata": {},
354 | "outputs": [],
355 | "source": [
356 | "env = gym.make(\"CartPole-v1\")\n",
357 | "env.reset()\n",
358 | "\n",
359 | "for i in range(100):\n",
360 | " state_ = np.array(env.env.state)\n",
361 | " state = torch.from_numpy(state_).float()\n",
362 | " logits,value = MasterNode(state)\n",
363 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
364 | " action = action_dist.sample()\n",
365 | " state2, reward, done, info = env.step(action.detach().numpy())\n",
366 | " if done:\n",
367 | " print(\"Lost\")\n",
368 | " env.reset()\n",
369 | " state_ = np.array(env.env.state)\n",
370 | " state = torch.from_numpy(state_).float()\n",
371 | " env.render()"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 13,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "env.close()"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {},
386 | "source": [
387 | "### N-step actor-critic"
388 | ]
389 | },
390 | {
391 | "cell_type": "markdown",
392 | "metadata": {},
393 | "source": [
394 | "##### Listing 5.9"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": null,
400 | "metadata": {},
401 | "outputs": [],
402 | "source": [
403 | "def run_episode(worker_env, worker_model, N_steps=10):\n",
404 | " raw_state = np.array(worker_env.env.state)\n",
405 | " state = torch.from_numpy(raw_state).float()\n",
406 | " values, logprobs, rewards = [],[],[]\n",
407 | " done = False\n",
408 | " j=0\n",
409 | " G=torch.Tensor([0])\n",
410 | " while (j < N_steps and done == False):\n",
411 | " j+=1\n",
412 | " policy, value = worker_model(state)\n",
413 | " values.append(value)\n",
414 | " logits = policy.view(-1)\n",
415 | " action_dist = torch.distributions.Categorical(logits=logits)\n",
416 | " action = action_dist.sample()\n",
417 | " logprob_ = policy.view(-1)[action]\n",
418 | " logprobs.append(logprob_)\n",
419 | " state_, _, done, info = worker_env.step(action.detach().numpy())\n",
420 | " state = torch.from_numpy(state_).float()\n",
421 | " if done:\n",
422 | " reward = -10\n",
423 | " worker_env.reset()\n",
424 | " else:\n",
425 | " reward = 1.0\n",
426 | " G = value.detach()\n",
427 | " rewards.append(reward)\n",
428 | " return values, logprobs, rewards, G"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": null,
434 | "metadata": {},
435 | "outputs": [],
436 | "source": []
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": null,
441 | "metadata": {},
442 | "outputs": [],
443 | "source": []
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": null,
448 | "metadata": {},
449 | "outputs": [],
450 | "source": []
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": null,
455 | "metadata": {},
456 | "outputs": [],
457 | "source": []
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": null,
462 | "metadata": {},
463 | "outputs": [],
464 | "source": []
465 | }
466 | ],
467 | "metadata": {
468 | "kernelspec": {
469 | "display_name": "Python 3",
470 | "language": "python",
471 | "name": "python3"
472 | },
473 | "language_info": {
474 | "codemirror_mode": {
475 | "name": "ipython",
476 | "version": 3
477 | },
478 | "file_extension": ".py",
479 | "mimetype": "text/x-python",
480 | "name": "python",
481 | "nbconvert_exporter": "python",
482 | "pygments_lexer": "ipython3",
483 | "version": "3.7.4"
484 | }
485 | },
486 | "nbformat": 4,
487 | "nbformat_minor": 4
488 | }
489 |
--------------------------------------------------------------------------------
/Errata/GridBoard.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import sys
4 |
5 | def randPair(s,e):
6 | return np.random.randint(s,e), np.random.randint(s,e)
7 |
8 | class BoardPiece:
9 |
10 | def __init__(self, name, code, pos):
11 | self.name = name #name of the piece
12 | self.code = code #an ASCII character to display on the board
13 | self.pos = pos #2-tuple e.g. (1,4)
14 |
15 | class BoardMask:
16 |
17 | def __init__(self, name, mask, code):
18 | self.name = name
19 | self.mask = mask
20 | self.code = code
21 |
22 | def get_positions(self): #returns tuple of arrays
23 | return np.nonzero(self.mask)
24 |
25 | def zip_positions2d(positions): #positions is tuple of two arrays
26 | x,y = positions
27 | return list(zip(x,y))
28 |
29 | class GridBoard:
30 |
31 | def __init__(self, size=4):
32 | self.size = size #Board dimensions, e.g. 4 x 4
33 | self.components = {} #name : board piece
34 | self.masks = {}
35 |
36 | def addPiece(self, name, code, pos=(0,0)):
37 | newPiece = BoardPiece(name, code, pos)
38 | self.components[name] = newPiece
39 |
40 | #basically a set of boundary elements
41 | def addMask(self, name, mask, code):
42 | #mask is a 2D-numpy array with 1s where the boundary elements are
43 | newMask = BoardMask(name, mask, code)
44 | self.masks[name] = newMask
45 |
46 | def movePiece(self, name, pos):
47 | move = True
48 | for _, mask in self.masks.items():
49 | if pos in zip_positions2d(mask.get_positions()):
50 | move = False
51 | if move:
52 | self.components[name].pos = pos
53 |
54 | def delPiece(self, name):
55 | del self.components['name']
56 |
57 | def render(self):
58 | dtype = '= 4:
7 | self.board = GridBoard(size=size)
8 | else:
9 | print("Minimum board size is 4. Initialized to size 4.")
10 | self.board = GridBoard(size=4)
11 |
12 | #Add pieces, positions will be updated later
13 | self.board.addPiece('Player','P',(0,0))
14 | self.board.addPiece('Goal','+',(1,0))
15 | self.board.addPiece('Pit','-',(2,0))
16 | self.board.addPiece('Wall','W',(3,0))
17 | #
18 | if mode == 'static':
19 | self.initGridStatic()
20 | elif mode == 'player':
21 | self.initGridPlayer()
22 | else:
23 | self.initGridRand()
24 |
25 | #Initialize stationary grid, all items are placed deterministically
26 | def initGridStatic(self):
27 | #Setup static pieces
28 | self.board.components['Player'].pos = (0,3) #Row, Column
29 | self.board.components['Goal'].pos = (0,0)
30 | self.board.components['Pit'].pos = (0,1)
31 | self.board.components['Wall'].pos = (1,1)
32 |
33 | #Check if board is initialized appropriately (no overlapping pieces)
34 | #also remove impossible-to-win boards
35 | def validateBoard(self):
36 | valid = True
37 |
38 | player = self.board.components['Player']
39 | goal = self.board.components['Goal']
40 | wall = self.board.components['Wall']
41 | pit = self.board.components['Pit']
42 |
43 | all_positions = [piece for name,piece in self.board.components.items()]
44 | all_positions = [player.pos, goal.pos, wall.pos, pit.pos]
45 | if len(all_positions) > len(set(all_positions)):
46 | return False
47 |
48 | corners = [(0,0),(0,self.board.size), (self.board.size,0), (self.board.size,self.board.size)]
49 | #if player is in corner, can it move? if goal is in corner, is it blocked?
50 | if player.pos in corners or goal.pos in corners:
51 | val_move_pl = [self.validateMove('Player', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
52 | val_move_go = [self.validateMove('Goal', addpos) for addpos in [(0,1),(1,0),(-1,0),(0,-1)]]
53 | if 0 not in val_move_pl or 0 not in val_move_go:
54 | #print(self.display())
55 | #print("Invalid board. Re-initializing...")
56 | valid = False
57 |
58 | return valid
59 |
60 | #Initialize player in random location, but keep wall, goal and pit stationary
61 | def initGridPlayer(self):
62 | #height x width x depth (number of pieces)
63 | self.initGridStatic()
64 | #place player
65 | self.board.components['Player'].pos = randPair(0,self.board.size)
66 |
67 | if (not self.validateBoard()):
68 | #print('Invalid grid. Rebuilding..')
69 | self.initGridPlayer()
70 |
71 | #Initialize grid so that goal, pit, wall, player are all randomly placed
72 | def initGridRand(self):
73 | #height x width x depth (number of pieces)
74 | self.board.components['Player'].pos = randPair(0,self.board.size)
75 | self.board.components['Goal'].pos = randPair(0,self.board.size)
76 | self.board.components['Pit'].pos = randPair(0,self.board.size)
77 | self.board.components['Wall'].pos = randPair(0,self.board.size)
78 |
79 | if (not self.validateBoard()):
80 | #print('Invalid grid. Rebuilding..')
81 | self.initGridRand()
82 |
83 | def validateMove(self, piece, addpos=(0,0)):
84 | outcome = 0 #0 is valid, 1 invalid, 2 lost game
85 | pit = self.board.components['Pit'].pos
86 | wall = self.board.components['Wall'].pos
87 | new_pos = addTuple(self.board.components[piece].pos, addpos)
88 | if new_pos == wall:
89 | outcome = 1 #block move, player can't move to wall
90 | elif max(new_pos) > (self.board.size-1): #if outside bounds of board
91 | outcome = 1
92 | elif min(new_pos) < 0: #if outside bounds
93 | outcome = 1
94 | elif new_pos == pit:
95 | outcome = 2
96 |
97 | return outcome
98 |
99 | def makeMove(self, action):
100 | #need to determine what object (if any) is in the new grid spot the player is moving to
101 | #actions in {u,d,l,r}
102 | def checkMove(addpos):
103 | if self.validateMove('Player', addpos) in [0,2]:
104 | new_pos = addTuple(self.board.components['Player'].pos, addpos)
105 | self.board.movePiece('Player', new_pos)
106 |
107 | if action == 'u': #up
108 | checkMove((-1,0))
109 | elif action == 'd': #down
110 | checkMove((1,0))
111 | elif action == 'l': #left
112 | checkMove((0,-1))
113 | elif action == 'r': #right
114 | checkMove((0,1))
115 | else:
116 | pass
117 |
118 | def reward(self):
119 | if (self.board.components['Player'].pos == self.board.components['Pit'].pos):
120 | return -10
121 | elif (self.board.components['Player'].pos == self.board.components['Goal'].pos):
122 | return 10
123 | else:
124 | return -1
125 |
126 | def display(self):
127 | return self.board.render()
128 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 DeepReinforcementLearning
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Reinforcement Learning In Action
2 |
3 | Code Snippets from the [Deep Reinforcement Learning in Action](https://www.manning.com/books/deep-reinforcement-learning-in-action) book from Manning, Inc
4 |
5 | ## How this is Organized
6 |
7 | The code snippets, listings, and projects are all embedded in Jupyter Notebooks
8 | organized by chapter. Visit [http://jupyter.org/install](http://jupyter.org/install) for
9 | instructions on installing Jupyter Notebooks.
10 |
11 | We keep the original Jupyter Notebooks in their respective chapter folders. As we discover errata, we update notebooks in the Errata folder, so those notebooks are the most up-to-date in terms of errors corrected, but we keep the original Jupyter Notebooks to match the book code snippets.
12 |
13 | ## Requirements
14 |
15 | In order to run many of the projects, you'll need at least the [NumPy](http://www.numpy.org/) library
16 | and [PyTorch](http://pytorch.org/).
17 |
18 | ```
19 | pip install -r requirements.txt
20 | ```
21 |
22 | ## Special Instructions
23 | In the notebook 9, there's an issue (appearing in the 15th cell) you can solve by following the instructions of @scottmayberry in Farama-Foundation/MAgent2#14. That means to copy all the files and folders from https://github.com/Farama-Foundation/MAgent2/tree/main/magent2 to the local folder /lib/python3.X/site-packages/magent2 (or similar path if your OS is other than Linux) - Thanks to [donlaiq](https://github.com/donlaiq) for this
24 |
25 | ## Contribute
26 |
27 | If you experience any issues running the examples, please file an issue.
28 | If you see typos or other errors in the book, please edit the [Errata.md](https://github.com/DeepReinforcementLearning/DeepReinforcementLearningInAction/blob/master/Errata.md) file and create a pull request.
29 |
--------------------------------------------------------------------------------
/old_but_more_detailed/Ch2_N Armed Bandits.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning in Action \n",
8 | "## N-Armed Bandits\n",
9 | "### Chapter 2"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 4,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import torch as th\n",
20 | "from torch.autograd import Variable\n",
21 | "from matplotlib import pyplot as plt\n",
22 | "import random\n",
23 | "%matplotlib inline"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "This defines the main contextual bandit class we'll be using as our environment/simulator to train a neural network."
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 5,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "class ContextBandit:\n",
40 | " def __init__(self, arms=10):\n",
41 | " self.arms = arms\n",
42 | " self.init_distribution(arms)\n",
43 | " self.update_state()\n",
44 | " \n",
45 | " def init_distribution(self, arms):\n",
46 | " # Num states = Num Arms to keep things simple\n",
47 | " self.bandit_matrix = np.random.rand(arms,arms)\n",
48 | " #each row represents a state, each column an arm\n",
49 | " \n",
50 | " def reward(self, prob):\n",
51 | " reward = 0\n",
52 | " for i in range(self.arms):\n",
53 | " if random.random() < prob:\n",
54 | " reward += 1\n",
55 | " return reward\n",
56 | " \n",
57 | " def get_state(self):\n",
58 | " return self.state\n",
59 | " \n",
60 | " def update_state(self):\n",
61 | " self.state = np.random.randint(0,self.arms)\n",
62 | " \n",
63 | " def get_reward(self,arm):\n",
64 | " return self.reward(self.bandit_matrix[self.get_state()][arm])\n",
65 | " \n",
66 | " def choose_arm(self, arm):\n",
67 | " reward = self.get_reward(arm)\n",
68 | " self.update_state()\n",
69 | " return reward\n",
70 | " "
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {},
76 | "source": [
77 | "Here we define our simple neural network model using PyTorch"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 6,
83 | "metadata": {},
84 | "outputs": [],
85 | "source": [
86 | "def softmax(av, tau=1.12):\n",
87 | " n = len(av)\n",
88 | " probs = np.zeros(n)\n",
89 | " for i in range(n):\n",
90 | " softm = ( np.exp(av[i] / tau) / np.sum( np.exp(av[:] / tau) ) )\n",
91 | " probs[i] = softm\n",
92 | " return probs\n",
93 | "\n",
94 | "def one_hot(N, pos, val=1):\n",
95 | " one_hot_vec = np.zeros(N)\n",
96 | " one_hot_vec[pos] = val\n",
97 | " return one_hot_vec\n",
98 | "\n",
99 | "arms = 10\n",
100 | "# N is batch size; D_in is input dimension;\n",
101 | "# H is hidden dimension; D_out is output dimension.\n",
102 | "N, D_in, H, D_out = 1, arms, 100, arms\n",
103 | "\n",
104 | "model = th.nn.Sequential(\n",
105 | " th.nn.Linear(D_in, H),\n",
106 | " th.nn.ReLU(),\n",
107 | " th.nn.Linear(H, D_out),\n",
108 | " th.nn.ReLU(),\n",
109 | ")\n",
110 | "\n",
111 | "loss_fn = th.nn.MSELoss(size_average=False)\n",
112 | "\n",
113 | "env = ContextBandit(arms)"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "Next we define the training function, which accepts an instantiated ContextBandit object."
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 7,
126 | "metadata": {
127 | "collapsed": true
128 | },
129 | "outputs": [],
130 | "source": [
131 | "def train(env):\n",
132 | " epochs = 5000\n",
133 | " #one-hot encode current state\n",
134 | " cur_state = Variable(th.Tensor(one_hot(arms,env.get_state())))\n",
135 | " reward_hist = np.zeros(50)\n",
136 | " reward_hist[:] = 5\n",
137 | " runningMean = np.average(reward_hist)\n",
138 | " learning_rate = 1e-2\n",
139 | " optimizer = th.optim.Adam(model.parameters(), lr=learning_rate)\n",
140 | " plt.xlabel(\"Plays\")\n",
141 | " plt.ylabel(\"Mean Reward\")\n",
142 | " for i in range(epochs):\n",
143 | " y_pred = model(cur_state) #produce reward predictions\n",
144 | " av_softmax = softmax(y_pred.data.numpy(), tau=2.0) #turn reward distribution into probability distribution\n",
145 | " av_softmax /= av_softmax.sum() #make sure total prob adds to 1\n",
146 | " choice = np.random.choice(arms, p=av_softmax) #sample an action\n",
147 | " cur_reward = env.choose_arm(choice)\n",
148 | " one_hot_reward = y_pred.data.numpy().copy()\n",
149 | " one_hot_reward[choice] = cur_reward\n",
150 | " reward = Variable(th.Tensor(one_hot_reward))\n",
151 | " loss = loss_fn(y_pred, reward)\n",
152 | " if i % 50 == 0:\n",
153 | " runningMean = np.average(reward_hist)\n",
154 | " reward_hist[:] = 0\n",
155 | " plt.scatter(i, runningMean)\n",
156 | " reward_hist[i % 50] = cur_reward\n",
157 | " optimizer.zero_grad()\n",
158 | "\n",
159 | " # Backward pass: compute gradient of the loss with respect to model\n",
160 | " # parameters\n",
161 | " loss.backward()\n",
162 | "\n",
163 | " # Calling the step function on an Optimizer makes an update to its\n",
164 | " # parameters\n",
165 | " optimizer.step()\n",
166 | " cur_state = Variable(th.Tensor(one_hot(arms,env.get_state())))"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 8,
172 | "metadata": {},
173 | "outputs": [
174 | {
175 | "data": {
176 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt0lPWd+PH3JxeSgJiUWxNAhFCLWk2FTa2X0qrZQi1N\ncW3XQ9XVXvawdmtF+sMeqaeW2rV2V1vEtb96WHvzV1vXKopZrNhG2lJUWgQaoIBIRCUkcluCxCTk\n8vn98TwzzExmJs8k88z18zonJzPfeTLzfUKYzzzf7+f7+YqqYowxxgAUpLsDxhhjMocFBWOMMUEW\nFIwxxgRZUDDGGBNkQcEYY0yQBQVjjDFBFhSMMcYEWVAwxhgTZEHBGGNMUFG6O5CocePG6dSpU9Pd\nDWOMySqvvPLKYVUdP9hxWRcUpk6dyqZNm9LdDWOMySoi8oaX42z4yBhjTJAFBWOMMUEWFIwxxgRZ\nUDDGGBNkQcEYY0yQBQVjjDFBFhSMMcYEWVAwxhgTZEHBmBzX3tDAnivq2HnOuey5oo72hoZ0d8lk\nsKxb0WyM8a69oYHWb96JdnUB0HvgAK3fvBOA8vr6dHbNZCi7UjAmhx1cfn8wIARoVxcHl9+fph6Z\nTGdBwZgc1tvamlC7MRYUjMlhRVVVCbUbY0HBmBw2YfGtSGlpWJuUljJh8a1p6pHJdDbRbEwOC0wm\nH1x+P72trRRVVTFh8a02yWxisqBgTI4rr6+3IJAET29p4d61uzlwrJOJFWXcNncGV82clO5uJZ2v\nw0cislhEdojIdhH5lYiURjx+mYi0i8hW9+tOP/tjjDFD8fSWFpau2kbLsU4UaDnWydJV23h6S0u6\nu5Z0vgUFEZkE3ALUqup5QCGwIMqh61X1AvfrLr/6Y4wxQ3Xv2t109vSFtXX29HHv2t1p6pF//J5o\nLgLKRKQIGAkc8Pn1jDEm6Q4c60yoPZv5FhRUtQW4D3gTaAXaVfX5KIdeIiJNIvIbEflAtOcSkYUi\nsklENh06dMivLhtjTFQTK8oSas9mfg4fvQeYD0wDJgKjROT6iMM2A1NUtQb4T+DpaM+lqitVtVZV\na8ePH+9Xl02SPL2lhUu/9wLTbl/Dpd97ISfHXU1+uW3uDMqKC8PayooLuW3ujDT1yD9+Dh/9PfC6\nqh5S1R5gFXBJ6AGqelxVT7i3nwWKRWScj30yPsunCTkT3ZrmNcx5Yg41P69hzhNzWNO8Jt1dGrar\nZk7inqvPZ1JFGQJMqijjnqvPz8nsIz9TUt8ELhKRkUAnUAdsCj1ARCqBt1VVReRCnCB1xMc+GZ/F\nm5CL9R/o1Y1tvLR6LyeOdnPamBIunj+d93+4MhXdNUm2pnkNy15cRlefU2+ptaOVZS8uA2Be9bw0\n9mz4rpo5KSeDQCQ/5xQ2Ak/gDBFtc19rpYjcJCI3uYd9FtguIn8FHgAWqKr61Sfjv0Qn5F7d2Ma6\nR3dx4mg3ACeOdrPu0V28urHNtz4a/6zYvCIYEAK6+rpYsXlFmnpkEuXr4jVV/RbwrYjmh0IefxB4\n0M8+mNSaWFFGS5QAEGtC7qXVe+k92R/W1nuyn5dW77WrhWFY07yGFZtX0NbRRuWoShbNWpSST+pt\nHdGDeax2k3ms9pFJqkQn5AJXCF7bzeACQzitHa0oGhzCScXYfuWo6IE8VrvJPFbmwiRVYMzVazmA\n08aURA0Ap40pCbvf1NREY2Mj7e3tlJeXU1dXR01NTfJPIAfEG8Lx+2ph0axFYXMKAKWFpSyatcjX\n1zXJY0HBJF0iE3IXz5/Oukd3hQ0hFY0o4OL504P3m5qaaGhooKenB4D29nYa3C0lLTAMlM4hnEDQ\nScfQVUo1PQ6Nd0H7fiifDHV3Qs016e5VUlhQyCFPth3lnuZWWrp7mFRSzNLqKj5TOSbd3YorMG8Q\nL/uosbExGBACenp6aGxstKAQReWoSlo7Bm6ik6ohnHnV83IvCIRqehwaboEed+6s/S3nPuREYLCg\nkCOebDvKkt1v0dnvJG/t7+5hye63ALIiMMSbVG5vb0+oPR0yqYKmDeH4rPGuUwEhoKfTac+BoGAT\nzTninubWYEAI6OxX7mnO/m0Xy8vLE2pPtUxbsDeveh7LLllG1agqBKFqVBXLLlmW25/eU6l9f2Lt\nWcauFHJES3dPQu3ZpK6uLmxOAaC4uJi6uro09uqUoSzY81vOD+GkU/lkZ8goWnsOsKCQIyaVFLM/\nSgCYVFKcht4kV2DeIJB9dOaZbUydtpVDh3/Khg1VVE9fQlXl/LT1LxcqaHZsOcjxtfvoO9ZNYUUJ\np8+dyqiZE7w/QQ5PvA5Qd2f4nAJAcZnTngMsKOSIpdVVYXMKAGUFwtLq3NigvaamhpqaGlrbVrNr\n1x309zv/Ibu6D7Br1x0A4YFhGG9Sib5BJrpgL9N0bDnIsVV70B4nA6zvWDfHVu0B8BYYcnzidYDA\nOeVoELSgkCMCk8nZln2UqOa99wUDQkB/fyfNe+87FRQ8vklFq7k0aURBwm+Qt82dwdJV28KGkLKp\ngubxtfuC5xugPf0cX7vPW1BI4sRrrPUoGZdZV3NNzgSBSBYUcshnKsfkXBCI1NUdfeI8rN3Dm1Sg\n5lJgfUSg5tKVY0soSPANMtEFe5mm71j01eOx2gdI0sRrrPUojV39PNApWZlZl40sKOSx9oYGDi6/\nn97WVoqqqpiw+NaM3+C9tKSKru6BG/iVloQMk0W8Ga0ZNZIV76mgrQgqn5jDolmLOLy6ImrNJens\njfq6g71BZnMFzcKKkqjnV1hREuXoKJI08Rq5HmX8+GamTtvK1098iE4JD8iBzDoLCslnQSFPtTc0\n0PrNO9EuJ5e998ABWr/pTJRlcmConr4kbE4BoKCgjOrpS04dFPImtWbUSJaNG0NXgZN9HagD9Pmj\n3wNkwPN39ikjCwe2e36DzEKnz50aNmQGIMUFnD53qrcnGMLE687161j/2CO8c+Qwo8eOY/aCG8LW\nnYwf38xZ73+ZwsI+jhB9i5WW7p6Mn+DOxrLwtk4hTx1cfn8wIARoVxcHl9/v+Tla21azYcNsGl94\nHxs2zKa1bXWyuzlAVeV8zj77bkpLJgJCaclEzj777vBJ5ro7nTclYMV7KoIBIaCrr4t3S49Hff7m\nQkGKw49P6A0yC42aOYGKq88KBr7CihIqrj7Le/ZRzTVQ/wCUnwGI873+gZhvzjvXr+P5lQ/yzuFD\noMo7hw/x/MoHGVl6KvBOnbaVwkJnjmYch6M+zxePvOAEo/a3AD01d9T0uOdz91O2loW3K4U81dsa\nfWw+Vnskz1lAPqiqnB//NUKyQ9pi/IW/OHk1n3jz8wNqLlX/w1lUjCgYXnpmFho1c8LwzjGBidf1\njz1C78nw4arek92MOLifnrET6enpoaSkI/jYNTzKw/plTkppsK2sQPjGvoeHNcEdmmXWfNphNhXv\n5XjniaQVXMzWsvAWFPJUUVUVv
QcGjs0XVXlLYfWUBZRO7ptU5RNzwuoAXbqjj2t/r4w7vpFDM0bz\nevV8OjoLBlza53oQSKd3jkT/5N934E3qP/8vNDY20t09itJSJzBcyp8AeFyv44iMZ1LJCJZWVzFq\n3cC/X8DTBHdoGu5rBa2s79lFX6/zBh5acHHPhMlDznrK1rLwvg4fichiEdkhIttF5FciIaHeeVxE\n5AEReU1EmkRklp/9MadMWHwrUhr2z4GUljJh8a2eft5TFlAGWDRrEaWFznleuqOPf3lWGX/cmU2Y\nsPt3XLTuNq6f18GN3700oz+95ZLRY6PPEYweO46amhoWL17MrFnfoaDg1DqPS/kT/1nwNTaf8yab\nLvmA88YcayLbwwR3aBrupqJm+iT8E31PTw8PvLKNJbvfYn93D8qprKcn2456Os/I8u+DtWcK34KC\niEwCbgFqVfU8oBBYEHHYlcBZ7tdC4Ed+9ceEK6+vp+o7d1E0cSKIUDRxIlXfuWvQSeampiaWL19O\nV9fIqI+HZQFlgNA6QNf+XimNSC5KdB7FDN/sBTdQNCL8jbFoRAmzF9wQvJ/o3FGQx5XFodlWJ6Qr\n6jG/r5w2rHpiF8+fTtGI8LfYyLLwmcjv4aMioExEeoCRQOT13nzgEXdf5pdFpEJEqlQ1sz5u5qjy\n+vqEMo1C88j3vX5BMDskYEAWUIYI1AHa+c1zgYFbgHudRzHJcc7sywEGZB8F2gMSmTtKNPsoNA33\nNC2NGhhOlERfke61npiXsvCQeRtI+RYUVLVFRO4D3gQ6gedV9fmIwyYBoQnO+902+1+agULzyA8d\nqgacLJGSkg7KSicmVIMoHWskhjuPkgrDrkHko2SuKj5n9uUDgsCQDHFlcWgabm1vNeuLd4UNIRUX\nFzOhUDjYP/BnE6knNlhZ+EzcQMq3oCAi78G5EpgGHAN+LSLXq+ovhvBcC3GGl5gyZUpS+5mN0vXJ\nInL/gkOHqoPBYdmyZd6fJ01rJCYsvjXsdSGxeRS/DbsGkY+yeb+OaAK/z+Nr9/G+Y1UUFBcPyD46\ne8Jk3+uJZeIGUn4OH/098LqqHgIQkVXAJUBoUGgBzgi5P9ltC6OqK4GVALW1tQOv//NIOj9ZlJeX\nR93YJtF9DeKtkfAzKASeO1NXcQ+7BlGSRPvQcc+Jwpjj69kYFCA8DXcy8FG3vbVtNc17v0LF4VYW\nFn2Kx4uup623yJeaS5m4gZSfQeFN4CIRGYkzfFQHbIo45hngZhF5DPgw0G7zCfH59cnCy8rLZO1r\nMNw1EsOR6DyKX6L9vkcOtwZREsT60NFySfS9GXJhv45QketvLuxt4KKC33H2OXf7kmqdrA9ayeRb\n9pGqbgSeADYD29zXWikiN4nITe5hzwLNwGvAfwH/6ld/coUfnyy8rrysqamhvr4++AdbXl5OfX19\nwsEo1hh+Jo3t+ynW77u/LPpnNM8lNpoeh+XnwbIK5/sQVvbG+tAx+mT0vSGybb+OnevXsfIrX+D7\nC+pZ+ZUvsHP9urDH462/8UNdXR3FxeG/w8AHrSfbjlL74g6q1m2l9sUdnlNhh8vX7CNV/RbwrYjm\nh0IeV+ArfvYh10R+sggUDSsp6WDDhsYhbTiTyMrLwL4Gw5HpY/t+i/X7/ltnL+cXFwytBlGS9jSI\n9eHiQ69t58XzLszo/TrWNK9hxeYVtHW0UTmqkkWzFoXtPhcorxFYTR0orwGnMqJSvf4mcgOpwHDd\nnoj5jFTO4VjtowzQ3tDAnivq2HnOuey5oo52d44gmtBPFoGiYaWlHYicKjWRaA2iVK+8HOoaiVwR\n6/e691jP0GsQxSsXnoBYwxa1J09w34wzmFxSjACTS4q5b8YZGTOfsKZ5DcteXEZrRyuKBgsfrmle\nEzwmVnmN9Y89Erwfa52Nn+tvAgv2li1bxuLFi6mpqUnrnutW5iLNEs3ECf1kEVo0LGAopSZOG1MS\n9Y3Kz5WXmTK2nw7xft9DrkGUpD0N4s0b1WTwfh0rNq+gqy88eaGrr4sVm1cErxZildcIbfdUhTcF\n0rnnul0ppNlQqpUGPlmUlr4b9fFEL3WzdeVltvLl9z2Mkg+hkjVvlGptHdErj4a2xyuvEeBpJXUK\nxJqrScUcjl0ppFm8TJxoNedDF/zE23Dm6S0tnncC87ryMhNl3DaNHvjy+07iZvLJmDcK8LJIMRkL\n9ipHVYYVPgxtD5i94IawOQUYWF4DPKykToF07rkuzlxv9qitrdVNmyIzW7PXnivqoq6ybZt2Bk3j\nRg/4A56z8OZgYIhMnwPnUvcNvsu/N44YsGfwPVefn7W7g0UTuaAKnP84mTTW7cVgwd8zDxvOpHLT\nl8ihUXASCkLnjyIX7IEzuZ7Qfg6cmlMIHUIqLSxl2SXLBkw2J+V3nQLJ/sAjIq+oau2gx1lQSK9Y\n/3H+MGsGJzpODDh+9LjxLPzhT4P3nYU299HV3UppSRXV05fw2Z+NpuXYwBTCSRVlbLj9Cn9OJA1q\nX9zB/ihjrJNLitl0yQfS0KPERWbEwMDgnyyR+1I7r1XA5ded7UtgiPWBp2jiRM56oRGA1u/9OeZW\noFW3X5jQ6w2WfZRJ0lHOxGtQsOGjNIu1yvbEoyujHh85WRbtUvfAsTVEcyBKoMhm6ZyMS5Z4GTHJ\nDgrJ3vRlsCFKL4sUYy3MG8qCvUDhw0yXyeVMwIJCRoiWiTP6uVXOdoURYk2WhZpYURb1SmFiRfSq\nj9lqUklx1CuFbFpQ5SUjJlmSmXr89JYWlq7aFhyibDnWydJV2wCCgSGyAGHbhFr2Vn+a7pIx/Pkb\nG7h4/nRGh1QrDZXLe2LHK2dyvOqlAVf+qZ7fsOyjDOWl5nwst82dQVlxYVhbWXEht82dwZrmNcx5\nYg41P69hzhNzwvK4s83S6irKCiSsLdMWVA0mXkZMYO+KZcuWsXz5cpqamob1Wsnc9OXetbvD5qwA\nOnv6uHft7uD90I2c2ibUsmvGtXSXjgWR4Crud99XkXd7Yse6Cjpauo5du+5wk0d0yOuOhsuCQoY6\nZ/blzFl4M6PHjQcRRo8b73mc+aqZk7jn6vOZVFGG4Mwl3HP1+RSXbx10gY+fkh2QPlM5JmkLqgYr\nf5CQBMpNxAr+Uz42l4aGhuAK4xEjtrB///U0vvA+NmyYPaQ3imSmwsYaigxtD12kuLf60/QXhp9n\n78l+/rT50NAX7GWQRP62Y10FHZ6xKqUlNmKxieY8Midiv+KAqlFVPP/ZyK0ukstrdkg6JDLZG21i\nP+zyPrLcBDipofUPxCw3ES0j5rlNW4MBIbByPXJDo6Hkzycr++jS772QUDLDD296IeZzfeWh7E5+\nSPRvO1bG1a7LbyTaJlAg1F3x2rD7aRPNOSYZ2QpeFvgEJDt1z8uK03TxOtkbmQIcuLwHTr05xys3\nESMoRNtw5r8b/xC8HW/l+sbWWs/rUWDwTV+8um3ujLA5BTg1RBlNOlbNp0rk3/alO/q49v
...[remainder of base64-encoded PNG omitted: matplotlib figure produced by train(env)]...",
177 | "text/plain": [
178 |        "<matplotlib.figure.Figure at 0x...>"
179 | ]
180 | },
181 | "metadata": {},
182 | "output_type": "display_data"
183 | }
184 | ],
185 | "source": [
186 | "train(env)"
187 | ]
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python [default]",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.6.5"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 2
211 | }
212 |
--------------------------------------------------------------------------------
/old_but_more_detailed/Ch3_Gridworld.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import torch as th\n",
11 | "from torch.autograd import Variable\n",
12 | "from matplotlib import pyplot as plt\n",
13 | "import random\n",
14 | "import sys\n",
15 | "from Gridworld import *\n",
16 | "%matplotlib inline"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "array([['+', '-', ' ', 'P'],\n",
28 | " [' ', 'W', ' ', ' '],\n",
29 | " [' ', ' ', ' ', ' '],\n",
30 | "       [' ', ' ', ' ', ' ']], dtype='<U2')"
101 | "<ipython-input-...> in <module>()\n----> 1 loss_fn = torch.nn.MSELoss(size_average=False)\n      2 learning_rate = 1e-4\n      3 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n      4 '''for t in range(500):\n      5     y_pred = model(x)\n",
102 | "NameError: name 'torch' is not defined"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "loss_fn = th.nn.MSELoss(size_average=False)\n",
108 | "learning_rate = 1e-4\n",
109 | "optimizer = th.optim.Adam(model.parameters(), lr=learning_rate)\n",
110 | "'''for t in range(500):\n",
111 | " y_pred = model(x)\n",
112 | " loss = loss_fn(y_pred, y)\n",
113 | " print(t, loss.item())\n",
114 | " optimizer.zero_grad()\n",
115 | " loss.backward()\n",
116 | " optimizer.step()'''"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "metadata": {},
123 | "outputs": [],
124 | "source": []
125 | }
126 | ],
127 | "metadata": {
128 | "kernelspec": {
129 | "display_name": "Python 3",
130 | "language": "python",
131 | "name": "python3"
132 | },
133 | "language_info": {
134 | "codemirror_mode": {
135 | "name": "ipython",
136 | "version": 3
137 | },
138 | "file_extension": ".py",
139 | "mimetype": "text/x-python",
140 | "name": "python",
141 | "nbconvert_exporter": "python",
142 | "pygments_lexer": "ipython3",
143 | "version": "3.6.4"
144 | }
145 | },
146 | "nbformat": 4,
147 | "nbformat_minor": 2
148 | }
149 |
--------------------------------------------------------------------------------
/old_but_more_detailed/Ch3_Gridworld_exp.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Users/brandonbrown/anaconda3/envs/deeprl/lib/python3.6/site-packages/matplotlib/font_manager.py:278: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
13 | " 'Matplotlib is building the font cache using fc-list. '\n"
14 | ]
15 | }
16 | ],
17 | "source": [
18 | "import numpy as np\n",
19 | "import torch as th\n",
20 | "from torch.autograd import Variable\n",
21 | "from matplotlib import pyplot as plt\n",
22 | "import random\n",
23 | "import sys\n",
24 | "sys.path.append(\"/Users/brandonbrown/Desktop/Projects/pycolab/\")\n",
25 | "%matplotlib inline"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {},
32 | "outputs": [],
33 | "source": [
34 | "def randPair(s,e):\n",
35 | " return np.random.randint(s,e), np.random.randint(s,e)\n",
36 | "\n",
37 | "class BoardPiece:\n",
38 | " \n",
39 | " def __init__(self, name, code, pos):\n",
40 | " self.name = name #name of the piece\n",
41 | " self.code = code #an ASCII character to display on the board\n",
42 | " self.pos = pos #2-tuple e.g. (1,4)\n",
43 | " \n",
44 | "class BoardMask:\n",
45 | " \n",
46 | " def __init__(self, name, mask, code):\n",
47 | " self.name = name\n",
48 | " self.mask = mask\n",
49 | " self.code = code\n",
50 | " \n",
51 | " def get_positions(self): #returns tuple of arrays\n",
52 | " return np.nonzero(self.mask)\n",
53 | "\n",
54 | "def zip_positions2d(positions): #positions is tuple of two arrays\n",
55 | " x,y = positions\n",
56 | " return list(zip(x,y))\n",
57 | "\n",
58 | "class GridBoard:\n",
59 | " \n",
60 | " def __init__(self, size=4):\n",
61 | " self.size = size #Board dimensions, e.g. 4 x 4\n",
62 | " self.components = {} #name : board piece\n",
63 | " self.masks = {}\n",
64 | " \n",
65 | " def addPiece(self, name, code, pos=(0,0)):\n",
66 | " newPiece = BoardPiece(name, code, pos)\n",
67 | " self.components[name] = newPiece\n",
68 | " \n",
69 | " #basically a set of boundary elements\n",
70 | " def addMask(self, name, mask, code):\n",
71 | " #mask is a 2D-numpy array with 1s where the boundary elements are\n",
72 | " newMask = BoardMask(name, mask, code)\n",
73 | " self.masks[name] = newMask\n",
74 | " \n",
75 | " def movePiece(self, name, pos):\n",
76 | " move = True\n",
77 | " for _, mask in self.masks.items():\n",
78 | " if pos in zip_positions2d(mask.get_positions()):\n",
79 | " move = False\n",
80 | " if move:\n",
81 | " self.components[name].pos = pos\n",
82 | " \n",
83 | " def delPiece(self, name):\n",
84 | "        del self.components[name] #delete by the name argument, not the literal string 'name'\n",
85 | " \n",
86 | " def render(self):\n",
87 | "        dtype = '<U2'\n",
129 | "        if size >= 4:\n",
130 | " self.board = GridBoard(size=size)\n",
131 | " else:\n",
132 | " print(\"Minimum board size is 4. Initialized to size 4.\")\n",
133 | " self.board = GridBoard(size=4)\n",
134 | " \n",
135 | " #Add pieces, positions will be updated later\n",
136 | " self.board.addPiece('Player','P',(0,0))\n",
137 | " self.board.addPiece('Goal','+',(1,0))\n",
138 | " self.board.addPiece('Pit','-',(2,0))\n",
139 | " self.board.addPiece('Wall','W',(3,0))\n",
140 | " \n",
141 | " if mode == 'static':\n",
142 | " self.initGridStatic()\n",
143 | " elif mode == 'player':\n",
144 | " self.initGridPlayer()\n",
145 | " else:\n",
146 | " self.initGridRand()\n",
147 | " \n",
148 | " #Initialize stationary grid, all items are placed deterministically\n",
149 | " def initGridStatic(self):\n",
150 | " #Setup static pieces\n",
151 | " self.board.components['Player'].pos = (0,3) #Row, Column\n",
152 | " self.board.components['Goal'].pos = (0,0)\n",
153 | " self.board.components['Pit'].pos = (0,1)\n",
154 | " self.board.components['Wall'].pos = (1,1)\n",
155 | " \n",
156 | " #Check if board is initialized appropriately (no overlapping pieces)\n",
157 | " def validateBoard(self):\n",
158 | " all_positions = [piece.pos for name,piece in self.board.components.items()]\n",
159 | " if len(all_positions) > len(set(all_positions)):\n",
160 | " return False\n",
161 | " else:\n",
162 | " return True\n",
163 | "\n",
164 | " #Initialize player in random location, but keep wall, goal and pit stationary\n",
165 | " def initGridPlayer(self):\n",
166 | "        #start from the static layout, then randomize only the player\n",
167 | " self.initGridStatic()\n",
168 | " #place player\n",
169 | " self.board.components['Player'].pos = randPair(0,self.board.size)\n",
170 | "\n",
171 | " if (not self.validateBoard()):\n",
172 | " #print('Invalid grid. Rebuilding..')\n",
173 | " self.initGridPlayer()\n",
174 | "\n",
175 | " #Initialize grid so that goal, pit, wall, player are all randomly placed\n",
176 | " def initGridRand(self):\n",
177 | "        #randomize the position of every piece\n",
178 | " self.board.components['Player'].pos = randPair(0,self.board.size)\n",
179 | " self.board.components['Goal'].pos = randPair(0,self.board.size)\n",
180 | " self.board.components['Pit'].pos = randPair(0,self.board.size)\n",
181 | " self.board.components['Wall'].pos = randPair(0,self.board.size)\n",
182 | "\n",
183 | " if (not self.validateBoard()):\n",
184 | " #print('Invalid grid. Rebuilding..')\n",
185 | " self.initGridRand()\n",
186 | "\n",
187 | " def makeMove(self, action):\n",
188 | " #need to determine what object (if any) is in the new grid spot the player is moving to\n",
189 | " #actions in {u,d,l,r}\n",
190 | " def checkMove(addpos=(0,0)):\n",
191 | " new_pos = addTuple(self.board.components['Player'].pos, addpos)\n",
192 | " if new_pos == self.board.components['Wall'].pos:\n",
193 | " pass #block move, player can't move to wall\n",
194 | " elif max(new_pos) > (self.board.size-1): #if outside bounds of board\n",
195 | " pass\n",
196 | " elif min(new_pos) < 0: #if outside bounds\n",
197 | " pass\n",
198 | " else:\n",
199 | " self.board.movePiece('Player', new_pos)\n",
200 | " if action == 'u': #up\n",
201 | " checkMove((-1,0))\n",
202 | " elif action == 'd': #down\n",
203 | " checkMove((1,0))\n",
204 | " elif action == 'l': #left\n",
205 | " checkMove((0,-1))\n",
206 | " elif action == 'r': #right\n",
207 | " checkMove((0,1))\n",
208 | " else:\n",
209 | " pass\n",
210 | "\n",
211 | " def reward(self):\n",
212 | " if (self.board.components['Player'].pos == self.board.components['Pit'].pos):\n",
213 | " return -10\n",
214 | " elif (self.board.components['Player'].pos == self.board.components['Goal'].pos):\n",
215 | " return 10\n",
216 | " else:\n",
217 | " return -1\n",
218 | "\n",
219 | " def display(self):\n",
220 | " return self.board.render()"
221 | ]
222 | },
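A quick interaction with the environment above, as a minimal sketch; it assumes the class definition lost in the garbled span is the book's Gridworld (same API as Environments/Gridworld.py), with the expected behavior noted in comments:

    game = Gridworld(size=4, mode='static')
    game.display()       # 4x4 char array: '+' and '-' on the top row, 'P' top-right, 'W' at (1,1)
    game.makeMove('d')   # move the player down one row
    game.reward()        # -1 per step until the player reaches the goal (+10) or the pit (-10)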
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "metadata": {},
227 | "outputs": [],
228 | "source": []
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 29,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "data": {
237 | "text/plain": [
238 | "array([['+', '-', ' ', 'P'],\n",
239 | " [' ', 'W', ' ', ' '],\n",
240 | " [' ', ' ', ' ', ' '],\n",
241 | "       [' ', ' ', ' ', ' ']], dtype='<U2')"
462 | "        if size >= 5:\n",
463 | " self.board = GridBoard(size=size)\n",
464 | " else:\n",
465 | " print(\"Minimum board size is 5. Initialized to size 5.\")\n",
466 | " self.board = GridBoard(size=5)\n",
467 | " \n",
468 | " #Add pieces, positions will be updated later\n",
469 | " self.board.addPiece('Player','P',(0,0))\n",
470 | " self.board.addPiece('Goal','+',(0,4))\n",
471 | " self.board.addPiece('Box','B',(2,1))\n",
472 | " \n",
473 | " mask = np.array([\n",
474 | " [0, 0, 1, 0, 0],\n",
475 | " [0, 0, 1, 0, 0],\n",
476 | " [1, 0, 1, 1, 0],\n",
477 | " [1, 0, 0, 0, 0],\n",
478 | " [1, 0, 1, 1, 0]])\n",
479 | " self.board.addMask('boundary', mask, code='#')\n",
480 | " \n",
481 | " if mode != 'static':\n",
482 | " self.initGridRand()\n",
483 | " \n",
484 | " def initGridRand(self):\n",
485 | " self.board.components['Player'].pos = (np.random.randint(0,2), np.random.randint(0,2))\n",
486 | " self.board.components['Goal'].pos = (np.random.randint(0,2), np.random.randint(3,5))\n",
487 | " \n",
488 | " def makeMove(self, action):\n",
489 | " #need to determine what object (if any) is in the new grid spot the player is moving to\n",
490 | " #actions in {u,d,l,r}\n",
491 | " def checkMove(addpos=(0,0)):\n",
492 | " new_pos = addTuple(self.board.components['Player'].pos, addpos)\n",
493 | " if max(new_pos) > (self.board.size-1): #if outside bounds of board\n",
494 | " pass\n",
495 | " elif min(new_pos) < 0: #if outside bounds\n",
496 | " pass\n",
497 | " else:\n",
498 | " self.board.movePiece('Player', new_pos)\n",
499 | " if action == 'u': #up\n",
500 | " checkMove((-1,0))\n",
501 | " elif action == 'd': #down\n",
502 | " checkMove((1,0))\n",
503 | " elif action == 'l': #left\n",
504 | " checkMove((0,-1))\n",
505 | " elif action == 'r': #right\n",
506 | " checkMove((0,1))\n",
507 | " else:\n",
508 | " pass\n",
509 | " \n",
510 | " def getReward(self):\n",
511 | " if (self.board.components['Player'].pos == self.board.components['Goal'].pos):\n",
512 | " return 10\n",
513 | " else:\n",
514 | " return -1"
515 | ]
516 | }
517 | ],
518 | "metadata": {
519 | "kernelspec": {
520 | "display_name": "Python [conda env:deeprl]",
521 | "language": "python",
522 | "name": "conda-env-deeprl-py"
523 | },
524 | "language_info": {
525 | "codemirror_mode": {
526 | "name": "ipython",
527 | "version": 3
528 | },
529 | "file_extension": ".py",
530 | "mimetype": "text/x-python",
531 | "name": "python",
532 | "nbconvert_exporter": "python",
533 | "pygments_lexer": "ipython3",
534 | "version": "3.6.5"
535 | }
536 | },
537 | "nbformat": 4,
538 | "nbformat_minor": 2
539 | }
540 |
--------------------------------------------------------------------------------
/old_but_more_detailed/Ch4_PolicyGradients.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning in Action \n",
8 | "## Ch. 4 - Policy Gradients"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import gym\n",
18 | "import numpy as np\n",
19 | "import torch\n",
20 | "from matplotlib import pylab as plt"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "#### Helper functions"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 2,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "def running_mean(x, N=50):\n",
37 | " cumsum = np.cumsum(np.insert(x, 0, 0)) \n",
38 | " return (cumsum[N:] - cumsum[:-N]) / float(N)"
39 | ]
40 | },
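running_mean smooths a series with a length-N moving average built from a cumulative sum, returning len(x) - N + 1 points (which is why the smoothed curves below start N points in). A small worked check, assuming the cell above has run:

    x = np.array([1., 2., 3., 4., 5.])
    running_mean(x, N=2)  # -> array([1.5, 2.5, 3.5, 4.5])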
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "#### Defining Network"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "l1 = 4\n",
55 | "l2 = 150\n",
56 | "l3 = 2\n",
57 | "\n",
58 | "model = torch.nn.Sequential(\n",
59 | " torch.nn.Linear(l1, l2),\n",
60 | " torch.nn.LeakyReLU(),\n",
61 | " torch.nn.Linear(l2, l3),\n",
62 | "    torch.nn.Softmax(dim=0) #states are 1-D vectors; the explicit dim avoids the deprecation warning recorded below\n",
63 | ")\n",
64 | "\n",
65 | "learning_rate = 0.0009\n",
66 | "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "#### Objective Function"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 4,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "def loss_fn(preds, r): \n",
83 | " # pred is output from neural network, a is action index\n",
84 | " # r is return (sum of rewards to end of episode), d is discount factor\n",
85 | " return -torch.sum(r * torch.log(preds)) # element-wise multipliy, then sum"
86 | ]
87 | },
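The cell above is the REINFORCE objective: the log-probability of each action actually taken is weighted by the discounted return that followed it, so actions that preceded high returns become more likely after a gradient step. A self-contained sketch with made-up numbers (these tensors are hypothetical stand-ins for the preds and discounted_rewards built in the training loop below):

    import torch

    probs = torch.tensor([0.6, 0.3, 0.9])          # pi(a_t|s_t) for the actions taken
    returns = torch.tensor([2.9701, 1.9900, 1.0])  # G_t with gamma=0.99 and reward +1 per step
    loss = -torch.sum(returns * torch.log(probs))  # ~4.02; low-probability, high-return steps dominate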
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "#### Training Loop"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 5,
98 | "metadata": {
99 | "scrolled": true
100 | },
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.\n"
107 | ]
108 | },
109 | {
110 | "name": "stderr",
111 | "output_type": "stream",
112 | "text": [
113 | "/anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py:91: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n",
114 | " input = module(input)\n"
115 | ]
116 | }
117 | ],
118 | "source": [
119 | "env = gym.make('CartPole-v0')\n",
120 | "MAX_DUR = 200\n",
121 | "MAX_EPISODES = 500\n",
122 | "gamma_ = 0.99\n",
123 | "time_steps = []\n",
124 | "for episode in range(MAX_EPISODES):\n",
125 | " curr_state = env.reset()\n",
126 | " done = False\n",
127 | " transitions = [] # list of state, action, rewards\n",
128 | " \n",
129 | " for t in range(MAX_DUR): #while in episode\n",
130 | " act_prob = model(torch.from_numpy(curr_state).float())\n",
131 | " action = np.random.choice(np.array([0,1]), p=act_prob.data.numpy())\n",
132 | " prev_state = curr_state\n",
133 | " curr_state, reward, done, info = env.step(action)\n",
134 | " transitions.append((prev_state, action, reward))\n",
135 | " if done:\n",
136 | " break\n",
137 | "\n",
138 | " # Optimize policy network with full episode\n",
139 | " ep_len = len(transitions) # episode length\n",
140 | " time_steps.append(ep_len)\n",
141 | " preds = torch.zeros(ep_len)\n",
142 | " discounted_rewards = torch.zeros(ep_len)\n",
143 | " for i in range(ep_len): #for each step in episode\n",
144 | " discount = 1\n",
145 | " future_reward = 0\n",
146 | " # discount rewards\n",
147 | " for i2 in range(i, ep_len):\n",
148 | " future_reward += transitions[i2][2] * discount\n",
149 | " discount = discount * gamma_\n",
150 | " discounted_rewards[i] = future_reward\n",
151 | " state, action, _ = transitions[i]\n",
152 | " pred = model(torch.from_numpy(state).float())\n",
153 | " preds[i] = pred[action]\n",
154 | " loss = loss_fn(preds, discounted_rewards)\n",
155 | " optimizer.zero_grad()\n",
156 | " loss.backward()\n",
157 | " optimizer.step()\n",
158 | " \n",
159 | "env.close()"
160 | ]
161 | },
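The nested loop above recomputes each discounted return from scratch, which is quadratic in episode length. The same discounted_rewards vector can be built in one backward pass; this is a sketch of an equivalent helper, not code from the book:

    import torch

    def discount_rewards(rewards, gamma=0.99):
        # rewards: per-step rewards for one episode (list or 1-D tensor)
        returns = torch.zeros(len(rewards))
        G = 0.0
        for t in reversed(range(len(rewards))):
            G = rewards[t] + gamma * G   # G_t = r_t + gamma * G_{t+1}
            returns[t] = G
        return returns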
162 | {
163 | "cell_type": "code",
164 | "execution_count": 7,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "[]"
171 | ]
172 | },
173 | "execution_count": 7,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | },
177 | {
178 | "data": {
179 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmgAAAGtCAYAAACiIojTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd4VFXixvHvSQgEAoQQei/SFKSFjkgvogJiQ1RUEAssNlzLb1FwV0VZKyqgooIIiICgiEgNAoKSEEroLRAgJHRCgLQ5vz+IbNAAATJzJ5P38zx5mDn3zsw7+6zw5t5z7zHWWkRERETEe/g5HUBERERELqSCJiIiIuJlVNBEREREvIwKmoiIiIiXUUETERER8TIqaCIiIiJeRgVNRERExMuooImIiIh4GRU0ERERES+Tz+kA16JEiRK2SpUqTscQERERuazIyMjD1tqS2dk3Vxe0KlWqEBER4XQMERERkcsyxuzJ7r46xSkiIiLiZVTQRERERLyM2wqaMaaiMWaJMWazMWajMeapjPHixpgFxpjtGX+GZIwbY8yHxpgdxpj1xphG7somIiIi4s3ceQQtDXjOWlsHaA4MMsZcD7wILLLW1gAWZTwH6AbUyPgZCIxxYzYRERERr+W2gmatjbPWrsl4nAhsBsoDPYAJGbtNAHpmPO4BTLTnrAKKGWPKuiufiIiIiLfyyBw0Y0wVoCHwO1DaWhsH50ocUCpjt/JAbKaX7csYExEREclT3F7QjDGFgRnA09bak5faNYsxm8X7DTTGRBhjIg4dOpRTMUVERES8hlsLmjEmgHPl7Btr7cyM4fg/T11m/JmQMb4PqJjp5RWAA399T2vtp9baMGttWMmS2brXm4iIiEiu4s6rOA0wHthsrX0306YfgH4Zj/sBszONP5hxNWdz4MSfp0JFRERE8hJ3riTQCngA2GCMWZsx9jIwEphmjOkP7AXuytg2F7gF2AGcBh52YzYRERERr+W2gmatXU7W88oAOmSxvwUGuSuPiIiISG6hlQREREREvIwKmoiIiIiXUUETERER8TIqaCIiInlAUkoSqempTseQbFJBExER8XEu66L1l62pP7Y+e47vIc2V5nQkuQx33mZDREREvMCCnQtYe3AtBkOVD6pQPaQ6mwdtJsA/wOlochEqaCIiIj5k7va5PDL7EfL55aNN5TaEFgwlfE84ZQqX4cc+PzJh7QQ+Wv0Rv+z8hVtr3up0XLkIFTQREREfMnnDZFLSU2hXtR1L9yzlbNpZDIYRbUcQVi6M+qXrM23TNL5c+6UKmhdTQRMREfER1loW715M1+u6Mrn35Cz3CfAP4P569zP6j9H8uudXmldozqp9qwgrF0ahgEIeTiwXo4sEREREfMTWI1uJOxVH+6rtL7nfi61f5Lri19F+QnuCRwZz81c3M2zxMA+llOxQQRMREfEBy/Ys4/kFzwPQrkq7S+5bMqgkS/ot4fmWz/Nk2JO0q9KOz6M+JzE50RNRJRt0ilNERCQXiD0RS3xSPGHlwrLc/ticx9h2ZButK7WmWki1y75f6cKlebPjmwCs2reKFuNbMGHdBAY3HZyjueXqqKCJiIh4qYnrJrL18FZSXal8GvkpSalJLHxgITdXufmC/TYd2sTmw5v5+JaPebLJk1f8Oc0rNKdZ+WZ8+PuHPNnkSfyMTrA5TQVNRETEC+08upN+s/rhZ/zwM340Ld+UI6eP0Htab+bdP4+U9BTunX4v8UnxVCxaEYPhjjp3XPXnPdXsKe6beR9To6fSq3YvCgYUzMFvI1dKFVlERMQLfbX2KwyGPU/vIXVYKiseWcGc++ZQOH9hmn7WlNZftKZAvgL0q9+PXcd20aZyG8oULnPVn3fn9XdSrkg5+s7sS5UPqjBvx7wc/DZypYy11ukMVy0sLMxGREQ4HUNERCTHnE49zX9+/Q/jo8bTqGwjfu778wXb4xLjGBc5Dj/jxxNhT1AyqCTr49dTvGBxKhStcE2fHZ0Qzap9q/jw9w/ZfHgzyx9eTrMKza7pPeV/jDGR1tqsJxH+dV8VNBEREe8xZvUYnpz7JCULlWTSHZPoXL2zxzMcO3OMRp82wlpL1GNRhBQM8XgGX3QlBU2nOEVERLyEtZZxkeNoWKYh8UPjHSlnACEFQ5jaeyr7E/fzyA+PkJsP5uRWKmgiIiIOm71lNh0mdqDthLasi1/HwMYDMcY4mqlZhWa81fEtZm2ZxcR1Ex3NkhepoImIiDhs1G+jiIqLwmVd3F7rdvrW6+t0JACebv40LSq0YOiCoRw5fcTpOHmKCpqIiIiDEpMT+X3/7zzW+DGWPbyM2ffOpkiBIk7HAsDP+PHxLR9z+PRhpkRPcTpOnqKCJiIi4qBf9/xKmiuNjtU6Oh0lSw3LNqRycGXCY8KdjpKnqKCJiIg45KdtP/HeqvcIzBdIq0qtnI5zUW2rtGXpnqW4rMvpKHmGVhIQERHxoDRXGp+s/oRV+1adP2149w13E5gv0OFkF9e2SlsmrJvApkObqFuqrtNx8gQVNBEREQ/6YNUHDF0wlKCAIJ5q9hSvt3+dQgGFnI51SW2rtAVg3o55KmgeolOcIiIiHhKdEM2wJcO4reZtJL6UyPtd3ycof5Djt9S4nCrFqtC6UmveX/U+Z9POOh0nT1BBExERcaOnfn6K0LdDeXj2w3T+ujPFAosxpvsYry9lfzX85uHsT9zPLd/cwgerPlBRczMVNBERETdJSEpgXOQ4ggsEM3f7XEIKhrDggQWUL1re6WhXrH3V9jzV7CliT8by9C9Pc+vkW52O5NM0B01ERMRNxkaMJTk9mbl951K7RG2n41wTYwzvd32f97u+z0sLX2LkipEcOX2E0EKhTkfzSTqCJiIi4gYu6+LzNZ/TuXrnXF/O/qp7ze7AuXu4iXuooImIiOSQE2dPnH+8bM8yYk/G8lD9h5wL5CZNyjWhYL6CLN2z1OkoPksFTUREJAd8t/E7Qt4KYeTykczdPpdRv42icP7C9Kjdw+loOa5AvgK0rNhSqwu4keagiYiIXKPjZ48zZN4Q8vvn56VFL50f79+wv9ff4+xqdanehX8u/CcrY1fSomILp+P4HBU0ERGRa/T6r68Tfyqe3/r/xpq4NdQMrUnFohWpFlLN6Whu80STJ3j/9/d5+penWdl/JX5GJ+VykgqaiIjINYg9EcvoP0bzYP0HaV6hOc0rNHc6kkcUzl+Yf7f7N/1/6M/i3Yu9drH33Ep1V0RE5Bp89MdHuKyLEW1HOB3F4/rW60uJQiUYEzHG6Sg+RwVNRETkGszfNZ9WlVpRuVhlp6N4XIF8BXi4wcPM3jKbvSf2Oh3Hp6igiYiIXKVDSYdYe3AtHavm3dN7g5oMIsA/gGd+ecbpKD5FBU1EROQqLYlZAkCHah0cTuKcysUqM/zm4czcPJMao2swd/tcpyP5BLcVNGPMF8aYBGNMdKaxb40xazN+YowxazPGqxhjzmTaNtZduURERK5VmiuNXt/2os+MPhQtUJSwcmF
OR3LUsy2e5Y32b5CUksSo30Y5HccnuPMqzq+Aj4CJfw5Ya+/587Ex5h3gRKb9d1prG7gxj4iISI54edHLzNoyi0cbPUr3Gt3J55e3b4oQ4B/ASze9RKorlVfDXyX2RCwVgys6HStXc9sRNGvtr8DRrLYZYwxwNzDFXZ8vIiLiDsfPHuf9Ve/zUIOH+PS2T31ypYCr1bdeXwAmb5jscJLcz6k5aDcB8dba7ZnGqhpjoowxS40xNzmUS0RE5JJ+2vYTqa5UBjYa6HQUr1O9eHVuqnQTYyPHku5KdzpOruZUQevDhUfP4oBK1tqGwLPAZGNM0axeaIwZaIyJMMZEHDp0yANRRUREzklOS2b65umUK1KOZhWaOR3HKz3d/Glijscwe+tsp6Pkah4vaMaYfMAdwLd/jllrk621RzIeRwI7gZpZvd5a+6m1NsxaG1ayZElPRBYRkTxu97Hd3PTlTRR6oxCztsyiV+1eWtroInrU6kHVYlV5NfxVTqeedjpOruXErMaOwBZr7b4/B4wxJYGj1tp0Y0w1oAawy4FsIiKSByUmJzJsyTASkhL+tq1s4bKs2r+K6IRoXmj1AiULlaRPvT4OpMwd/P38+aT7J9zyzS08+dOTfNXzK6cj5UpuK2jGmClAW6CEMWYf8Kq1djxwL3+/OKAN8JoxJg1IBx631mZ5gYGIiEhOe2HhC4yNGMt1xa/727aY4zGkulL5qsdX9GvQz4F0uU/X67ry8k0v8/qy1xnQaACtK7V2OlKuY6y1Tme4amFhYTYiIsLpGCIikotFHogk7LMwnmn+DO92efdv2/cc38OauDX0rN2TczchkOxISkmi5kc1KV+kPKsGrNIpYcAYE2mtzdZN8/S/loiI5Gk/bP0BP+PHsDbDstxeuVhletXppXJ2hYLyBzGyw0hWH1jNx3987HScXEcFTURE8rTFMYtpXLYxIQVDnI7ic+6/8X66XdeNFxe9SFxinNNxchUVNBERybNOpZxi1b5VdKiad9fSdCdjDKO7jeZs2lk+/P1Dp+PkKipoIiLik37Z8Qu1P6pNtQ+qUefjOmxM2Pi3fZbvXU6aK432Vds7kDBvqF68OnfUuYMxEWOYtnEaKekpTkfKFVTQRETE55xKOUX/H/qT6kqldaXW7Dq2i/FR4/+230/bfqJgvoK0qtTKgZR5x8utX8ZlXdwz/R76/9Cf3HyBoqeooImIiE8Z8vMQir9VnP2J+/m619dM7DWRLtW7MH3T9AuKgcu6+H7L93S5rguFAgo5mNj3NSzbkITnExjWZhiT1k/i243fXv5FeZwKmoiI+IyDpw4yNmIsrSq1Ytqd02hZsSUAd15/J7EnY3l50css2rWIo2eOEh4Tzv7E/dxR+w6HU+cNgfkCGd52OOWLlOeHrT84HcfrObGSgIiIiFt8FvkZqa5Uxt06jpqh/1sx8PZatxNaMJSRK0YycsXI8+P5/PJxa81bnYiaJ/kZP5qWb0rEAd3D9HJU0ERExGd8s+EbOlTtcEE5AygWWIy45+I4lXKKJTFLiD0RC0CtErV0ew0Pa1KuCd9v+Z7jZ49TLLCY03G8lgqaiIj4hJPJJ9l6ZCv333h/ltsD/AMIKRjCHXV0StNJYeXO3Ug/8kAkHarp9iYXozloIiKSq7msi1eWvML3m78HoFHZRg4nkktpXK4xABEHIrDWkpqeesH25LRkRoSPYPX+1U7E8xo6giYiIrnaxoSN/PvXfxMUEASooHm74gWLUyu0FjO3zGT70e3M3zmfpQ8tpWpIVQ4kHuD+mfezJGYJX6//mugnownMF+h0ZEfoCJqIiORqUQejAEhKTaJM4TKUKVzG4URyOc+1eI4/9v/B+KjxxJ6Mpds33YhOiOaGT25g5b6VDGk6hJ3HdvLw7IeZu31uljcZ9nU6giYiIrlaVFzU+cc6epY7PNTgId7+7W3OpJ7h/a7vc9d3d9Hp606kpKew9rG11CpRi4IBBXl35btMjZ4KwE2VbqJ2idr8t/N/KVqgqMPfwP1U0EREJFeLOhhFWLkwDIbba97udBzJhgD/AJY9vAxrLWUKl6F1pdYs37ucoS2GUqtELQBGdhzJcy2eY+exnSzctZDZW2fz2ZrPCAkM4a1Obzn8DdzP5OblFsLCwmxEhO6lIiKSV7msi5C3Quhbry+fdP/E6ThylVbGruSZX55h9r2zKV249EX3e2jWQ0yJnkL0E9HUCK2RY5//07afKF+0PA3KNMix98yKMSbSWhuWnX01B01ERHKt3cd2czL5JA3LNHQ6ilyDFhVbsGrAqkuWM4A3OrxBwXwFeWj2QxxKOpQjn3349GH6zerHM788kyPvl1NU0EREJNf6Y/8fwP/urSW+rVyRcnx8y8f8Fvsbpf5bipcWvnRN7+eyLp755RlOJJ9gdLfROZQyZ6igiYhIrvVb7G8EBQRRr3Q9p6OIh9xX7z7m9JlDj1o9GPXbqKu+wjPNlUafGX2YtH4SL7d+mbql6uZw0mujgiYiIrnWyn0raVq+Kfn8dM1bXmGMoXvN7nx222cUKVCEZ+c/y9XMp3950ctM2ziNkR1GMrzt8JwPeo1U0EREJFc6nXqadfHraFGhhdNRxAElg0ry6s2vMn/nfH7a/tPftu85vofktOQLxhKTE5m8YTKPz3mcUb+N4omwJ3ih9QsYYzwVO9tU0EREJNeZtH4SoW+HkuZKo0VFFbS8alCTQdQuUZt/LvgnLus6P77r2C5qjK5Bg3ENGLZ4GCtjVwLw2tLX6DuzL+Mix/Fs82f5oOsHTkW/LBU0ERHJdT5f8znBBYK5+4a7aVulrdNxxCEB/gGMaDuCzYc3M2PTjPPjYyPG4rIuUtJT+M+y/9B+Ynvmbp/LZ2s+o1ftXhx87iDvdHmHAP8AB9NfmgqaiIjkKglJCSzbu4yBjQfy7Z3fUjh/YacjiYN61+lNrdBajFg6gn0n9zEifASfr/mcnrV7snPIThKGJlApuBLdJ3fnRPIJnm/5/GVv5+ENNKtSRERylRmbZuCyLnrX6e10FPEC/n7+jOo0itun3k7N0TU5m3aWIgWK8FyL54Bzc9VW9l/JR398xKmUUzSv0NzhxNmjlQRERCTXWLRrEbdNuY3aJWoTOTDSKyd3izNeXfIq7616j9n3zqZd1XZOx8nSlawkoIImIiK5RsNxDUlKSWL5I8spFVTK6TjiZVLTU716XpmWehIREZ+TkJTA2oNr6Ve/n8qZZMmby9mVUkETEZFcYeGuhQB0rt7Z4SQi7qeCJiIiucKCXQsICQyhUdlGTkcRcTsVNBERyRWWxiylXdV2+Pv5Ox1FxO1U0ERExOudOHuC3cd3E1Y2W/OrRXI9FTQREfF66+PXA1C/TH2Hk4h4hgqaiIh4vXXx6wCoX1oFTfIGFTQREfF6aw+uJbRgKOWKlHM6iohHqKCJiIjXSklPYcjPQ/hl5y80KNNAKwdInqGCJiIiXmvOtjmM/mM0+07uo0m5Jk7HEfEYLZYuIiJe68u1X1K2cFnm3DeHmqE1nY4j4jE6giYiIl5h1IpR3PXdXayMXUlqei
rjIsbx8/afebD+gzQq24jC+Qs7HVHEY3QETUREHHcm9Qyv/foaSSlJzNk2hx61evDtxm+5oeQNDGoyyOl4Ih7ntiNoxpgvjDEJxpjoTGPDjTH7jTFrM35uybTtJWPMDmPMVmNMF3flEhER7zNn2xxOpZxiYq+JFPAvwLcbv+Wxxo+x4YkNVAyu6HQ8EY9z5xG0r4CPgIl/GX/PWvvfzAPGmOuBe4EbgHLAQmNMTWttuhvziYiIl5gcPZmyhcvSp24fAvMF8vmazxnVaZSu2pQ8y21H0Ky1vwJHs7l7D2CqtTbZWrsb2AE0dVc2ERHxHtuObOOHrT9w/4334+/nz53X38m8++dRpEARp6OJOMaJiwQGG2PWZ5wCDckYKw/EZtpnX8aYiIj4uBFLRxCYL5ChLYc6HUXEa3i6oI0BqgMNgDjgnYzxrI5h26zewBgz0BgTYYyJOHTokHtSioiIR8QlxjE1eipPhj1JqaBSTscR8RoeLWjW2nhrbbq11gV8xv9OY+4DMs8CrQAcuMh7fGqtDbPWhpUsWdK9gUVExK2+3fgtLuuif6P+TkcR8SoeLWjGmLKZnvYC/rzC8wfgXmNMAWNMVaAG8Icns4mIiGfFn4pn0vpJNCjTgNolajsdR8SruPM2G1OAlUAtY8w+Y0x/4G1jzAZjzHqgHfAMgLV2IzAN2ATMAwbpCk4RkdzJWnv+z2GLh7EyduXf9vlp20+Uf7c8kXGR9K3X19MRRbye+fM/pNwoLCzMRkREOB1DREQy/Lz9Zx6c9SBrBq4hOT2ZGqNrUCywGCv7r6R2idpsTNjIxHUTmbZpGgXzFeT19q/TrUY3AvMFOh1dxO2MMZHW2rDs7KuVBEREJMfM2zGPw6cP887Kd6hbqi4A6a50Hpn9CCseWcHbv73NxHUTMRiW9FvCzVVudjixiHfSWpwiIpJjIuMiAfhszWdM2ziNMoXL8N/O/2XlvpXM3jqbhbsW0rN2T7b9Y5vKmcglqKCJiEiOSHelE3Uwis7VO5PuSmfBrgW0rdKWRxo+Qq3QWjw25zEOJB6ge43uXFf8Oqfjing1FTQREckRW49s5XTqae6rex9jbx0LQMeqHcnnl4/R3UaTkJQAQKdqnZyMKZIraA6aiIhcszOpZ5i+aToAjcs1pm6pujQo0+D8PLRO1TvxUIOHiE6IpnKxyk5GFckVVNBEROSaPf7T40xcN5GShUqev6dZgzINLthn/O3jnYgmkivpFKeIiFwTl3Xx8/af6VGrB9v+sY18fln/7u9n/PAz+mdHJDv0X4qIiFyT6IRoDp0+RM/aPSkWWMzpOCI+QQVNRESuyaJdiwDoULWDw0lEfIcKmoiIXJPFMYupUbwGFYMrOh1FxGeooImIyDVZvX81LSu2dDqGiE9RQRMRkasWlxhHfFI8Dcs0dDqKiE9RQRMRkau29uBaABqWVUETyUkqaCIictWiDkYBUL90fYeTiPgWFTQREblqaw+upVpINYIDg52OIuJTVNBEROSqrYlb87cVA0Tk2qmgiYjIVYk9EcvOYztpVbGV01FEfI4KmoiIXJVFu8/doLZjtY4OJxHxPSpoIiJyVRbuWkipoFLUK1XP6SgiPkcFTURErpi1loW7FtKxWkeMMU7HEfE5KmgiInLFohOiiU+Kp2NVnd4UcQcVNBERuWILdy0ENP9MxF1U0ERE5Iot3L2QWqG1tEC6iJuooImIyBVJSU9hacxSHT0TcSMVNBERuSJRcVEkpSbRrko7p6OI+CwVNBERuSJr4tYA0KR8E4eTiPguFTQREbkia+LWEFowlIpFNf9MxF1U0ERE5IpExkXSqGwj3f9MxI1U0EREJNuS05KJToimUdlGTkcR8WkqaCIikm3RCdGkulJV0ETcTAVNRESybfbW2fgZP1pXau10FBGfpoImIiLZ4rIuvl7/NR2rdaRckXJOxxHxaSpoIiKSLcv3LifmeAz96vdzOoqIz1NBExGRbPlx64/k989Pj1o9nI4i4vNU0EREJFsWxyymRYUWBOUPcjqKiM9TQRMRkcs6euYoUXFRtK/a3ukoInmCCpqIiFzW0pilWKwKmoiHqKCJiMglTd80nUd/fJSQwBCalm/qdByRPCGf0wFERMT7nDh7gv/8+h8STicwaf0kmpRrwme3fUZ+//xORxPJE9xW0IwxXwC3AgnW2roZY6OA24AUYCfwsLX2uDGmCrAZ2Jrx8lXW2sfdlU1ERC7ubNpZwj4LY/ex3RQMKEinap2YcfcMXRwg4kHuPMX5FdD1L2MLgLrW2huBbcBLmbbttNY2yPhRORMRcciiXYvYcXQHk3tP5uSLJ5l3/zyVMxEPc1tBs9b+Chz9y9h8a21axtNVQAV3fb6IiFyd77d8T9ECRelZuyfGGKfjiORJTl4k8Ajwc6bnVY0xUcaYpcaYm5wKJSKSV8UlxtFnRh9mbp5J9xrdNd9MxEGOXCRgjPk/IA34JmMoDqhkrT1ijGkMzDLG3GCtPZnFawcCAwEqVarkqcgiIj7vx20/MjV6KgD31bvP4TQieZvHC5oxph/nLh7oYK21ANbaZCA543GkMWYnUBOI+OvrrbWfAp8ChIWFWU/lFhHxdRviN1A4f2F2P7WbEoVKOB1HJE/z6ClOY0xX4AXgdmvt6UzjJY0x/hmPqwE1gF2ezCYiktdtSNhA3VJ1Vc5EvIDbCpoxZgqwEqhljNlnjOkPfAQUARYYY9YaY8Zm7N4GWG+MWQdMBx631h7N8o1FRCTHWWvZkLCBeqXqOR1FRHDjKU5rbZ8shsdfZN8ZwAx3ZRERkUuLOxXH0TNHVdBEvISWehIRETbEbwCgXmkVNBFvoIImIiKsi18HoCNoIl5CBU1ERIg4EEHVYlUJLRTqdBQRQQVNREQ4V9DCyoU5HUNEMqigiYjkcYdPH2b38d0qaCJeRAVNRCSPizwQCUCTck0cTiIif1JBExHJ4yIOnFu0pVHZRg4nEZE/qaCJiORxqw+splZoLYIDg52OIiIZVNBERPI4XSAg4n1U0ERE8rC4xDj2J+5XQRPxMipoIiJ52J/zz3SBgIh3UUETEcnDIg5E4Gf8aFCmgdNRRCQTFTQRkTzqdOpppkRPoUGZBgTlD3I6johkks/pACIi4oyh84ey/eh2Fj+42OkoIvIX2SpoxpiSwKNAlcyvsdY+4p5YIiLiTl+t/YoxEWN4vuXztKvazuk4IvIX2T2CNhtYBiwE0t0XR0REckJyWjL5/fNjjPnbtuNnjzN47mDaV23PGx3ecCCdiFxOdgtaIWvtC25NIiIiOcJlXbT6ohUu6+Kn+36ibJGyAMSfiqd04dJMWDuBpNQkRnUaRT4/zXQR8UbZvUhgjjHmFrcmERGRHDF3+1wi4yJZF7+Ou767C4D5O+dT7t1yrIlbwycRn9C8QnMt7STixbJb0J7iXEk7a4xJzPg56c5gIiJydUb9NoqKRSvy6s2vsiJ2BftP7mdpzFJc1sXjcx5n25FtDGoyyOmYInIJ2Spo1toi1lo/a21gxuMi1tqi7g4nIiKXt/nQZnpP682JsydYs
nsJv+75lWdbPEvvOr0B+HHbj0TGRQLn1t0sWagkd11/l5ORReQysj35wBhzO9Am42m4tXaOeyKJiMiV+OfCfzJn2xy6VO/ChHUTKF+kPI+HPU4B/wJUD6nO7K2ziYyLpEShEhw+fZgBjQZQIF8Bp2OLyCVk9zYbI4EmwDcZQ08ZY1pba190WzIREbmspTFLmbPt3O/L/7f4/zh8+jBju48lMF8gAL3r9GbUb6OwWN7t/C7Hzx5nSLMhTkYWkWzI7hy0W4BO1tovrLVfAF0zxkRExCFzt8+l6zddqVKsCkNbDOXw6cM0KNOAAY0GnN/nn63+SUjBEABaVmzJiHYjCC0U6lRkEcmmK1nqqVimx8E5HURERLLPWsvQ+UOpWqwqvw/4ncFNB1O7RG3GdB+Dv5//+f1CC4Xyfpf3ub7k9dQvU9/BxCJyJbI7B+1NIMoYswQwnJuL9pLbUomIyCUtiVnC5sN6gy8SAAAgAElEQVSb+arHV5QKKgXA5kGbs9z3gfoP8ED9BzwZT0SuUbYKmrV2ijEmnHPz0AzwgrX2oDuDiYhI1hKSEnhh4QuEFgzlnrr3OB1HRNzgkgXNGFPbWrvFGPPn3Qz3ZfxZzhhTzlq7xr3xREQkM2stXSd1ZfPhzUy+Y/L5iwFExLdc7gjas8BA4J0stlmgfY4nEhGRiwqPCSfqYBSf3/Y5ver0cjqOiLjJJQuatXZgxsNu1tqzmbcZY/Rrm4iIh320+iNCC4ZyX737nI4iIm6U3as4f8vmmIiIuElCUgKzt8zmkYaPUDCgoNNxRMSNLjcHrQxQHihojGnIuQsEAIoChdycTUREMpm2cRrpNp0H6z/odBQRcbPLzUHrAjwEVADezTSeCLzspkwiIpKFyRsmU69UPeqWqut0FBFxs8vNQZsATDDG9LbWzvBQJhER+Ytdx3axct9K3uzwptNRRMQDsnsftBnGmO7ADUBgpvHX3BVMRET+Z2r0VADurXuvw0lExBOydZGAMWYscA/wD87NQ7sLqOzGXCIiksFayzcbvqFVxVZUKVbF6Tgi4gHZvYqzpbX2QeCYtXYE0AKo6L5YIiLyp8i4SDYd2qRba4jkIdktaH/eA+20MaYckApUdU8kERHJbMTSEYQEhtC3Xl+no4iIh2S3oP1ojCkGjALWADHAFHeFEhERiEuM4x9z/8GcbXMY2nIowYHBTkcSEQ+57EUCxhg/YJG19jgwwxgzBwi01p5wezoRkTzKWkvfmX1ZEbuCXrV7MaTZEKcjiYgHXfYImrXWRaa1OK21ySpnIiLuNW3jNJbELOH9Lu8z856ZFM5f2OlIIuJB2T3FOd8Y09sYYy6/6/8YY74wxiQYY6IzjRU3xiwwxmzP+DMkY9wYYz40xuwwxqw3xjS6ks8SEfElE9ZNoHpIdQY2Hnj5nUXE52S3oD0LfAckG2NOGmMSjTEns/G6r4Cufxl7kXOnTGsAizKeA3QDamT8DATGZDObiIhPcVkXK/etpH3V9vj7+TsdR0QckK2CZq0tYq31s9bmt9YWzXheNBuv+xU4+pfhHsCEjMcTgJ6Zxifac1YBxYwxZbP3NUREfMemQ5s4fvY4rSq2cjqKiDgkWysJGGPaZDWeUcCuVGlrbVzG6+OMMaUyxssDsZn225cxFncVnyEikmut2LsCgFaVVNBE8qpsFTTg+UyPA4GmQCTQPgezZDW/zf5tJ2MGcu4UKJUqVcrBjxcR8Q4rYldQKqgU1UOqOx1FRByS3bU4b8v83BhTEXj7Kj8z3hhTNuPoWVkgIWN8HxeuTlABOJBFlk+BTwHCwsL+VuBERHK7FbEraFWxFVd4XZaI+JDsXiTwV/uAulf52h+AfhmP+wGzM40/mHE1Z3PgxJ+nQkVE8oqDpw6y69guzT8TyeOyOwdtNP873egHNADWZeN1U4C2QAljzD7gVWAkMM0Y0x/Yy7mF1wHmArcAO4DTwMPZ/hYiIj5C889EBLI/By0i0+M0YIq1dsXlXmSt7XORTR2y2NcCg7KZR0TEJ62IXUFgvkAaldWtIEXysuzOQZtgjCmZ8fiQeyOJiORN1lrCY8JpUq4J+f3zOx1HRBx0yTloGfPBhhtjDgNbgG3GmEPGmFc8E09EJO+Yu30uUQejuPuGu52OIiIOu9xFAk8DrYAm1tpQa20I0AxoZYx5xu3pRETyCGst/1z4T2qG1uSxxo85HUdEHHa5gvYg0Mdau/vPAWvtLuD+jG0iIpIDNh/ezKZDm3iuxXME+Ac4HUdEHHa5ghZgrT3818GMeWj6G0REJIcs3LUQgM7VOzucRES8weUKWspVbhMRkSuwaPciqodUp0qxKk5HEREvcLmrOOsbY05mMW44t+STiIhcozRXGuEx4dx7w71ORxERL3HJgmat9fdUEBGRvGrdwXWcTD5Ju6rtnI4iIl7iapd6EhGRHLJq3yoAWlZs6XASEfEWKmgiIg5btX8VZQuXpWLRik5HEREvoYImIuKwVftW0bxCc4wxTkcRES+hgiYi4qDDpw+z4+gOmldo7nQUEfEiKmgiIg6asWkGAK0rtXY4iYh4ExU0ERGHpKSn8MbyN2hWvhktKrRwOo6IeJHL3QdNRETc4GTySR6Z/Qh7T+xl3K3jNP9MRC6gI2giIg4YtWIU32/5nlGdRtGlehen44iIl9ERNBERB6zct5IGZRowtOVQp6OIiBfSETQREQ+z1hJxIIIm5Zo4HUVEvJQKmoiIh+08tpMTyScIKxfmdBQR8VIqaCIiHhZxIAJABU1ELkoFTUTEw1bvX01gvkBuKHmD01FExEupoImIeNjy2OWElQsjwD/A6Sgi4qVU0EREPCgxOZHIA5G0rdzW6Sgi4sVU0EREcsjSmKWs2rfqottd1sWK2BWk23RurnKzB5OJSG6j+6CJiOSA2BOxdJ/cnZJBJdk5ZCd+5sLff13WRdinYWw/up0AvwAt7SQil6QjaCIi18hay5B5Q0hKTSLmeAwr9q742z7L9iwj6mAUZ1LP0KZyG4LyBzmQVERyCxU0EZFr9PHqj5m1ZRbDbx5OUEAQX679EoDktGS+jf6Ws2ln+XLtlxTJX4Q9T+9h+t3THU4sIt5OpzhFRK7Bol2LePaXZ7m15q0Mu3kYB08dZGzkWIoWKEqZwmV4adFLdKzWkeV7l3N/vfspX7S805FFJBdQQRMRuUrxp+K5Y9od1Aytyde9vsbP+PFBtw/I55ePD37/AH/jT2jBUBbuWkjdUnV5rd1rTkcWkVxCBU1E5CpNWj+Jk8knmXbXNIoFFgMgv39+Puz2IceTj/PN+m+Yc98c4k/F07ZKW4IDgx1OLCK5hbHWOp3hqoWFhdmIiAinY4hIHlV/bH0K5ivIqgF/v7VGuiudmOMxVC9e3YFkIuKNjDGR1tpsrfGmiwRERK7C+vj1rI9fz4P1H8xyu7+fv8qZiFw1FTQRkauwePdiAHrU6uFwEhHxRSpoIiJX4bfY36gcXFlXZYqIW6igiYhchZX7VtKiolYDEBH3UEETEblCsSdi2XdyHy0rtHQ6ioj4KBU0EZErtCRmCQAt
K6qgiYh7qKCJiFyBUymnGLZkGNeXvJ76Zeo7HUdEfJRuVCsikk3WWp76+Sn2ntjLikdWkM9Pf4WKiHt4/G8XY0wt4NtMQ9WAV4BiwKPAoYzxl621cz0cT0Tkbw6eOsjag2uZsWkGX6z9gv+76f90elNE3MrjBc1auxVoAGCM8Qf2A98DDwPvWWv/6+lMIiIXsyZuDW2/aktiSiIBfgE83vhxrakpIm7n9PH5DsBOa+0eY4zDUURELnQm9QzdJ3cnpGAIs++dTcOyDc+vuSki4k5OXyRwLzAl0/PBxpj1xpgvjDEhToUSEQFYtHsRB08dZNyt42hXtZ3KmYh4jGMFzRiTH7gd+C5jaAxQnXOnP+OAdy7yuoHGmAhjTMShQ4ey2kVEJEf8sPUHiuQvQvuq7Z2OIiJ5jJNH0LoBa6y18QDW2nhrbbq11gV8BjTN6kXW2k+ttWHW2rCSJUt6MK6I5CWnU0/z47Yf6VajG/n98zsdR0TyGCcLWh8ynd40xpTNtK0XEO3xRCIiQFRcFKX/W5qDpw5y9/V3Ox1HRPIgRy4SMMYUAjoBj2UaftsY0wCwQMxftomIeMzoP0ZjMCx8YCEdqnVwOo6I5EGOFDRr7Wkg9C9jDziRRUTkT6npqew9sZfvNn3H3dffrXImIo5x+ipOERGvMTx8ONeNvo5TKafo16Cf03FEJA9z+j5oIiJe46ftP1GnRB0GNBrATZVucjqOiORhOoImIgIcOX2EdfHr6FO3D8+2eBbdPFtEnKQjaCKS58Ucj2H+zvkAtKvazuE0IiIqaCKSh1lruWPaHczaMguAQgGFaFo+y1swioh4lAqaiORZkXGRzNoyiyfDnsRiKV+kvG5KKyJeQQVNRPKEz9d8zpiIMYSVDaNkUEn+1eZffLP+G/L75+f1Dq9rnU0R8SoqaCLi886mneVfi/+Fy7qIOR7D0TNHKV6wOFOip3BrzVtVzkTE66igiYjPclkXvaf1ZsvhLcQnxbPwgYW0r9qell+05Ln5zwEwuMlgh1OKiPydbrMhIj7HWsvkDZN5aeFLzNoyi4OnDtKyYkvaV22PMYaXWr8EwHMtntNVmyLilXQETUR8zqLdi+g7sy8A7aq0Y/4D87HWnr+32e21bmfNwDXcWPpGJ2OKiFyUCpqI+Jw3l79JuSLl+KrHVzQq24h8fn//q65h2YYOJBMRyR4VNBHxKX/s/4PFuxfz307/pVP1Tk7HERG5KpqDJiI+5c3lbxISGMLAxgOdjiIictV0BE1Ecg2XdbE+fj03lr4RP/O/3y/DY8IJjwmnWflmzNoyi2FthlGkQBEHk4qIXBsVNBHxWsfPHiclPYVSQaWIPRFLj6k9iDoYxQM3PsD428cT4B9AYnIi9824j7hTcQBUC6nGU82ecji5iMi1UUETEUfM2DQDP+NHrzq9zo8lpSTx7sp36VOvD6WDStPs82bsPLqTgY0Hku5KZ9OhTfRv2J/xUeM5kXyCO2rfwdSNU4k7FccHXT/gUNIhhrYcSnBgsIPfTETk2qmgiYjHnUk9w4AfBxAUEETP2j0xxnA69TTdvunGsr3LGBMxhurFq7Pj6A561e7FmIgxGAyPNHyEz2//nIZlGjL458H8sPUHQgJDGH7zcIY0G+L01xIRyTEqaCLicdM3Tef42eMcP3ucqINRVAquxKT1k1i2dxlvtH+DT9d8yu5ju/n4lo8Z2Hggbb9qy7K9y86fuhzUdBBtKrcB4IZSN1wwH01ExBcYa63TGa5aWFiYjYiIcDqGiFwBay0tv2jJvpP72HdyH6EFQ0lMSSS4QDB1StZh6UNLL7ipLMDh04dZe3AtHat1dDC5iMi1McZEWmvDsrOvfu0UEY+avmk6q/at4v9u+j/CyoVx5MwRggsEc+j0IYa2GApwQTkDKFGohMqZiOQpOsUpIh7jsi6GLhhKgzINeLTRo1QoWoHIA5EMbjqY8Jhwbq15q9MRRUS8ggqaiGRLVFwUtUrUYt6OeUyJnkK5wuUY3nY4IQVDsv0eu47tYu+JvQxrMwx/P39urXnr+VLW+/re7oouIpLrqKCJyGUt27OMNl+1oVn5ZkTGRVI6qDQHTx1k1tZZLH94ORWDK2brfTbEbwDQIuUiIpehOWgickmp6ak8OfdJggKC+H3/74QWDGX9E+v5rf9vnDh7gnYT2jF+zXhc1nXZ91ofvx6D4YaSN3gguYhI7qUjaCJySa8ve53ohGhm3TOL2JOxNC7bmOIFi9O0fFPm3DeHgT8OZMCPA/Azfjzc8OFLvteGhA1cV/w6gvIHeSi9iEjupIImIhe19uBa/vPrf7j/xvvpUbvH37a3rtSajU9upOnnTXnt19e48/o7sVie/OlJTqWcYkCjAdxc+ebz62Kuj19PvdL1PP01RERyHRU0EQEgISmB/j/0p2GZhvSt15cVsSuYsXkGRQsU5cOuH170dcYYXm//Ol0mdSF4ZDD+fv5YaykWWIzZW2dTolAJlj+8nKD8Qew4uoO+9fp68FuJiOROKmgiwrwd83j0x0fZd3IfC3ct5IetP7Aufh0Ab7R/47JXanau3pnwfuEs3bOU06mn6VGrBw3LNmRpzFIe+P4Bmo9vTnJaMgH+AdxS4xZPfCURkVxNKwmI5HGLdy+m48SO1ClZhxdavUC/Wf0A6Fm7JynpKUztPfX8KcqrsXr/at5a8RblipRjSLMhXFf8upyKLiKSq1zJSgIqaCJ52JLdS+g7sy9FCxQlcmAkQfmDaPtVW3Yc3cGOITsIzBfodEQREZ9xJQVNpzhF8qCVsSv515J/sXj3YsoXKc+0u6adv7Jy+t3TOZN6RuVMRMRBKmgiecjmQ5sZumAoc7fPpVRQKd7v8j6PhT12QRkrUaiEgwlFRARU0ETyjKNnjtLp606cTj3Nmx3eZHDTwRTOX9jpWCIikgUVNBEfFRUXxTsr3+HomaMMaDSAL6K+ICEpgVUDVtGobCOn44mIyCWooIn4mDVxa+j/Q3/WHlxLSGAIhfMXpve0cwuRj+k+RuVMRCQXUEET8QEjl48kuEAwTzR5gleWvELsiVjeaP8GTzR5gvz++RkePpxWFVtluRqAiIh4HxU0kVzu2JljDFsyjDRXGjuP7eTnHT/zQqsXeOmml87v83antx1MKCIiV0oFTSSXm7t9LmmuNFpXas07K98B4OEGl160XEREvJsKmkguMTV6KlsOb2F42+EXjM/aOouyhcuy9KGl5y8EqBFaw5mQIiKSIxwraMaYGCARSAfSrLVhxpjiwLdAFSAGuNtae8ypjCKekuZKw8/44Wf8stw+Y9MM+s7si8u6aFulLfVL12fZ3mUUzl+YOdvm0K9+P/yMHwMaDfBwchERcQenj6C1s9YezvT8RWCRtXakMebFjOcvOBNNxP2SUpLo9W0vFuxaQMMyDVn96Gr8/fzPbz+bdpYXFrzAh398SNPyTTmQeIB7pt9DYnIiZ9LOAFApuBKv3PyKU19BRETcIOtf153TA5iQ8XgC0NPBLCJuFXsilq7fdGXR7kXcfcPdRB2MYubmmaS70gFISU+h9Re
t+fCPD/lH038Q3i+cdzu/S+mg0vRv2J85febwfMvnmX//fMoVKefwtxERkZzk2GLpxpjdwDHAAuOstZ8aY45ba4tl2ueYtTbkYu+hxdIlt5q0fhKD5w4mzZXG+NvHc+f1d1Ln4zrsObEHP+PHO53fIcAvgIFzBvJ1r6+5/8b7nY4sIiLXKLcslt7KWnvAGFMKWGCM2ZKdFxljBgIDASpVquTOfCI5zlrLiKUjGLF0BK0qtmJCzwlUL14dgHc6v8N7q97DGMOguYPwN/40KdeEvvX6OpxaREQ8zbEjaBeEMGY4cAp4FGhrrY0zxpQFwq21tS72Oh1Bk9wkOS2ZV5a8wtu/vc1DDR7i89s+v2C+2Z9c1sWXUV8yLnIc73R+h5sq3+RAWhERyWlXcgTNkYJmjAkC/Ky1iRmPFwCvAR2AI5kuEihurf3nxd5HBU1yizGrxzBo7iAslifCnuCjWz666BWbIiLim3LDKc7SwPfGmD8zTLbWzjPGrAamGWP6A3uBuxzKJ5JjrLV8+MeH3FDqBobfPJw76txBxv/3RUREsuRIQbPW7gLqZzF+hHNH0UR8xpq4NWw5vIVxt46j9/W9nY4jIiK5gM6xiLjR3O1zGfDjAPL75+eu63VAWEREskcFTcRNNiZs5I5v7+DE2ROM6T6GkIIXvWOMiIjIBZxeSUDEJ7msi36z+lG0QFFW9l9J6cKlnY4kIiK5iAqaiBt8s/4bIuMimdRrksqZiIhcMZ3iFMlh8afieXnxyzQu25g+9fo4HUdERHIhHUETySFHTh/htaWv8dP2nzh65igz756pe52JiMhVUUETuUbRCdGsjF1J+J5wvo3+lvpl6vPZbZ/RpHwTp6OJiEgupYImcpVOJp9k2sZpPPPLM5xKOQXAv276F/9u/2+Hk4mISG6ngiZyFay1dJjYgYgDETQs05AhzYbw+77fefmml52OJiIiPkAFTeQqLN69mIgDEbzX5T2GNBuCn/HjoQYPOR1LRER8hGYwi1yh1PRU3lz+JqWCSvF42OO6EEBERHKcjqCJXIE0VxqdJ3UmPCacD7p+QGC+QKcjiYiID1JBE8mmpJQkwmPCCY8JZ3S30QxuOtjpSCIi4qNU0EQu4eCpg4THhDNp/SR+3vEz1UKqUSqoFI81fszpaCIi4sNU0EQuYtjiYby+7HUsluACwdQpUYeNhzYytMVQAvwDnI4nIiI+TAVNJAuxJ2J5a8Vb3FbrNl5p8wo3lr6RxJRE3l7xNs80f8bpeCIi4uNU0ESy8MayN7BYPuz6IZWLVQageMHijOw40uFkIiKSF6igiWSy8+hOpkZPZWzkWAY1GXS+nImIiHiSCprkeWmuNJbGLGV81HimRE8B4Laat/Fel/ccTiYiInmVCprkWSeTTzJqxSjGRIzhyJkjBAUE8WKrF7m37r3UK11PN6AVERHHqKBJnrL/5H5KFCpBqiuVjhM7svrAanrV7sUDNz5Al+u6UCigkNMRRUREVNAk7wiPCafrpK40Kd+EAL8A1sStYdY9s+hRu4fT0URERC6ggiY+y1rL9E3TmbpxKvfccA8DfhhAyaCSLN+7HH/jz4SeE1TORETEK6mgiU/adWwXg+YOYt6OeRgMMzfPpFJwJVY8soLV+1dTvGBxbq5ys9MxRUREsqSCJj5nwtoJPP7T4+Tzy8f7Xd6nW41ujFoxiudbPU+FohWoULSC0xFFREQuSQVNfMrxs8cZMm8Ijcs2ZuqdU8+Xsc9u/8zhZCIiItmn+wiIT/lk9SecTD7J6G6jdaRMRERyLRU08Rkp6Sl8+PuHdL2uKw3LNnQ6joiIyFVTQZNczVrLrmO7cFkXs7fMJj4pnsFNBjsdS0RE5JpoDprkWtuObKPH1B5sObyFikUr4u/nT6XgSnS9rqvT0URERK6JjqBJrvVq+KvsP7mfUZ1G0aBMA06lnOL5ls/j7+fvdDQREZFroiNokivFHI/hu43f8UzzZxjacihDWw51OpKIiEiO0RE0yXWOnTlGnxl98DN+DGk2xOk4IiIiOU4FTXKV1PRUbp96O2vi1jD1zqlUDK7odCQREZEcp1Ockmu8t/I9pkRPYfWB1UzpPYU76tzhdCQRERG30BE0yRUmrJ3As/Of5UzaGd7p/A731r3X6UgiIiJuoyNo4lUSkxM5mXyS8kXLY61lQ8IGZmyawZvL36RdlXbMf2A++fz0f1sREfFt+pdOvEZcYhztJrRj38l9DGszjO82fUdkXCQAd99wN2O7j1U5ExGRPEH/2onjohOimbJhCmMjx5Kclkz5ouV5cdGL1Aqtxce3fEzX67pSLaSa0zFFREQ8RgVNHOOyLoaHD+c/v/4HYwzda3Tn3+3+TeVilVkfv57WlVrjZzRNUkRE8h6PFzRjTEVgIlAGcAGfWms/MMYMBx4FDmXs+rK1dq6n84nnvLLkFV5f9jr96vfj7U5vUyqo1PltbSq3cTCZiIiIs5w4gpYGPGetXWOMKQJEGmMWZGx7z1r7XwcyiZulpKdwOvU0BkN8UjzTN03n9WWvM6DhAD697VOMMU5HFBER8RoeL2jW2jggLuNxojFmM1De0znEvVLSU9h8aDMbD23k2JljvLH8DQ4kHrhgn/vq3ccn3T9RORMREfkLR+egGWOqAA2B34FWwGBjzINABOeOsh3L4jUDgYEAlSpV8lhWuTxrLVOipzDqt1FsTNhIqiv1/LbrS17PU82ewmVdlC9SnqD8QfSs3VNzzERERLJgrLXOfLAxhYGlwOvW2pnGmNLAYcAC/wbKWmsfudR7hIWF2YiICPeHlWwZuXwkLy16iQZlGtClehfql65PvdL1KBRQiErBlXSLDBERydOMMZHW2rDs7OvIv5jGmABgBvCNtXYmgLU2PtP2z4A5TmSTq7P/5H5eW/oaPWv35Lu7vlMZExERuQZOXMVpgPHAZmvtu5nGy2bMTwPoBUR7OptcmYgDEYz+YzRprjSi4qJIt+m82/ldlTMREZFr5MS/pK2AB4ANxpi1GWMvA32MMQ04d4ozBnjMgWxyGZM3TGZ4+HAC8wWyIWEDwQWCCQ4MplRQKab0nkLVkKpORxQREcn1nLiKczmQ1WV7uueZl0pISmDRrkXM2T6HyRsm06BMA4ILBDOq0ygGNh5I0QJFnY4oIiLiU3QuSrK0/ch20lxpuKyLbt90I/Zk7P+3d/+xVdVnHMffT1tafrTSllJAqEAHUVBRFAhNjTidv9CoOI0aRDJJplEzTUYMGqdgXKL/CJmYqdlksBmZYYuaLVlQUKtDfoOAVRArv5RRgRYoDErbZ3/cL/Vab7ED2nNu+3klN/ec7/nec57DE9Lnfr/nnkOvbr2YUT6DWT+dRXZmdtQhioiIdFoq0OR79tTt4dY3bmXZzmXNbXnZeSy5ZwnjB42nZ7eeEUYnIiLSNahAExZ/uZhnKp6h+nA1fXr2Yc03a3j+mufpn9ufmqM1TBg8gfOLz486TBERkS5DBVoX4u7sOLCDQWcNIsMyOHDsAC+seIGZH8xkaP5QsjKyWLZzGbOvnc0j4x+JOlwREZEuSw
VaF+DurN29lmf//SyLKhcxIHcAh48f5uCxgwDcPvJ25t08D4CPdnzE1T+5OspwRUREujwVaJ1Ekzfx8c6PGTtwLNmZ2Wzdv5Wt+7eSl53H3FVzWbhpIdmZ2Uwvm85XtV/Rt2dfhuQP4fLBl1NWUta8n2uHXRvhWYiIiAioQEtLR44f4YmlT1DfWM/kCyezu243H27/kDkr5jCiaASj+o1iUeUiGr2x+TOzrpjFA2MfoKhnUYSRi4iISFuoQGsHh44dYmP1Rgq6F1DfWE9VTRUZlkFZSRnFvYrbvB93p66+jpysHGqP1lLYo5CVX6/k3rfuZfO+zWRYBi+uerG5/6TzJlFVU0XF9gqmjZ7G1IunUnu0lqKeRYwbOK49TlVERETagQq0H9HQ1ND86KJjDceo/LYSgJF9R1J7tJYdB3ZQsb2CjdUbuaD4AvK75zPz/Zl8fejrH+wrKyOL+y69j4fGPUTF9gp2HdxFeUk5y3ctp3JvJYfrD9PQ1MDYs8eSl5PHvPXz+Hzv5z/Yz8C8gbw75V2KehaxqXoTJb1LqKqpYsqoKWRmZLbvP4iIiIi0O3P3qGM4ZWPGjPHVq1e32/6PHD/CuXPPZeKwiUwYMoGnP3iazfs2p+xb3KuY6sPVAFxYfCFPTniS443HycrIorSglPrGehZ8soCX1rzU/BnDcJwMy6C0oJSzcs7C3Vn/n/U4TnlJOROHT6TJm+id05vqw9UMLRjKpPMmUdCjoN3OW6e1CSgAAAb0SURBVERERM48M1vj7mPa0lcjaCdRV1/HNaXXsGDDAl5Z+wrFvYqZf8t8sjOz+WLfFxT2KKR/bn/KSso4O+9sPq3+lIamBkb1G0XimfDfV1ZSxpSLprBl3xZG9x9Nv9x+rPlmDeXnlJPfPb+537babTR5E6UFpR15uiIiIhITGkFrg7r6Orbs28LQ/KEauRIREZFTohG0Myw3O5dLBlwSdRgiIiLSRWREHYCIiIiIfJ8KNBEREZGYUYEmIiIiEjMq0ERERERiRgWaiIiISMyoQBMRERGJGRVoIiIiIjGjAk1EREQkZlSgiYiIiMSMCjQRERGRmFGBJiIiIhIzKtBEREREYkYFmoiIiEjMqEATERERiRkVaCIiIiIxY+4edQynzMy+BbZ3wKGKgL0dcBxpH8pf+lMO059ymP6Uw9M32N37tqVjWhdoHcXMVrv7mKjjkFOj/KU/5TD9KYfpTznsWJriFBEREYkZFWgiIiIiMaMCrW1eiToAOS3KX/pTDtOfcpj+lMMOpGvQRERERGJGI2giIiIiMaMC7STM7Doz22xmW81sRtTxSGpm9qqZVZvZpqS2QjN7x8y+CO8Fod3M7HchpxvM7JLoIhcAMysxs/fM7DMz+9TMHg7tymGaMLPuZrbSzD4JOZwV2oea2YqQw7+aWXZozwnrW8P2IVHGL98xs0wzW2dm/wjrymFEVKC1wswygReB64GRwF1mNjLaqKQVfwKua9E2A1ji7sOBJWEdEvkcHl6/BH7fQTFK6xqAX7v7CGA88GD4v6Ycpo9jwJXufhFwMXCdmY0HngNmhxzWANNC/2lAjbsPA2aHfhIPDwOfJa0rhxFRgda6ccBWd69y93pgIXBzxDFJCu5eAexv0XwzMD8szwduSWpf4AnLgXwzG9AxkUoq7r7b3deG5UMk/jgMRDlMGyEXdWG1W3g5cCWwKLS3zOGJ3C4CrjIz66BwpRVmNgi4AfhDWDeUw8ioQGvdQGBn0vqu0CbpoZ+774ZEAQAUh3blNcbCNMloYAXKYVoJU2PrgWrgHeBLoNbdG0KX5Dw15zBsPwD06diIJYU5wKNAU1jvg3IYGRVorUv1TUA/eU1/ymtMmVku8DfgEXc/eLKuKdqUw4i5e6O7XwwMIjEDMSJVt/CuHMaMmd0IVLv7muTmFF2Vww6iAq11u4CSpPVBwDcRxSL/vz0npr3Ce3VoV15jyMy6kSjOXnP3v4dm5TANuXst8D6J6wnzzSwrbErOU3MOw/be/PAyBelY5cBNZraNxCU9V5IYUVMOI6ICrXWrgOHhFyzZwJ3A2xHHJG33NjA1LE8F3kpqvyf8EnA8cODENJpEI1y38kfgM3d/PmmTcpgmzKyvmeWH5R7Az0hcS/gecFvo1jKHJ3J7G7DUdVPOSLn7Y+4+yN2HkPh7t9TdJ6McRkY3qj0JM5tI4htEJvCqu/824pAkBTN7HbgCKAL2AE8BbwJvAOcAO4Db3X1/KAbmkvjV5xHgF+6+Ooq4JcHMLgM+BDby3bUvj5O4Dk05TANmNorEBeOZJL74v+HuT5tZKYnRmEJgHXC3ux8zs+7An0lcb7gfuNPdq6KJXloysyuA6e5+o3IYHRVoIiIiIjGjKU4RERGRmFGBJiIiIhIzKtBEREREYkYFmoiIiEjMqEATERERiRkVaCLSaZhZo5mtT3rN+JH+95vZPWfguNvMrOh09yMicoJusyEinYaZ1bl7bgTH3QaMcfe9HX1sEemcNIImIp1eGOF6zsxWhtew0D7TzKaH5V+ZWaWZbTCzhaGt0MzeDG3Lww1ZMbM+ZrbYzNaZ2cskPZfQzO4Ox1hvZi+bWWYEpywiaU4Fmoh0Jj1aTHHekbTtoLuPI/EUgjkpPjsDGO3uo4D7Q9ssYF1oexxYENqfAj5y99EkHnlzDoCZjQDuAMrDg8Mbgcln9hRFpCvI+vEuIiJp47+hMErl9aT32Sm2bwBeM7M3STwqDOAy4OcA7r40jJz1Bi4Hbg3t/zSzmtD/KuBSYFXiiVT04LuHvIuItJkKNBHpKryV5RNuIFF43QT8xszOJ2nqMsVnU+3DgPnu/tjpBCoioilOEekq7kh6/zh5g5llACXu/h7wKJAP5AIVhCnK8ADpve5+sEX79UBB2NUS4DYzKw7bCs1scDuek4h0UhpBE5HOpIeZrU9a/5e7n7jVRo6ZrSDxxfSuFp/LBP4Spi8NmO3utWY2E5hnZhuAI8DU0H8W8LqZrQU+AHYAuHulmT0BLA5F33HgQWD7mT5REencdJsNEen0dBsMEUk3muIUERERiRmNoImIiIjEjEbQRERERGJGBZqIiIhIzKhAExEREYkZFWgiIiIiMaMCTURERCRmVKCJiIiIxMz/AN9dNf2BTnBhAAAAAElFTkSuQmCC\n",
180 | "text/plain": [
181 | "<matplotlib.figure.Figure at 0x...>"
182 | ]
183 | },
184 | "metadata": {},
185 | "output_type": "display_data"
186 | }
187 | ],
188 | "source": [
189 | "plt.figure(figsize=(10,7))\n",
190 | "plt.ylabel(\"Duration\")\n",
191 | "plt.xlabel(\"Episode\")\n",
192 | "plt.plot(running_mean(time_steps, 50), color='green')"
193 | ]
194 | }
195 | ],
196 | "metadata": {
197 | "kernelspec": {
198 | "display_name": "Python 3",
199 | "language": "python",
200 | "name": "python3"
201 | },
202 | "language_info": {
203 | "codemirror_mode": {
204 | "name": "ipython",
205 | "version": 3
206 | },
207 | "file_extension": ".py",
208 | "mimetype": "text/x-python",
209 | "name": "python",
210 | "nbconvert_exporter": "python",
211 | "pygments_lexer": "ipython3",
212 | "version": "3.6.4"
213 | }
214 | },
215 | "nbformat": 4,
216 | "nbformat_minor": 2
217 | }
218 |
--------------------------------------------------------------------------------
/old_but_more_detailed/Ch6_Evolutionary.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Deep Reinforcement Learning _in Action_\n",
8 | "### Chapter 6\n",
9 | "#### Evolutionary Strategies"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 1,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import torch\n",
20 | "import torch.nn.functional as F\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import gym"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "Creating an Agent"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 21,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stderr",
39 | "output_type": "stream",
40 | "text": [
41 | "/Users/brandonbrown/anaconda3/envs/deeprl/lib/python3.6/site-packages/gym/envs/registration.py:14: PkgResourcesDeprecationWarning: Parameters to load are deprecated. Call .resolve and .require separately.\n",
42 | " result = entry_point.load(False)\n"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "env = gym.make('CartPole-v1')"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 2,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "def init_random_agent_weights(state_space=4, action_space=2):\n",
57 | " return [\n",
58 | " torch.rand(state_space, 10), # fc1 weights\n",
59 | " torch.rand(10), # fc1 bias\n",
60 | " torch.rand(10, 10), # fc2 weights\n",
61 | " torch.rand(10), # fc2 bias\n",
62 | " torch.rand(10, action_space), # fc3 weights\n",
63 | " torch.rand(action_space), # fc3 bias\n",
64 | " ]\n",
65 | "\n",
66 | "def get_action_from_agent_weights(agent_weight, state):\n",
67 | " x = F.relu(torch.add(torch.mm(torch.Tensor(state.reshape(1,-1)), agent_weight[0]), agent_weight[1]))\n",
68 | " x = F.relu(torch.add(torch.mm(x, agent_weight[2]), agent_weight[3]))\n",
69 | " act_prob = F.softmax(torch.add(torch.mm(x, agent_weight[4]), agent_weight[5])).detach().numpy()[0]\n",
70 | " action = np.random.choice(range(len(act_prob)), p=act_prob)\n",
71 | " return action"
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {},
77 | "source": [
78 | "Determining the Agent Fitness"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "execution_count": 3,
84 | "metadata": {},
85 | "outputs": [],
86 | "source": [
87 | "def get_fitness(env, agent_weights, max_episode_length=500, trials=5):\n",
88 | " total_reward = 0\n",
89 | " for _ in range(trials):\n",
90 | " observation = env.reset()\n",
91 | " for i in range(max_episode_length):\n",
92 | " action = get_action_from_agent_weights(agent_weights, observation)\n",
93 | " observation, reward, done, info = env.step(action)\n",
94 | " total_reward += reward\n",
95 | " if done: break\n",
96 | " return total_reward / trials\n"
97 | ]
98 | },
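Fitness here is the total reward averaged over trials CartPole episodes, so it is a noisy estimate of agent quality. A quick check, assuming the env and weight helpers defined above (a random agent typically averages around 20 steps):

    agent = init_random_agent_weights()
    print(get_fitness(env, agent, trials=10))  # more trials -> a less noisy estimate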
99 | {
100 | "cell_type": "markdown",
101 | "metadata": {},
102 | "source": [
103 | "Cross"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 4,
109 | "metadata": {},
110 | "outputs": [],
111 | "source": [
112 | "def cross(agent1_weights, agent2_weights):\n",
113 | " num_params = len(agent1_weights)\n",
114 | " crossover_idx = np.random.randint(0, num_params)\n",
115 | " new_weights = agent1_weights[:crossover_idx] + agent2_weights[crossover_idx:]\n",
116 | " new_weights = mutate(new_weights)\n",
117 | " return new_weights"
118 | ]
119 | },
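cross is single-point crossover at the granularity of whole parameter tensors: the child takes its first crossover_idx tensors from one parent and the rest from the other, then is mutated. A usage sketch with the helpers defined above:

    p1 = init_random_agent_weights()
    p2 = init_random_agent_weights()
    child = cross(p1, p2)         # e.g. crossover_idx=2: fc1 from p1, fc2/fc3 from p2, then mutated
    assert len(child) == len(p1)  # still the same six parameter tensors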
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "Mutate"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 5,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "def mutate(new_weights):\n",
134 | " num_params = len(new_weights)\n",
135 | " num_params_to_update = np.random.randint(0, num_params) # num of params to change\n",
136 | " for i in range(num_params_to_update):\n",
137 | " n = np.random.randint(0, num_params)\n",
138 | " new_weights[n] = new_weights[n] + torch.rand(new_weights[n].size())\n",
139 | " return new_weights"
140 | ]
141 | },
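Note that mutate adds torch.rand noise, which is uniform on [0, 1) and so only ever pushes mutated parameters upward. A common alternative is small zero-mean noise; a minimal sketch (the 0.1 scale is an arbitrary choice, not from the book):

    import numpy as np
    import torch

    def mutate_zero_mean(weights, scale=0.1):
        n = np.random.randint(0, len(weights))  # pick one parameter tensor at random
        weights[n] = weights[n] + scale * torch.randn(weights[n].size())  # zero-mean Gaussian noise
        return weights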
142 | {
143 | "cell_type": "code",
144 | "execution_count": 6,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "def breed(agent1_weights, agent2_weight, generation_size=10):\n",
149 | " next_generation = [agent1_weights, agent2_weight]\n",
150 | " for _ in range(generation_size - 2):\n",
151 | " next_generation.append(cross(agent1_weights, agent2_weight))\n",
152 | " return next_generation\n",
153 | "\n",
154 | "def reproduce(env, agents_weights, generation_size):\n",
155 | " top_agents_weights = sorted(agents_weights, reverse=True, key=lambda a: get_fitness(env, a))[:2]\n",
156 | " new_agents_weights = breed(top_agents_weights[0], top_agents_weights[1], generation_size)\n",
157 | " return new_agents_weights"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "n_generations = 100\n",
167 | "generation_size = 20\n",
168 | "generation_fitness = []\n",
169 | "max_fitness = 0\n",
170 | "\n",
171 | "agents = [init_random_agent_weights(), init_random_agent_weights()]\n",
172 | "\n",
173 | "for i in range(n_generations):\n",
174 | " next_generation = reproduce(env, agents, generation_size)\n",
175 | " ranked_generation = sorted([get_fitness(env, a) for a in next_generation], reverse=True)\n",
176 | " avg_fitness = (ranked_generation[0] + ranked_generation[1]) / 2\n",
177 | " print(i, avg_fitness)\n",
178 | " generation_fitness.append(avg_fitness)\n",
179 | " agents = next_generation"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {},
185 | "source": [
186 | "### Plot loss"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 8,
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "[]"
198 | ]
199 | },
200 | "execution_count": 8,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | },
204 | {
205 | "data": {
206 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmcAAAF3CAYAAADgjOwXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XmYXVWd7//3N1WpzPNEJkhIgpEwE5CpWwFlUBDbAUVtULBp+2KrP+916u5f4+22fz/16dbWbq824AB0I4pKi6gMIiqzJMwJQyohoSpTDakMlUrN6/5RO1hAqnIqdaaqvF/PU8/Ze599zvnWfuDUJ2vttVaklJAkSVJ5GFHqAiRJkvRHhjNJkqQyYjiTJEkqI4YzSZKkMmI4kyRJKiOGM0mSpDJiOJMkSSojhjNJkqQyYjiTJEkqI4YzSZKkMlJZ6gIGY/r06WnBggWlLkOSJGm/Vq5c2ZBSmrG/84Z0OFuwYAErVqwodRmSJEn7FREbcjnPbk1JkqQyYjiTJEkqI4YzSZKkMmI4kyRJKiOGM0mSpDJiOJMkSSojhjNJkqQyYjiTJEkqI4YzSZKkMlLQcBYR6yPi6Yh4IiJWZMemRsTdEbEme5ySHY+I+EZEVEfEUxFxQiFrkyRJKkfFaDk7M6V0XEppebb/OeCelNIS4J5sH+B8YEn2cyXwrSLUJkmSVFZKsbbmRcCbsu3rgd8Cn82O35BSSsDDETE5ImanlDaXoEZJBbBywzZ2tnaWugxJeo1F08dz6LSxpS4DKHw4S8BdEZGA/0gpXQPM6hW4tgCzsu25QE2v19Zmx14RziLiSnpa1jj00EMLWLqkfHqqdjvv+tZDpS5Dkvbpc+cv5aNvXFTqMoDCh7MzUkobI2ImcHdEPNf7yZRSyoJbzrKAdw3A8uXLB/RaSaXz+xfqAbjpI29gTFVFiauRpFeaM3lMqUt4WUHDWUppY/ZYFxG3AicDW/d2V0bEbKAuO30jML/Xy+dlxyQNA/dXN7BszkROWzy91KVIUlkr2ICAiBgXERP2bgPnAM8AtwGXZaddBvws274NuDQbtXkKsMP7zaThoaW9k8c2bOcMg5kk7VchW85mAbdGxN7PuSmldEdEPAr8KCKuADYAF2fn/xJ4K1ANtAAfLmBtkoro0fVNtHd1c7rhTJL2q2DhLKW0Djh2H8cbgbP3cTwBVxWqHkml80B1A1UVIzhpwdRSlyJJZc8VAiQV3P1rGjjxsCkOBJCkHBjOJBVUY3Mbqzfv5IwldmlKUi4MZ5IK6sG1jQDebyZJOTKcSSqoB6obmDC6kqPnTip1KZI0JBjOJBVMSon71jRw2qJpVIyIUpcjSUOC4UxSwWxobGHj9j3ObyZJA2A4k1Qw91c3AN5vJkkDYTiTVDAPVDcwZ9JoFk4fV+pSJGnIMJxJKoiu7sSDaxs5ffF0spVCJEk5MJxJKohVm3awY0+H85tJ0gAZziQVxN77zU5bZDiTpIEwnEkqiAeqG1h6yARmTBhV6lIkaUgxnEnKu9aOLh5d3+QoTUk6AIYzSXm3Yn0T7Z3dzm8mSQfAcCYp7+6vbmBkRXDywqmlLkWShhzDmaS8e6C6geMPncK4UZWlLkWShhzDmaS8atrdzjObdtilKUkHyHAmKa8eWtdISi7ZJEkHynAmKa/ur25g/KhKjp03qdSlSNKQZDiTlFcPVDdwyuHTqKzw60WSDoTfnpLypmZbCxsaWzhj8bRSlyJJQ5bhTFLePJAt2eR6mpJ04AxnkvLm/uoGZk0cxaIZ40tdiiQNWYYzSXnR3Z14cG0jpy+eTkSUuhxJGrIMZ5LyYvXmnWzb3e78ZpI0SIYzSXmx934z5zeTpMExnEnKi/urG1gyczyzJo4udSmSNKQZziQNWmtHF4+u32armSTlgeFM0qA99lITrR3d3m8mSXlgOJM0aKs27gRg+YIpJa5EkoY+w5mkQatpamHi6Eomj60qdSmSNOQZziQNWm3THuZNGVvqMiRpWDCcSRq02qYW5k0ZU+oyJGlYMJxJGpSUEjXb9jB/qi1nkpQPhjNJg7Jtdzt7OrpsOZOkPDGcSRqUmqY9AMz3njNJygvDmaRBqW1qAWDeVFvOJCkfDGeSBqVmW0/LmaM1JSk/DGeSBqW2qYUpY0cyflRlqUuRpGHBcCZpUJzjTJLyy3AmaVBqmlqY7/1mkpQ3hjNJByylxEZbziQprwxnkg5Y/a422jq7me8cZ5KUN4YzSQds7xxntpxJUv4YziQdsJfnOLPlTJLyxnAm6YDV2nImSXlnOJN0wGqbWpg+vooxVRWlLkWShg3DmaQDVrPNkZqSlG+GM0kHrLapxfvNJCnPDGeSDkh3d2LjdlvOJCnfDGeSDsjWXa10dCVXB5CkPCt4OIuIioh4PCJuz/YXRsQjEVEdET+MiKrs+Khsvzp7fkGha5N04BypKUmFUYyWs08Az/ba/zLwtZTSYqAJuCI7fgXQlB3/WnaepDJVs61njjNXB5Ck/CpoOIuIecDbgOuy/QDOAn6cnXI98I5s+6Jsn+z5s7PzJZWhvS1ncyYbziQpnwrdcvavwGeA7mx/GrA9pdSZ7dcCc7PtuUANQPb8jux8SWWotqmFmRNGMXqkc5xJUj4VLJxFxAVAXUppZZ7f98qIWBERK+rr6/P51pIGoGbbHuZP9X4zScq3QracnQ68PSLWAzfT0535dWByRFRm58wDNmbbG4H5ANnzk4DGV79pSumalNLylNLyGTNmFLB8Sf2p3e4cZ5JUCAULZymlz6eU5qWUFgDvA36TUvoAcC/w7uy0y4CfZdu3Zftkz/8mpZQKVZ+kA9fZ1c2m7a3Md6SmJOVdKeY5+yzwqYiopueesu9kx78DTMuOfwr4XAlqk5SDLTtb6epOtpxJUgFU7v+UwUsp/Rb4bba9Djh5H+e0Au8pRj2SBqdmm3OcSVKhuEKApAGrbcrmOHN1AEnKO8OZpAGrbdpDBMyeZDiTpHwznEkasJqmFmZPHE1VpV8hkpRvfrNKGrDapj3ebyZJBWI4kzRgtduc40ySCsVwJmlA2ju72bKzlXmuDiBJBWE4kzQgW3a00p2w5UySCsRwJmlAavZOo+E9Z5JUEIYzSQOyd44zW84kqTAMZ5IGpGbbHipGBLMnjS51KZI0LBnOJA1IbVMLsyeNprLCrw9JKgS/XSUNSM8cZ3ZpSlKhGM4kDUhNU4uDASSpgAxnknLW1tnF1p1trg4gSQVkOJMOUjXbWvjRozW8sHVXzq/Z2LQHgPlT7daUpEKpLHUBkopjd1snD69r5Pcv1HPfmgbWNewG4IRDJ/PT/3F6Tu9Rm4UzW84kqXAMZ9Iw1d2dWL15J79fU8/vX6hn5YYmOroSo0eO4JTDp/HBUw6jpqmF7z2wntqmlpwC1x/DmS1nklQohjNpGGpsbuPd336IF7PWsdfPnsjlZyzkT5fMYPmCKYyqrAB6uja/98B6bn9qMx9946L9vm9NUwsjK4JZE53jTJIKxXAmDUP/fNfz1Gx
r4cvvOpozl85k5oR9h6n5U8dy3PzJ/PzJTTmFs9qmPcyZPIaKEZHvkiVJGQcESMPMU7XbufnRGi47bQHvPenQPoPZXhceO4dVm3aytr55v+9ds81pNCSp0Axn0jDS3Z24+rZVTBs3ik+8eUlOr3nb0bOJgNuf3Lzfc52AVpIKz3AmlZGdrR08s3HHAb/+p49v5PGXtvPZ817HxNEjc3rNIZNGc/KCqdz25EZSSn2et6e9i4bmNsOZJBWY4UwqI1//9Rre/u/385vntg74tTtbO/jSr57j+EMn864T5g3otW8/bg5r63fz7Oa+5zzbuL0F6LlPTZJUOIYzqYzcv6aB7gQf/8ETPL8l98lhAb7x6zU07m7jf799GSMGeMP++UfNpmJE8POnNvV5To3TaEhSURjOpDJRv6uN57fu4rJTD2NsVQVXXP8ojc1tOb12zdZdfP/B9bx3+XyOmTd5wJ89dVwVZyyezs+f3NRn12bttqzlzAEBklRQhjOpTDy0rhGAd54wj2svXU79rjY++p8raevs6vd1KSW+8PNVjK2q4NPnvu6AP//CY+dQ27SHJ2q27/P52qY9VFWOYPr4UQf8GZKk/TOcSWXiobUNTBhdybI5Ezl2/mT+5eJjeXR9E3976zP93qh/xzNbeKC6kU+95QimDSI4nbNsFlWVI7jtyX13bdY0tTBv8pgBd5lKkgbGcCaViQeqG3nDwmlUVvT8b3nBMXP45JuX8OOVtVzz+3X7fM2e9i6++ItnWXrIBD54ymGD+vyJo0dy5utm8IunNtPV/dowWNu0h3kOBpCkgjOcSWWgZlsLL21r4fTF015x/BNnL+GCY2bzpTue4+7Vrx3B+a3frWXj9j184e3LXg51g3HhsXOo29XGH17c9prnnONMkopjv9/mEfGViJgYESMj4p6IqI+IDxajOOlg8dDanvvNTls0/RXHI4J/fs+xHDN3Ep+4+XFWb9r58nM121r49u/WcuGxczjl8FeGugN11tKZjK2qeM2ozd1tnWzb3e5gAEkqglz+qX1OSmkncAGwHlgMfLqQRUkHmwfXNjB9fBVHzBr/mudGj6zgmkuXM3H0SD5y/aPU7WoF4B9vX01FBH/z1qV5q2NsVSVvfv0sfvX0Zjq6ul8+Xus0GpJUNLmEs73TjL8NuCWldODTl0t6jZQSD65t5NRF04nY9832syaO5rrLlrOtpZ2/vHEld6/eyl2rt/KxsxYze1J+A9OFx86hqaWD+6sbXj5Wk02jYTiTpMLLJZz9PCKeA04E7omIGUBrYcuSDh5r65up29XG6Yv675o8au4kvnbxcTz+0nY++p8rWTBtLB/5k4V5r+dPj5jOxNGV/LzXqM3aJlcHkKRiySWcXQ2cBixPKXUALcDbC1qVdBB5sI/7zfbl/KNn8+lzX0d3Slx94TJGVVbkvZ5RlRWcd9Qh3LVqK60dPXOs1TbtYczICqaNq8r750mSXimXcPZQSmlbSqkLIKW0G/hVYcuSDh4PVDcwd/IY5k/NrcvwqjMX89jfvYUzl84sWE0XHjuH5rZOfvt8PZDNcTZlTJ/drpKk/Kns64mIOASYC4yJiOOBvd/KEwH7NqQ86OpOPLxuG+cumzWg4DOlwC1Ypx4+jWnjqvj5k5s476hDnEZDkoqoz3AGnAt8CJgHfLXX8V3A3xSwJumgsXrTTnbs6cipS7OYKitG8NajZ3PLyhqa2zqp2dbCiYdNKXVZknRQ6DOcpZSuB66PiHellH5SxJqkg8aDa3tGRJ62n8EApXDhsXO48eEN3PpYLTtbO205k6Qi6a/lbK/bI+L9wILe56eU/qFQRUkHiwfXNrJ45nhmThxd6lJeY/lhU5g9aTTX3NezdNQ8J6CVpKLIZUDAz4CLgE5gd68fSYPQ3tnNH17ctt8pNEplxIjggmNmU7OtZwJaVweQpOLIpeVsXkrpvIJXIh1knqzdzp6OLk4ts/vNervw2Dlce9+LgBPQSlKx5NJy9mBEHF3wSqSDzAPVDUTAKYdPLXUpfTp67iQOmzaW8aMqmTx25P5fIEkatFxazs4APhQRLwJt9EypkVJKxxS0MmmYe3BtI0fNmcTkseU7sWtE8Km3HEF1XbNznElSkeQSzs4veBXSQWZPexePv9TE5afnf/mlfLvouLmlLkGSDir77dZMKW0A5gNnZdstubxOUt8eXb+Njq7EaYvL934zSVJp7DdkRcTVwGeBz2eHRgL/WciipOHuwbWNjKwITlrgxK6SpFfKpQXsz+hZ6Hw3QEppEzChkEVJw92Daxs4fv4UxlblcmeBJOlgkks4a08pJSABRMS4wpYkDW87Wjp4ZuMOTi3T+c0kSaWVSzj7UUT8BzA5Iv4C+DVwbWHLkoavh19spDuV55JNkqTS22+fSkrpnyPiLcBO4HXA36eU7i54ZdIw9dDaRkaPHMHxh3q/mSTptfoNZxFRAfw6pXQmYCCT8uDBtQ2ctGAqVZUOepYkvVa/fx1SSl1Ad0RMKlI90rBWt6uVF7Y2c7pTaEiS+pDLULFm4OmIuJteC56nlD7e34siYjTwe2BU9jk/TildHRELgZuBacBK4M9TSu0RMQq4ATgRaATem1JaP/BfSSpfD61tBLzfTJLUt1zC2U+zn4Fqo2fi2uaIGAncHxG/Aj4FfC2ldHNEfBu4AvhW9tiUUlocEe8Dvgy89wA+VypbD1Y3MnF0Jcvm2BgtSdq3XAYEXH8gb5xNv9Gc7Y7MfhJwFvD+7Pj1wBfoCWcXZdsAPwb+PSIiex9pWHhwXQOnHD6NihGuUylJ2rdcVghYEhE/jojVEbFu708ubx4RFRHxBFBHz4CCtcD2lFJndkotsHfhvrlADUD2/A56uj6lYaFmWws12/bYpSlJ6lcuw8W+R0/LVidwJj33heW0fFNKqSuldBwwDzgZWHqAdb4sIq6MiBURsaK+vn6wbycVzYNrGwAcDCBJ6lcu4WxMSukeIFJKG1JKXwDeNpAPSSltB+4FTqVnMtu93anzgI3Z9kZ6Flgne34SPQMDXv1e16SUlqeUls+YMWMgZUgl09HVze1PbWbGhFEsnjm+1OVIkspYLuGsLSJGAGsi4mMR8WfAfv+6RMSMiJicbY8B3gI8S09Ie3d22mXAz7Lt27J9sud/4/1mGg6adrdz6Xf+wH1rGrjijIVEeL+ZJKlvuYzW/AQwFvg48I/0dG1e1u8reswGrs8msh0B/CildHtErAZujogvAo8D38nO/w5wY0RUA9uA9w3oN5HK0PNbdvGRGx5l6842vnrxsbzzhHmlLkmSVOb2t0LADHpGWFamlGqBD+f6ximlp4Dj93F8HT33n736eCvwnlzfXyp3d6/eyidvfpxxoyr54ZWnuFyTJCknfXZrRsRHgFXAvwHPRcTbi1aVNISllPjmvdVceeMKFs0cz20fO8NgJknKWX8tZ58ElqWU6iPicOC/6LkvTFIf9rR38ZmfPMXPn9zERcfN4cvvOobRIytKXZYkaQjpL5y1p5TqoacrMlteSVIfNu/Yw5U3rOSZTTv47HlL+egbD/fmf0
nSgPUXzuZFxDf62t/f2prSwaKrO/HIi418/AdP0NrRxXWXLufs188qdVmSpCGqv3D26VftryxkIdJQ0NnVzZq6Zp7ZuKPnZ9NOVm/ayZ6OLg6dOpab/uINHDFrQqnLlCQNYX2GswNdU1MaTlJK/PLpLTy0roGnN+7kuc07aevsBmBcVQXL5kzifSfP56g5k3jzkbOYNGZkiSuWJA11ucxzJh207lvTwFU3PcaEUZUsmzuRS089jKPmTuKouZNYOG0cI1zAXJKUZ4YzqR+3PbmJCaMqefTv3uyoS0lSUfQ3z9mXs0cnhtVBqa2ziztXbeEty2YZzCRJRdPf2ppvjZ55AD5frGKkcnLfCw3sau3kwmPmlLoUSdJBpL9uzTuAJmB8ROwEgp6lnAJIKaWJRahPKpnbn9rEpDEjOX3x9FKXIkk6iPTZcpZS+nRKaTLwi5TSxJTShN6PRaxRKrrWji7uXr2V85YdQlVlfw3MkiTl134HBKSULoqIWcBJ2aFH9q4cIA1Xv32+jt3tXVxw7OxSlyJJOsjst0kgGxDwB+A9wMXAHyLi3YUuTCqlnz+1mWnjqjj18GmlLkWSdJDJZSqNvwNOSinVAUTEDODXwI8LWZhUKi3tnfzm2TreecJcKivs0pQkFVcuf3lG7A1mmcYcXycNSfc8W8eeji4ucJSmJKkEcmk5uyMi7gR+kO2/F/hl4UqSSuv2pzYxY8IoTl44tdSlSJIOQrkMCPh0RLwTOCM7dE1K6dbCliWVxq7WDu59vp73n3woFS7NJEkqgZyWb0op/RT4aYFrkUru189upb2zmwuOcZSmJKk0vHdM6uX2Jzcze9JoTjh0SqlLkSQdpAxnUmZHSwe/X1PP246ezQi7NCVJJTKgcBYRUyLimEIVI5XSnau30NGVuOBYR2lKkkonl0lofxsREyNiKvAYcG1EfLXwpUnFdftTm5k/dQzHzptU6lIkSQexXFrOJqWUdgLvBG5IKb0BeHNhy5KKa9vudh6obuBtR88hwi5NSVLp5BLOKiNiNj1LN91e4HqkkrjjmS10dSdHaUqSSi6XcPYPwJ1AdUrp0Yg4HFhT2LKk4rr9qU0snD6OZXMmlroUSdJBLpdJaG8Bbum1vw54VyGLkoqpflcbD69r5KozF9ulKUkquVwGBHwlGxAwMiLuiYj6iPhgMYqTiuFXz2ymO+FampKkspBLt+Y52YCAC4D1wGLg04UsSiqm25/czJKZ43ndIRNKXYokSbkNCMge3wbcklLaUcB6pKLasqOVRzdss9VMklQ2cllb8/aIeA7YA/xVRMwAWgtbllQcv3h6MynBBcc6SlOSVB7223KWUvoccBqwPKXUAbQAFxW6MKkYbn9qE6+fPZFFM8aXuhRJkoDcBgSMBf4H8K3s0BxgeSGLkoqhtqmFx1/a7txmkqSykss9Z98D2ulpPQPYCHyxYBVJRXLvc3UAvO1ow5kkqXzkEs4WpZS+AnQApJRaACeD0pD3/NZdTBhdyWHTxpa6FEmSXpZLOGuPiDFAAoiIRUBbQauSiqC6rpnFM8c78awkqazkEs6uBu4A5kfEfwH3AJ8paFVSEVTX7WaxAwEkSWUml+Wb7o6Ix4BT6OnO/ERKqaHglUkFtL2lnYbmNpbMMpxJkspLLvOcAYwGmrLzj4wIUkq/L1xZUmFV1zUDsHim4UySVF72G84i4svAe4FVQHd2OAGGMw1ZL4ezGS7ZJEkqL7m0nL0DeF1KyUEAGjaq65oZVTmCuVPGlLoUSZJeIZcBAeuAkYUuRCqmNXXNLJoxnooRjtSUJJWXXFrOWoAnIuIeek2hkVL6eMGqkgqsuq6ZEw+bUuoyJEl6jVzC2W3ZT2+pALVIRdHS3snG7Xt470nzS12KJEmvkUs4m5xS+nrvAxHxiQLVIxXcuvrdgCM1JUnlKZd7zi7bx7EP5bkOqWjW1O0CYInhTJJUhvpsOYuIS4D3Awsjone35gRgW6ELkwqluq6ZihHBYdPGlboUSZJeo79uzQeBzcB04F96Hd8FPFXIoqRCqq5r5rBpY6mqzKXhWJKk4uoznKWUNgAbgFOLV45UeNV1za6pKUkqW302HUTE/dnjrojY2etnV0TsLF6JUv60d3azvrHFNTUlSWWrv5azM7JH17fRsLGhcTdd3cmRmpKkstVfy9k7e20PeLbOiJgfEfdGxOqIWLV3+o2ImBoRd0fEmuxxSnY8IuIbEVEdEU9FxAkH8gtJ/XFNTUlSuevvjui/67V9zwG8dyfwP1NKRwKnAFdFxJHA54B7UkpLsvf9XHb++cCS7OdK4FsH8JlSv/aGs0UzHakpSSpP/YWz6GM7JymlzSmlx7LtXcCzwFzgIuD67LTr6VlYnez4DanHw8DkiJg90M+V+rOmrpm5k8cwtiqX+ZclSSq+/v5CjYmI4+kJcKOz7ZdD2t7glYuIWAAcDzwCzEopbc6e2gLMyrbnAjW9XlabHduMlCfVdc3ebyZJKmv9hbPNwFez7S29tqFnbc2zcvmAiBgP/AT4ZEppZ8QfG+FSSikiBrROZ0RcSU+3J4ceeuhAXqqDXHd3Yl1DM6cumlbqUiRJ6lN/ozXPHOybR8RIeoLZf6WUfpod3hoRs1NKm7Nuy7rs+Eag90rU87Jjr67rGuAagOXLl7sAu3K2cfseWju6bTmTJJW1gk2RHj1NZN8Bnk0p9W51u40/rtd5GfCzXscvzUZtngLs6NX9KQ2aa2pKkoaCQt4VfTrw58DTEfFEduxvgC8BP4qIK+hZgeDi7LlfAm8FqoEW4MMFrE0HoZen0TCcSZLKWMHCWUrpfvoe5Xn2Ps5PwFWFqkeqrmtm+vgqJo+tKnUpkiT1ab/hrI/JYHcAG1JKnfkvSSqM6rpmFrmmpiSpzOXScvZ/gBOAp+hpCTsKWAVMioi/SindVcD6pLxIKbGmrpmLjptT6lIkSepXLgMCNgHHp5SWp5ROpGe+snXAW4CvFLI4KV/qd7Wxq7WTxbacSZLKXC7h7IiU0qq9Oyml1cDSlNK6wpUl5dcfBwO4pqYkqbzl0q25KiK+Bdyc7b8XWB0Ro4COglUm5VF1vSM1JUlDQy4tZx+iZ3qLT2Y/67JjHcCgJ6qVimHN1mYmjKpk1sRRpS5FkqR+5dJydj7w7ymlf9nHc815rkcqiOq6ZhbNHE/v5cMkSSpHubScXQi8EBE3RsQFEVHIiWulgqiud8FzSdLQsN9wllL6MLAYuAW4BFgbEdcVujApX3a0dFC/q81lmyRJQ0JOrWAppY6I+BWQgDHAO4CPFLIwKV+q63vW1LTlTJI0FOy35Swizo+I7wNrgHcB1wGHFLguKW9cU1OSNJTk0nJ2KfBD4C9TSm0FrkfKu+q6ZqoqRzBvythSlyJJ0n7tN5yllC7pvR8RZwCXpJRcpFxDwt41NStGOFJTklT+crrnLCKOB94PvAd4EfhpIYuS8mlNXTPHHzql1GVIkpSTPsNZRBxBz+jMS4AGero2I6XkxLMaMva0d7Fx+x7ec+L8UpciSVJO+ms5ew64D7ggpVQNEBH/T1GqkvJkb
X0zKTkYQJI0dPQ3WvOdwGbg3oi4NiLOBrxpR0PK2mxNzSWzDGeSpKGhz3CWUvrvlNL7gKXAvfSsqzkzIr4VEecUq0BpMNZsbaZiRLBg2rhSlyJJUk5yWSFgd0rpppTShcA84HHgswWvTMqD6rpmDps6lqrKXFYqkySp9Ab0Fyul1JRSuialdHahCpLyqbq+Z8FzSZKGCpsTNGx1dHWzvmG3a2pKkoYUw5mGrQ2Nu+nsTo7UlCQNKYYzDVuuqSlJGooMZxq29oazRTMMZ5KkocNwpmGruq6ZuZPHMG5UTquUSZJUFgxnGrbW1DlSU5I09BjONCx1dyfW1jez2C5NSdIQYzjTsLRx+x5aO7odDCBJGnIMZxqWql1TU5I0RBnONCxVb82m0bBbU5I0xBjONCw9v3UX08ZVMWVcValLkSRpQAxnGna6uhO/fb6eUw6fVupSJEkaMMOZhp0nappoaG7jnGWkB1t+AAASKUlEQVSzSl2KJEkDZjjTsHPnqq2MrAjOXDqz1KVIkjRghjMNKykl7ly1hVMXTWfi6JGlLkeSpAEznGlYeWFrMxsaWzjnSLs0JUlDk+FMw8pdq7YAGM4kSUOW4UzDyp2rt3D8oZOZOXF0qUuRJOmAGM40bGzcvodnNu7k3GWHlLoUSZIOmOFMw4ZdmpKk4cBwpmHjrlVbWTJzPIe7ZJMkaQgznGlYaNrdzh/Wb3PiWUnSkGc407Bwz3N1dHUn7zeTJA15hjMNC3eu2sLsSaM5eu6kUpciSdKgGM405O1p7+K+NfWcc+QsIqLU5UiSNCiGMw15v3uhntaObrs0JUnDguFMQ95dq7cwacxITlo4tdSlSJI0aIYzDWkdXd3c82wdZ79+JiMr/M9ZkjT0+ddMQ9ofXtzGjj0dnHOkXZqSpOHBcKYh7a5VWxg9cgRvPGJGqUuRJCkvDGcaslJK3LV6K3+yZAZjqipKXY4kSXlhONOQ9fTGHWze0eooTUnSsFKwcBYR342Iuoh4ptexqRFxd0SsyR6nZMcjIr4REdUR8VREnFCoujR83LlqCxUjgrOXzix1KZIk5U0hW86+D5z3qmOfA+5JKS0B7sn2Ac4HlmQ/VwLfKmBdGibuWrWVkxdMZcq4qlKXIklS3hQsnKWUfg9se9Xhi4Drs+3rgXf0On5D6vEwMDkiZheqNg196+qbWVPX7ELnkqRhp9j3nM1KKW3OtrcAe/+yzgVqep1Xmx2T9umu1VsBOMf7zSRJw0zJBgSklBKQBvq6iLgyIlZExIr6+voCVKah4M5VWzhq7kTmTh5T6lIkScqrYoezrXu7K7PHuuz4RmB+r/PmZcdeI6V0TUppeUpp+YwZzm11MKrb2crjL23nXCeelSQNQ8UOZ7cBl2XblwE/63X80mzU5inAjl7dn9Ir2KUpSRrOKgv1xhHxA+BNwPSIqAWuBr4E/CgirgA2ABdnp/8SeCtQDbQAHy5UXRraWju6uPXxjSyYNpYjZo0vdTmSJOVdwcJZSumSPp46ex/nJuCqQtWi4WFtfTMfu+lxnt28k398x1FERKlLkiQp7woWzqR8uvXxWv721mcYVTmC735oOWctdQoNSdLwZDhTWWtp7+Tqn63ilpW1nLxgKl+/5DhmT3KEpiRp+DKcqWy9sHUXV/3XY1TXN/PXZy3mE2cvobLC5WAlScOb4UxlJ6XEj1bUcPVtqxg/aiQ3Xv4GzlgyvdRlSZJUFIYzlZXmtk7+9tan+dkTmzh98TS+9t7jmDlhdKnLkiSpaAxnKhsbt+/hz697hPWNu/lf5xzBX71pMRUjHJEpSTq4GM5UFjZt38P7rnmI7S0d3PQXp3DK4dNKXZIkSSVhOFPJbd6xh0uufZjtuzu48SNv4Lj5k0tdkiRJJWM4U0lt3dnK+699hMbmdm644mSDmSTpoOe8BCqZup2tXHLNw9TtbOX6y0/ihEOnlLokSZJKzpYzlUTdrlYuufZhtuxs5frLT+bEw6aWuiRJksqCLWcquobmNj5w7SNs2t7K9z50EictMJhJkrSX4UxF1djcxvuvfZiapha++6GTeIOjMiVJegXDmYpm2+52PnDdI2xobOG7l53EqYsMZpIkvZrhTEWxo6WDD1z3CC827OY7l53EaYtdjkmSpH0xnKngOru6+dgPHqO6bhfXXrrcdTIlSeqHozVVcF/61XPct6aBr7zrGP70iBmlLkeSpLJmy5kK6icra7nu/hf50GkLuPik+aUuR5Kksmc4U8E8UbOdz9/6NKctmsbfvu31pS5HkqQhwXCmgti6s5Urb1jBrImj+Ob7T2Bkhf+pSZKUC/9iKu9aO7r4yxtX0tzWybWXLmfKuKpSlyRJ0pDhgADlVUqJv731GZ6o2c63P3giSw+ZWOqSJEkaUmw5U79aO7po7+zO+fzvPrCenzxWyyffvITzjjqkgJVJkjQ82XImuroTm7bvYV3Dbl6sb2Zdw27W1e/mxYbdbNy+h3FVFbzpdTM5Z9kszlw6k4mjR+7zfe5f08A//WI15y6bxcfPWlLk30KSpOHBcHaQerJmOzc8tIFnNu7gxcbdr2gdmzCqksNnjOOkBVO4ePp8tuxs5e7VW/nF05sZWRGctmg65yybxVuOnMXMCaMBWN+wm6tueowlMyfw1YuPY8SIKNWvJknSkBYppVLXcMCWL1+eVqxYUeoyhoyu7sSvn93Kdfet49H1TUwYVckbDp/KwunjOHzGeA6fPo6FM8YxY/woIuI1r32ipok7V23lzlVb2NDYQgSccOgUzl02i1tW1FLf3MZtV53BodPGlug3lCSpfEXEypTS8v2eZzgb/lraO7llRS3ffeBFNjS2MHfyGC4/YyEXL5/HhD66KPuTUuL5rbu4KwtqqzbtpGJEcOPlJ7tmpiRJfcg1nNmtOYxt2dHK9Q+t56ZHXmLHng6Omz+Zz5y7lHOXzaJyEPOORQRLD5nI0kMm8vGzl1CzrYVdrZ0cOceRmZIkDZbhbBjasqOVr9z5HLc9sYnulDh32SF85E8O58TDphTk8+ZPtRtTkqR8MZwNIyklbn60hv/vF8/S0d3NB085jMtPX+g9YJIkDSGGs2FiQ+NuPveTp3loXSOnHj6NL73raA6bNq7UZUmSpAEynA1xXd2J7z3wIv981/OMHDGC//+dR/O+k+a/ZrSlJEkaGgxnQ9jzW3bxmZ88xZM12zl76Uy++GdHMXvSmFKXJUmSBsFwNgS1d3bzf35bzTfvrWbC6JF8/X3H8fZj59haJknSMGA4G2JebNjNR29cyfNbd3HRcXP4+wuOZNr4UaUuS5Ik5YnhbAjZ0dLB5d9/lB17Orju0uW8+chZpS5JkiTlmeFsiOjs6uaqmx6jtqmFH/zFKSxfMLXUJUmSpAIwnA0RX/zFs9xf3cBX3n2MwUySpGHswNfwUdHc/IeX+P6D67nijIVcvHx+qcuRJEkFZDgrc4+u38b/+7Nn+NMjZvD585eWuhxJklRghrMyVtvUwkdvXMn8KWP5t0uOH9Ri5ZIkaWjwr32Z2t3WyUeuX0F7VzfXXracSWNGlrokSZJU
BIazMtTdnfjUj57gha27+Pf3n8CiGeNLXZIkSSoSw1kZ+td71nDnqq38zVtfzxuPmFHqciRJUhE5lUaRvNTYwu72TmZMGMWUsVVUjNj3Uku/eGoz37hnDe85cR5XnLGwyFVKkqRSM5wVSEqJ57bs4o5ntnDHM1t4fuuul58bETBt/Cimjx/FjAmjmDF+FNMnVDFx9Ej+7TdrOPGwKXzxz45yrUxJkg5ChrM86u5OPFm7nTtWbeHOZ7awvrGFCDhpwVT+/oIjOWTSaOp3tdHQ3Eb9rraXt6u37qKhuZ32rm7mTx3Dtz94IqMqK0r960iSpBIwnA1QSomW9i52t3Wyq62T3W2dNO5u53fP13PHM1vYsrOVyhHBaYun85dvXMSbXz+LGRP2vzB5SomdezoZU1VBVaW3AkqSdLAynPXjobWNfPmO52jOQlhzaye72zvpTq89d1TlCN54xAw+e/TrOGvprAFPfRERTBrrdBmSJB3sDGf9qKocwcQxI5kzeTTjqioZP7qS8aN6fsaNqmTC6ErGVfU8Hj1vEmOrvJySJGlwTBP9OPGwKdxw+cmlLkOSJB1Eyurmpog4LyKej4jqiPhcqeuRJEkqtrIJZxFRAXwTOB84ErgkIo4sbVWSJEnFVTbhDDgZqE4prUsptQM3AxeVuCZJkqSiKqdwNheo6bVfmx2TJEk6aJRTOMtJRFwZESsiYkV9fX2py5EkScqrcgpnG4H5vfbnZcdeIaV0TUppeUpp+YwZLgouSZKGl3IKZ48CSyJiYURUAe8DbitxTZIkSUVVNvOcpZQ6I+JjwJ1ABfDdlNKqEpclSZJUVGUTzgBSSr8EflnqOiRJkkqlnLo1JUmSDnqGM0mSpDJiOJMkSSojhjNJkqQyEimlUtdwwCKiHthQ4I+ZDjQU+DP0Sl7z4vJ6F5fXu7i83sXnNe/bYSml/U7SOqTDWTFExIqU0vJS13Ew8ZoXl9e7uLzexeX1Lj6v+eDZrSlJklRGDGeSJEllxHC2f9eUuoCDkNe8uLzexeX1Li6vd/F5zQfJe84kSZLKiC1nkiRJZcRw1o+IOC8ino+I6oj4XKnrGW4i4rsRURcRz/Q6NjUi7o6INdnjlFLWOJxExPyIuDciVkfEqoj4RHbca14gETE6Iv4QEU9m1/x/Z8cXRsQj2XfLDyOiqtS1DicRURERj0fE7dm+17tAImJ9RDwdEU9ExIrsmN8pg2Q460NEVADfBM4HjgQuiYgjS1vVsPN94LxXHfsccE9KaQlwT7av/OgE/mdK6UjgFOCq7L9pr3nhtAFnpZSOBY4DzouIU4AvA19LKS0GmoArSljjcPQJ4Nle+17vwjozpXRcr+kz/E4ZJMNZ304GqlNK61JK7cDNwEUlrmlYSSn9Htj2qsMXAddn29cD7yhqUcNYSmlzSumxbHsXPX+85uI1L5jUoznbHZn9JOAs4MfZca95HkXEPOBtwHXZfuD1Lja/UwbJcNa3uUBNr/3a7JgKa1ZKaXO2vQWYVcpihquIWAAcDzyC17ygsi62J4A64G5gLbA9pdSZneJ3S379K/AZoDvbn4bXu5AScFdErIyIK7NjfqcMUmWpC5D6klJKEeFw4jyLiPHAT4BPppR29jQs9PCa519KqQs4LiImA7cCS0tc0rAVERcAdSmllRHxplLXc5A4I6W0MSJmAndHxHO9n/Q75cDYcta3jcD8XvvzsmMqrK0RMRsge6wrcT3DSkSMpCeY/VdK6afZYa95EaSUtgP3AqcCkyNi7z+O/W7Jn9OBt0fEenpuRTkL+Dpe74JJKW3MHuvo+cfHyfidMmiGs749CizJRvlUAe8DbitxTQeD24DLsu3LgJ+VsJZhJbv35jvAsymlr/Z6ymteIBExI2sxIyLGAG+h516/e4F3Z6d5zfMkpfT5lNK8lNICer6zf5NS+gBe74KIiHERMWHvNnAO8Ax+pwyak9D2IyLeSs/9CxXAd1NK/1TikoaViPgB8CZgOrAVuBr4b+BHwKHABuDilNKrBw3oAETEGcB9wNP88X6cv6HnvjOveQFExDH03BBdQc8/hn+UUvqHiDicnpadqcDjwAdTSm2lq3T4ybo1/1dK6QKvd2Fk1/XWbLcSuCml9E8RMQ2/UwbFcCZJklRG7NaUJEkqI4YzSZKkMmI4kyRJKiOGM0mSpDJiOJMkSSojhjNJQ0ZEzIqImyJiXbZczEMR8WclquVNEXFar/2PRsSlpahF0vDi8k2ShoRsEt3/Bq5PKb0/O3YY8PYCfmZlrzUZX+1NQDPwIEBK6duFqkPSwcV5ziQNCRFxNvD3KaU37uO5CuBL9ASmUcA3U0r/kU1E+gWgATgKWEnPBKQpIk4EvgqMz57/UEppc0T8FngCOAP4AfAC8HdAFdAIfAAYAzwMdAH1wF8DZwPNKaV/jojjgG8DY+lZ6PzylFJT9t6PAGcCk4ErUkr35e8qSRoO7NaUNFQsAx7r47krgB0ppZOAk4C/iIiF2XPHA58EjgQOB07P1hj9N+DdKaUTge8CvVcAqUopLU8p/QtwP3BKSul4emaZ/0xKaT094etrKaXj9hGwbgA+m1I6hp4VGa7u9VxlSunkrKarkaRXsVtT0pAUEd+kp3WrnZ4lYo6JiL3rJ04ClmTP/SGlVJu95glgAbCdnpa0u3t6S6kANvd6+x/22p4H/DBbwLkKeHE/dU0CJqeUfpcduh64pdcpexecX5nVIkmvYDiTNFSsAt61dyeldFVETAdWAC8Bf51SurP3C7Juzd5rKHbR870XwKqU0ql9fNbuXtv/Bnw1pXRbr27Swdhbz95aJOkV7NaUNFT8BhgdEX/V69jY7PFO4K+y7koi4oiIGNfPez0PzIiIU7PzR0bEsj7OnQRszLYv63V8FzDh1SenlHYATRHxJ9mhPwd+9+rzJKkv/qtN0pCQ3cT/DuBrEfEZem7E3w18lp5uwwXAY9moznrgHf28V3vWBfqNrBuyEvhXelrnXu0LwC0R0URPQNx7L9vPgR9HxEX0DAjo7TLg2xExFlgHfHjgv7Gkg5WjNSVJksqI3ZqSJEllxHAmSZJURgxnkiRJZcRwJkmSVEYMZ5IkSWXEcCZJklRGDGeSJEllxHAmSZJURv4vcjkJItaFK18AAAAASUVORK5CYII=\n",
207 | "text/plain": [
208 | ""
209 | ]
210 | },
211 | "metadata": {
212 | "needs_background": "light"
213 | },
214 | "output_type": "display_data"
215 | }
216 | ],
217 | "source": [
218 | "plt.figure(figsize=(10,6))\n",
219 | "plt.ylabel(\"Avg Fitness of Parents\")\n",
220 | "plt.xlabel(\"Generation\")\n",
221 | "plt.plot(generation_fitness)"
222 | ]
223 | },
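224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "With a population this small the fitness curve is noisy; a moving average (window size chosen arbitrarily here) makes the trend easier to read:"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "window = 5  # arbitrary smoothing window\n",
238 | "smoothed = np.convolve(generation_fitness, np.ones(window) / window, mode='valid')\n",
239 | "plt.figure(figsize=(10,6))\n",
240 | "plt.plot(smoothed)\n",
241 | "plt.xlabel(\"Generation\")\n",
242 | "plt.ylabel(\"Avg Fitness (moving average)\")"
243 | ]
244 | },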
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "### Test trained agent"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 22,
234 | "metadata": {},
235 | "outputs": [
236 | {
237 | "name": "stderr",
238 | "output_type": "stream",
239 | "text": [
240 | "/Users/brandonbrown/anaconda3/envs/deeprl/lib/python3.6/site-packages/ipykernel/__main__.py:14: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n"
241 | ]
242 | }
243 | ],
244 | "source": [
245 | "state = torch.from_numpy(env.reset()).float()\n",
246 | "done = False\n",
247 | "for i in range(200):\n",
248 | " action = get_action_from_agent_weights(agents[0],state)\n",
249 | " state, reward, done, info = env.step(action)\n",
250 | " state = torch.from_numpy(state).float()\n",
251 | " if done:\n",
252 | " print(\"Game over at time step {}\".format(i,))\n",
253 | " break\n",
254 | " env.render()\n",
255 | "env.close()"
256 | ]
257 | },
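258 | {
259 | "cell_type": "markdown",
260 | "metadata": {},
261 | "source": [
262 | "The cells above use the classic `gym` API (`reset()` returns an observation; `step()` returns four values). Under the newer `gymnasium` pinned in `requirements.txt`, `reset()` returns `(obs, info)` and `step()` returns five values. A compatibility sketch (the `CartPole-v1` id is an assumption; substitute whatever environment `env` was built from):"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "import gymnasium as gym\n",
272 | "\n",
273 | "env = gym.make(\"CartPole-v1\", render_mode=\"human\")  # assumed env id; renders inside step()\n",
274 | "obs, info = env.reset()\n",
275 | "for i in range(200):\n",
276 | "    action = get_action_from_agent_weights(agents[0], torch.from_numpy(obs).float())\n",
277 | "    obs, reward, terminated, truncated, info = env.step(action)  # 5-tuple in gymnasium\n",
278 | "    if terminated or truncated:\n",
279 | "        print(\"Game over at time step {}\".format(i))\n",
280 | "        break\n",
281 | "env.close()"
282 | ]
283 | }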
272 | ],
273 | "metadata": {
274 | "kernelspec": {
275 | "display_name": "Python [default]",
276 | "language": "python",
277 | "name": "python3"
278 | },
279 | "language_info": {
280 | "codemirror_mode": {
281 | "name": "ipython",
282 | "version": 3
283 | },
284 | "file_extension": ".py",
285 | "mimetype": "text/x-python",
286 | "name": "python",
287 | "nbconvert_exporter": "python",
288 | "pygments_lexer": "ipython3",
289 | "version": "3.6.7"
290 | }
291 | },
292 | "nbformat": 4,
293 | "nbformat_minor": 2
294 | }
295 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.4
2 | backcall==0.2.0
3 | certifi==2024.2.2
4 | chardet==5.2.0
5 | cycler==0.12.1
6 | decorator==5.1.1
7 | future==1.0.0
8 | idna==3.7
9 | ipython==8.23.0
10 | ipython-genutils==0.2.0
11 | jedi==0.19.1
12 | kiwisolver==1.4.5
13 | matplotlib==3.8.4
14 | numpy==1.26.4
15 | parso==0.8.4
16 | pexpect==4.9.0
17 | pickleshare==0.7.5
18 | prompt-toolkit==3.0.43
19 | ptyprocess==0.7.0
20 | pyglet==1.5.21
21 | pygments==2.17.2
22 | pyparsing==3.1.2
23 | python-dateutil==2.9.0
24 | pytz==2024.1
25 | requests==2.31.0
26 | scipy==1.13.0
27 | simplegeneric==0.8.1
28 | six==1.16.0
29 | torch==2.2.2
30 | traitlets==5.14.2
31 | urllib3==2.2.1
32 | wcwidth==0.2.13
33 | notebook==7.1.2
34 | scikit-image==0.23.1
35 | gymnasium[atari]==0.29.1
36 | gymnasium[accept-rom-license]==0.29.1
37 | gym-super-mario-bros==7.4.0
38 | magent2==0.3.2
39 | pettingzoo==1.24.3
40 | torchvision==0.17.2
41 | gym-minigrid==1.2.2
42 | einops==0.7.0
43 |
--------------------------------------------------------------------------------