├── .gitignore
├── LICENSE
├── README.md
├── chimp
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── agent_test.py
│   │   └── dqn_agent.py
│   ├── learners
│   │   ├── __init__.py
│   │   ├── chainer_backend.py
│   │   ├── chainer_test.py
│   │   └── dqn_learner.py
│   ├── memories
│   │   ├── __init__.py
│   │   ├── mem_test.py
│   │   ├── memory.py
│   │   └── replay_memory.py
│   ├── pre_trained_nets
│   │   └── mountain_car.net
│   ├── simulators
│   │   ├── __init__.py
│   │   ├── atari
│   │   │   ├── __init__.py
│   │   │   └── atari.py
│   │   ├── gym
│   │   │   ├── __init__.py
│   │   │   └── gym_wrapper.py
│   │   ├── mdp
│   │   │   ├── __init__.py
│   │   │   ├── cart_pole.py
│   │   │   ├── mdp_simulator.py
│   │   │   └── mountain_car.py
│   │   └── pomdp
│   │       ├── __init__.py
│   │       ├── models
│   │       │   ├── __init__.py
│   │       │   ├── rock_sample.py
│   │       │   ├── rock_test.py
│   │       │   ├── simulator.py
│   │       │   └── tiger.py
│   │       ├── tools
│   │       │   ├── __init__.py
│   │       │   ├── belief.py
│   │       │   ├── belief_momdp.py
│   │       │   └── distributions.py
│   │       └── sim_loop.py
│   └── utils
│       ├── __init__.py
│       ├── distributions.py
│       └── policies.py
├── examples
│   ├── atari_tutorial.ipynb
│   ├── mountain_car.ipynb
│   ├── mountain_car_test.py
│   ├── run_atari.py
│   ├── run_cartpole.py
│   ├── run_mountain_car.py
│   └── run_tiger.py
├── logos
│   ├── chimp.png
│   └── monkey_text.png
├── roms
│   └── README.md
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 | *.pyc
10 |
11 | # Packages #
12 | ############
13 | *.7z
14 | *.dmg
15 | *.gz
16 | *.iso
17 | *.jar
18 | *.rar
19 | *.tar
20 | *.zip
21 |
22 | # Logs and databases #
23 | ######################
24 | *.log
25 | *.sql
26 | *.sqlite
27 | *.hdf5
28 |
29 | # OS generated files #
30 | ######################
31 | *.DS_Store
32 | *.DS_Store?
33 | *._*
34 | *.Spotlight-V100
35 | *.Trashes
36 | *ehthumbs.db
37 | *Thumbs.db
38 |
39 | # Data files #
40 | ##############
41 | *.csv
42 | *.jld
43 | *.mat
44 | *.p
45 |
46 | # Images #
47 | ##########
48 | *.jpg
49 | *.jpeg
50 | *.bitmap
51 |
52 | # Documents #
53 | #############
54 | *.eps
55 | *.pdf
56 |
57 | # Misc #
58 | ########
59 | *.swp
60 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Chimp is a general purpose framework for deep reinforcement learning developed at the [Stanford Intelligent Systems Laboratory](http://sisl.stanford.edu/).
6 | Chimp is based on a simple four-part architecture that allows plug-and-play-like capabilities for deep reinforcement
7 | learning experiments.
8 | This package was inspired by the Google DeepMind [paper](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) (V. Mnih, et al).
9 | Many of the architectural ideas were taken from DeepMind's
10 | [GORILA](http://arxiv.org/abs/1507.04296) framework and from the
11 | [paper](http://arxiv.org/abs/1508.04186) on distributed Deep Q-Learning by Ong, et al.
12 |
13 | # Installation
14 |
15 | First clone Chimp:
16 | ```
17 | git clone https://github.com/sisl/Chimp
18 | ```
19 | Then add the source directory to your `PYTHONPATH`.
20 |
21 | ```
22 | cd Chimp
23 | export PYTHONPATH=$(pwd):$PYTHONPATH
24 | ```
25 |
26 | You will also need NumPy and SciPy installed, as well as a deep learning backend. Currently only [Chainer](https://github.com/pfnet/chainer) is supported (TensorFlow coming soon).
27 |
28 | Once you have the dependencies installed, you should be able to run the framework on a CPU. To use a GPU, you will need CUDA and a supported graphics card.
29 |
30 | # Getting Started
31 |
32 | If you are interested in using Chimp for your own reinforcement learning problems, check out the [mountain car tutorial](https://github.com/sisl/Chimp/blob/master/examples/mountain_car.ipynb) to get an idea of how to write your own simulator class (a minimal sketch of the required interface is shown below). If you would like to use Chimp with the Arcade Learning Environment, check out the [Atari tutorial](https://github.com/sisl/Chimp/blob/master/examples/atari_tutorial.ipynb) to get started.
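
A custom simulator only needs to expose the small interface that the agent calls. The sketch below is a hypothetical toy environment (it is not the tutorial code); the attribute and method names are the ones used by `chimp/agents/dqn_agent.py`:

```
import numpy as np

class MySimulator(object):
    """Hypothetical toy simulator exposing the interface DQNAgent expects."""

    def __init__(self):
        self.n_actions = 2                         # number of discrete actions
        self.model_dims = (1, 2)                   # shape of a single observation
        self.state = np.zeros(self.model_dims, dtype=np.float32)

    def act(self, action):
        # advance the environment by one step
        step = 0.1 if action == 1 else -0.1
        self.state = np.clip(self.state + step, 0.0, 1.0)

    def reward(self):
        # reward for the most recent transition
        return float(np.sum(self.state))

    def get_screenshot(self):
        # current observation, shaped according to model_dims
        return self.state

    def episode_over(self):
        return bool(np.all(self.state >= 1.0))

    def reset_episode(self):
        self.state = np.zeros(self.model_dims, dtype=np.float32)
```

If the `'viz'` option is enabled in the settings, the simulator additionally needs `init_viz_display()` and `refresh_viz_display()` methods.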
33 |
34 | # Architecture
35 |
36 | Chimp consists of four main modules: Agent, Learner, Simulator, and Memory. Such decomposition leads to a very powerful and flexible framework for reinforcement learning experiments, where one can quickly switch between simulators, replay memory implementations, and various deep learning backends.
37 |
38 | Chimp is also powerful in its flexible handling of inputs to the deep neural network.
39 | The user can specify the history lengths for observations, actions, and even rewards that they want to use as inputs to the model and Chimp will handle the rest.
40 |
41 | The specification of the input size is in the form of a tuple ```(s_size, a_size, r_size)```. For the DeepMind Atari experiments, this setting would look like ```(4, 0, 0)```: they used four image frames per input and no action or reward history (see the example below).
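
In the settings dictionaries used by the test scripts in this repository, this specification is passed under the `history_sizes` key. As a rough, DeepMind-style Atari configuration (the values below are illustrative, not the exact tutorial settings):

```
settings = {
    'history_sizes' : (4, 0, 0),   # four observation frames, no action or reward history
    'model_dims'    : (84, 84),    # dimensions of a single observation
    'batch_size'    : 32,
    'memory_size'   : 1000000,     # capacity of the experience replay
    # ... remaining agent, learner, and memory settings
}
```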
42 |
43 | # Components
44 |
45 | * Memory (implements experience replay)
46 | * Currently, we support in-memory numpy arrays and HDF5 allocated storage
47 |
48 | * Learner ("brain" of the algorithm that does forward and backward passes in a neural net)
49 | * We support DQN with arbitrary observation/action history lengths as input
50 | * Planning to add LSTM + actor-critic framework
51 |
52 | * Simulator (environment for the agent to interact with)
53 | * Single-player Arcade Learning Environment
54 | * MDPs
55 |
56 | * Agent (general framework that handles all interactions between a learner, a memory, and a simulator; see the sketch below for how the pieces fit together)
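
Putting the four components together mirrors the test script `chimp/agents/agent_test.py`. The sketch below assumes you have already defined a `settings` dictionary and a Chainer network `net` (a `chainer.Chain`):

```
from chimp.simulators.mdp.mountain_car import MountainCar
from chimp.simulators.mdp.mdp_simulator import MDPSimulator
from chimp.learners.chainer_backend import ChainerBackend
from chimp.learners.dqn_learner import DQNLearner
from chimp.memories.replay_memory import ReplayMemoryHDF5
from chimp.agents.dqn_agent import DQNAgent

simulator = MDPSimulator(MountainCar())    # Simulator: the environment
backend = ChainerBackend(settings)         # Chainer back-end that runs the network
backend.set_net(net)                       # net is your chainer.Chain Q-network
learner = DQNLearner(settings, backend)    # Learner: forward/backward passes and target net
memory = ReplayMemoryHDF5(settings)        # Memory: HDF5-backed experience replay
agent = DQNAgent(learner, memory, simulator, settings)  # Agent: ties everything together
agent.train(verbose=True)
```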
57 |
58 | # Dependencies
59 |
60 | Chimp relies on existing deep learning back-ends. Currently only [Chainer](http://chainer.org/) is supported.
61 |
62 | Required Python packages:
63 | * [Chainer](https://github.com/pfnet/chainer)
64 | * NumPy
65 | * SciPy
66 |
67 | Recommended libraries (some functionality will be absent without them):
68 | * Pygame
69 | * CUDA
70 | * Arcade Learning Environment
71 |
72 | # Authors
73 |
74 | The original authors of this software are: Yegor Tkachenko, Max Egorov, Hao Yi Ong.
75 |
76 | # License
77 |
78 | The software is distributed under the Apache License 2.0.
79 |
--------------------------------------------------------------------------------
/chimp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/chimp/__init__.py
--------------------------------------------------------------------------------
/chimp/agents/__init__.py:
--------------------------------------------------------------------------------
1 | ''' Implements Agent '''
2 |
3 | from dqn_agent import DQNAgent
--------------------------------------------------------------------------------
/chimp/agents/agent_test.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a place holder for real unit testing.
3 | Right now we just overfit a simple control problem:
4 | - the agent tries to get to the top right corner (1,1) of a 2D map
5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1)
6 | - action 1 is optimal for all states
7 | """
8 |
9 | from chimp.learners.chainer_backend import ChainerBackend
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.learners.dqn_learner import DQNPolicy
12 |
13 | from chimp.agents.dqn_agent import DQNAgent
14 |
15 | from chimp.simulators.mdp.mountain_car import MountainCar
16 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator
17 |
18 | from chimp.memories.replay_memory import ReplayMemoryHDF5
19 |
20 | from chimp.utils.policies import *
21 |
22 | import numpy as np
23 |
24 | import chainer
25 | import chainer.functions as F
26 | import chainer.links as L
27 | from chainer import Chain
28 |
29 | settings = {
30 |
31 | # agent settings
32 | 'batch_size' : 32,
33 | 'print_every' : 1000,
34 | 'save_dir' : 'results',
35 | 'iterations' : 3000,
36 | 'eval_iterations' : 200,
37 | 'eval_every' : 1000,
38 | 'save_every' : 1000,
39 | 'initial_exploration' : 10000,
40 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step
41 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
42 |     'epsilon' : 1.0, # Initial exploration rate
43 | 'learn_freq' : 1,
44 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
45 | 'model_dims' : (1,2),
46 |
47 | # simulator settings
48 | 'viz' : False,
49 |
50 | # replay memory settings
51 | 'memory_size' : 20000, # size of replay memory
52 | 'n_frames' : 1, # number of frames
53 |
54 | # learner settings
55 | 'learning_rate' : 0.0001,
56 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used
57 | 'discount' : 0.95, # discount rate for RL
58 | 'clip_err' : False, # value to clip loss gradients to
59 | 'clip_reward' : False, # value to clip reward values to
60 |     'target_net_update' : 1000, # sync the target network with the source network every fixed number of iterations
61 |     'double_DQN' : False, # use Double DQN (based on the DeepMind paper)
62 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"'
63 | 'gpu' : False,
64 | 'reward_rescale': False,
65 |
66 | # general
67 | 'seed_general' : 1723,
68 | 'seed_simulator' : 5632,
69 | 'seed_agent' : 9826,
70 | 'seed_memory' : 7563
71 |
72 | }
73 |
74 |
75 | mdp = MountainCar()
76 | simulator = MDPSimulator(mdp)
77 |
78 |
79 | class TestNet(Chain):
80 |
81 | def __init__(self):
82 | super(TestNet, self).__init__(
83 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0),
84 | l2=F.Linear(20, 10, bias=0.0),
85 | bn1=L.BatchNormalization(10),
86 | l3=F.Linear(10, 10),
87 | l4=F.Linear(10, 10),
88 | bn2=L.BatchNormalization(10),
89 | lout=F.Linear(10, simulator.n_actions)
90 | )
91 | self.train = True
92 | # initialize avg_var to prevent divide by zero
93 | self.bn1.avg_var.fill(0.1),
94 | self.bn2.avg_var.fill(0.1),
95 |
96 | def __call__(self, ohist, ahist):
97 | h = F.relu(self.l1(ohist))
98 | h = F.relu(self.l2(h))
99 | h = self.bn1(h, test=not self.train)
100 | h = F.relu(self.l3(h))
101 | h = F.relu(self.l4(h))
102 | h = self.bn2(h, test=not self.train)
103 | output = self.lout(h)
104 | return output
105 |
106 |
107 |
108 | net = TestNet()
109 | custom_learner = ChainerBackend(settings)
110 | custom_learner.set_net(net)
111 | learner = DQNLearner(settings, custom_learner)
112 |
113 | memory = ReplayMemoryHDF5(settings)
114 |
115 | agent = DQNAgent(learner, memory, simulator, settings)
116 |
117 | agent.train(verbose=True)
118 |
--------------------------------------------------------------------------------
/chimp/agents/dqn_agent.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from copy import deepcopy
4 | import pickle
5 | from timeit import default_timer as timer
6 |
7 | from chimp.utils.policies import RandomPolicy
8 | from chimp.utils.policies import DQNPolicy
9 |
10 | class DQNAgent(object):
11 |
12 | def __init__(self, learner, memory, simulator, settings, dqn_policy=None, rollout_policy=None):
13 |
14 | """
15 | The learning agent is responsible for communicating and moving
16 | data between the three modules: Learner, Simulator, Memory
17 | Inputs:
18 |         - learner: contains the neural network and the optimizer to train it
19 |         - memory: experience replay memory that can be minibatch sampled
20 |         - simulator: simulates the environment
21 |         - settings: hyperparameters for training
22 | - rollout_policy: rollout policy, random by default
23 | """
24 |
25 | self.learner = learner
26 | self.memory = memory
27 | self.simulator = simulator # for populating the experience replay
28 | self.evaluator = deepcopy(simulator) # for evaluation
29 |
30 | self.dqn_policy = dqn_policy
31 | if dqn_policy is None:
32 | self.dqn_policy = DQNPolicy(learner)
33 |
34 | self.rollout_policy = rollout_policy
35 | if rollout_policy is None:
36 | self.rollout_policy = RandomPolicy(simulator.n_actions)
37 |
38 | self.set_params(settings)
39 |
40 | self.n_epochs = self.iterations / float(memory.memory_size)
41 | self.iteration = []
42 | self.loss = []
43 | self.q_ave = []
44 | self.eval_iteration = []
45 | self.r_eval = []
46 | self.r_per_episode_eval = []
47 |
48 | def policy(self, obs, epsilon):
49 | """
50 |         e-greedy policy with customizable rollout
51 | """
52 | if self.random_state.rand() < epsilon:
53 | return self.rollout_policy.action(obs)
54 | else:
55 | return self.dqn_policy.action(obs)
56 |
57 |
58 |     def save(self, obj, name):
59 |         ''' function to save an object to a pickle file '''
60 |         with open(name, "wb") as f:
61 |             pickle.dump(obj, f)
62 |
63 |     def load(self, name):
64 |         ''' function to load a pickle file '''
65 |         with open(name, "rb") as f:
66 |             return pickle.load(f)
67 |
68 | def train(self, verbose=True):
69 | """
70 | Trains the network
71 | """
72 | learner = self.learner
73 | memory = self.memory
74 | simulator = self.simulator
75 |
76 | if self.viz:
77 | simulator.init_viz_display()
78 |
79 | # run initial exploration and populate the experience replay
80 | self.populate_memory(self.initial_exploration)
81 |
82 |         # add initial observation to observation history
83 | iobs = simulator.get_screenshot().copy()
84 | self.initial_obs(iobs)
85 |
86 | iteration = 0 # keeps track of all training iterations, ignores evaluation
87 | run_time = 0.0
88 | start_time = timer() # mark the global beginning of training
89 | last_print = timer()
90 |
91 | while iteration < self.iterations: # for the set number of iterations
92 |
93 | # perform a single simulator step
94 | self.step()
95 | # minibatch update for DQN
96 | if iteration % self.learn_freq == 0:
97 | loss, qvals = self.batch_update()
98 | self.iteration.append(iteration)
99 | self.loss.append(loss)
100 | self.q_ave.append(np.mean(qvals))
101 |
102 | if iteration % self.print_every == 0 and verbose:
103 | print "Iteration: %d, Loss: %.3f, Average Q-Values: %.2f, Time since print: %.2f, Total runtime: %.2f, epsilon: %.2f" % (iteration, loss, np.mean(qvals), timer() - last_print, timer() - start_time, self.epsilon)
104 | last_print = timer()
105 |
106 | if iteration % self.save_every == 0:
107 | # saving the net, the training history, and the learner itself
108 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration)))
109 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T)
110 |
111 | if iteration % self.eval_every == 0: # evaluation
112 | sim_r, sim_r_per_episode, sim_time = self.simulate(self.eval_iterations, self.eval_epsilon)
113 | self.eval_iteration.append(iteration)
114 | self.r_eval.append(sim_r)
115 | self.r_per_episode_eval.append(sim_r_per_episode)
116 |
117 | if verbose:
118 | print "Evaluation, total reward: %.2f, Reward per episode: %.2f" % (sim_r, sim_r_per_episode)
119 |
120 | np.savetxt('%s/evaluation_history.csv' % self.save_dir, np.asarray([self.eval_iteration, self.r_eval, self.r_per_episode_eval]).T)
121 |
122 | if iteration % self.target_net_update == 0:
123 | learner.copy_net_to_target_net()
124 |
125 | self.epsilon -= self.epsilon_decay
126 | self.epsilon = 0.1 if self.epsilon < 0.1 else self.epsilon
127 |
128 | iteration += 1
129 |
130 | memory.close()
131 |
132 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration)))
133 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T)
134 |
135 | run_time = timer() - start_time
136 | print('Overall training + evaluation time: '+ str(run_time))
137 |
138 |
139 |
140 | def step(self):
141 | """
142 | Performs a single step with the DQN and updates the replay memory
143 | """
144 | loss = 0.0
145 |
146 | simulator = self.simulator
147 |
148 | obs = simulator.get_screenshot().copy()
149 | a = self.policy((self.ohist, self.ahist), self.epsilon)
150 | simulator.act(a)
151 | r = simulator.reward()
152 |
153 | term = False
154 | obsp = None
155 | if simulator.episode_over():
156 | term = True
157 | obsp = obs.copy()
158 | simulator.reset_episode()
159 | iobs = simulator.get_screenshot().copy()
160 | self.empty_history()
161 | self.initial_obs(iobs)
162 | else:
163 | obsp = simulator.get_screenshot().copy()
164 | self.update_history(obsp, a)
165 |
166 | if self.viz: # move the image to the screen / shut down the game if display is closed
167 | simulator.refresh_viz_display()
168 |
169 | self.memory.store_tuple(obs, a, r, obsp, term)
170 |
171 |
172 | def batch_update(self):
173 | """
174 | Performs a mini-batch update on the DQN
175 | """
176 | ohist, ahist, rhist, ophist, term = self.memory.minibatch()
177 | # take the last as our action and reward
178 | a = ahist[:,-1]
179 | r = rhist[:,-1]
180 | t = term[:,-1]
181 | oahist = None
182 | # TODO: this indexing is a hack to deal with single sample history
183 | # Using the first history entry of the minibatch (there is only one) - could do this with reshape as well
184 | if self.ahist_size == 0 or self.ohist_size == 1:
185 | oahist = (ohist[:,0], None)
186 | oaphist = (ophist[:,0], None)
187 | else:
188 | oahist = (ohist, ahist[:self.ahist_size])
189 | oaphist = (ophist, ahist[1:self.ahist_size])
190 | loss, qvals = self.learner.update(oahist, a, r, oaphist, t)
191 | return loss, qvals
192 |
193 |
194 | #################################################################
195 | ################### Some Utility Functions ######################
196 | #################################################################
197 |
198 | def simulate(self, nsteps, epsilon, viz=False):
199 | """
200 | Simulates the DQN policy
201 | """
202 | simulator = self.evaluator # use a different simulator to prevent breaks
203 | simulator.reset_episode()
204 | # add initial observation to observation history
205 | iobs = simulator.get_screenshot().copy()
206 | self.initial_eval_obs(iobs)
207 |
208 | if self.viz:
209 | simulator.init_viz_display()
210 |
211 | rtot = 0.0
212 | r_per_episode = 0.0
213 | episode_count = 0
214 | start_sim = timer()
215 | for i in xrange(nsteps):
216 | # generate reward and step the simulator
217 | ohist, ahist = self.eval_ohist, self.eval_ahist
218 | a = self.policy((ohist, ahist), epsilon)
219 |
220 | simulator.act(a)
221 | r = simulator.reward()
222 | rtot += r
223 | if simulator.episode_over():
224 | simulator.reset_episode()
225 | iobs = simulator.get_screenshot().copy()
226 | self.empty_eval_history()
227 | self.initial_eval_obs(iobs)
228 | episode_count += 1
229 | r_per_episode = rtot
230 | else:
231 | obsp = simulator.get_screenshot().copy()
232 | self.update_eval_history(obsp, a)
233 |
234 | if self.viz: # move the image to the screen / shut down the game if display is closed
235 | simulator.refresh_viz_display()
236 |
237 | if episode_count > 0:
238 | r_per_episode /= episode_count
239 | else:
240 | r_per_episode = rtot
241 | runtime = timer() - start_sim
242 | return rtot, r_per_episode, runtime
243 |
244 |
245 | def populate_memory(self, nsamples):
246 | # TODO: do we need to copy obs and obsp?
247 | memory = self.memory
248 | simulator = self.simulator
249 |
250 | simulator.reset_episode()
251 | for i in xrange(nsamples):
252 | # generate o, a, r, o' tuples
253 | obs = simulator.get_screenshot().copy()
254 | a = self.rollout_policy.action(obs)
255 | simulator.act(a)
256 | r = simulator.reward()
257 | obsp = simulator.get_screenshot().copy()
258 | term = False
259 | if simulator.episode_over():
260 | term = True
261 | simulator.reset_episode() # reset
262 | # store the tuples
263 | memory.store_tuple(obs, a, r, obsp, term)
264 | simulator.reset_episode()
265 |
266 |
267 |     def plot_loss(self):
268 |         try:
269 |             from matplotlib import pyplot
270 |         except ImportError:
271 |             print "Can not plot loss, matplotlib required"
272 |             return
273 |         pyplot.plot(self.loss[1:])
274 |         pyplot.xlabel("Iteration")
275 |         pyplot.ylabel("Loss")
276 |         pyplot.show()
277 |
278 |     def plot_per_sim_reward(self):
279 |         try:
280 |             from matplotlib import pyplot
281 |         except ImportError:
282 |             print "Can not plot reward, matplotlib required"
283 |             return
284 |         pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_eval)
285 |         pyplot.xlabel("Iteration")
286 |         pyplot.ylabel("Reward")
287 |         pyplot.title("Total Reward Per Evaluation")
288 |         pyplot.show()
289 |
290 |     def plot_per_episode_reward(self):
291 |         try:
292 |             from matplotlib import pyplot
293 |         except ImportError:
294 |             print "Can not plot reward, matplotlib required"
295 |             return
296 |         pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_per_episode_eval)
297 |         pyplot.xlabel("Iteration")
298 |         pyplot.ylabel("Reward")
299 |         pyplot.title("Average Reward Per Episode")
300 |         pyplot.show()
301 | def set_params(self, settings):
302 | # set up the setting parameters
303 | self.random_state = np.random.RandomState(settings.get('seed_agent', None)) # change to a new random seed
304 |
305 | self.batch_size = settings.get('batch_size', 32)
306 | self.n_frames = settings.get('n_frames', 1)
307 | self.iterations = settings.get('iterations', 1000000)
308 |
309 | self.epsilon = settings.get('epsilon', 1.0) # exploration
310 | self.epsilon_decay = settings.get('epsilon_decay', 0.00001) # decay in
311 | self.eval_epsilon = settings.get('eval_epsilon', 0.0) # exploration during evaluation
312 | self.initial_exploration = settings.get('initial_exploration', 10000) # of iterations during initial exploration
313 |
314 | self.viz = settings.get('viz', False) # whether to visualize the state/observation, False when not supported by simulator
315 |
316 | self.eval_iterations = settings.get('eval_iterations', 500)
317 | self.eval_every = settings.get('eval_every', 5000)
318 | self.print_every = settings.get('print_every', 5000)
319 | self.save_every = settings.get('save_every', 5000)
320 | self.save_dir = settings.get('save_dir', '.')
321 | # create the directory if it doesnt exist
322 | if not os.path.isdir(self.save_dir):
323 | os.makedirs(self.save_dir)
324 |
325 | self.learn_freq = settings.get('learn_freq', 1) # how frequently to do back prop on a minibatch
326 | self.target_net_update = settings.get('target_net_update', 5000)
327 |
328 | self.ohist_size, self.ahist_size, self.rhist_size = settings.get('history_sizes', (1,0,0))
329 | self.ahist_size = 1 if self.ahist_size == 0 else self.ahist_size
330 | self.ohist_size = 1 if self.ohist_size == 0 else self.ohist_size
331 |
332 | self.ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32)
333 | self.ahist = np.zeros(self.ahist_size, dtype=np.int32)
334 | self.rev_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32)
335 | self.rev_ahist = np.zeros(self.ahist_size, dtype=np.int32)
336 |
337 | self.eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32)
338 | self.eval_ahist = np.zeros(self.ahist_size, dtype=np.int32)
339 | self.rev_eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32)
340 | self.rev_eval_ahist = np.zeros(self.ahist_size, dtype=np.int32)
341 |
342 | #################################################################
343 | ################# History utility functions #####################
344 | #################################################################
345 | """
346 | These are messy, and could be optimized
347 | """
348 |
349 | def update_history(self, obs, a):
350 | # roll the histories forward and replace the first entry
351 | # keep a reversed history so we can easily roll though it
352 | self.rev_ohist = np.roll(self.rev_ohist, 1, axis=0)
353 | self.rev_ahist = np.roll(self.rev_ahist, 1, axis=0)
354 | self.rev_ahist[0] = a
355 | self.rev_ohist[0] = obs
356 |
357 | # reverse to get history in [s0, s1, s2,...,sn] format
358 | self.ohist = np.flipud(self.rev_ohist)
359 | self.ahist = np.flipud(self.rev_ahist)
360 |
361 |
362 | def update_eval_history(self, obs, a):
363 | # roll the histories forward and replace the first entry
364 | self.rev_eval_ohist = np.roll(self.rev_eval_ohist, 1, axis=0)
365 | self.rev_eval_ahist = np.roll(self.rev_eval_ahist, 1, axis=0)
366 | self.rev_eval_ahist[0] = a
367 | self.rev_eval_ohist[0] = obs
368 |
369 | self.eval_ohist = np.flipud(self.rev_eval_ohist)
370 | self.eval_ahist = np.flipud(self.rev_eval_ahist)
371 |
372 | def initial_obs(self, obs):
373 | self.rev_ohist[0] = obs
374 | self.ohist[-1] = obs
375 |
376 | def initial_eval_obs(self, obs):
377 | self.rev_eval_ohist[0] = obs
378 | self.eval_ohist[-1] = obs
379 |
380 |
381 | def empty_history(self):
382 | self.ohist.fill(self.memory._emptyfloat)
383 | self.ahist.fill(self.memory._emptyint)
384 | self.rev_ohist.fill(self.memory._emptyfloat)
385 | self.rev_ahist.fill(self.memory._emptyint)
386 |
387 | def empty_eval_history(self):
388 | self.eval_ohist.fill(self.memory._emptyfloat)
389 | self.eval_ahist.fill(self.memory._emptyint)
390 | self.rev_eval_ohist.fill(self.memory._emptyfloat)
391 | self.rev_eval_ahist.fill(self.memory._emptyint)
392 |
393 |
--------------------------------------------------------------------------------
/chimp/learners/__init__.py:
--------------------------------------------------------------------------------
1 | ''' Implements Learner '''
2 |
--------------------------------------------------------------------------------
/chimp/learners/chainer_backend.py:
--------------------------------------------------------------------------------
1 | '''
2 | (Double) Deep Q-Learning Algorithm Implementation
3 | Supports double deep Q-learning on either GPU or CPU
4 |
5 | '''
6 |
7 | import numpy as np
8 | import chainer
9 | import chainer.functions as F
10 | from chainer import optimizers
11 | from chainer import cuda
12 | from copy import deepcopy
13 |
14 | import pickle # used to save the nets
15 |
16 | class ChainerBackend(object):
17 |
18 | def __init__(self, settings, net = None):
19 |
20 | self.set_params(settings)
21 |
22 | self.source_net = None
23 | self.target_net = None
24 | if net is not None:
25 | self.set_net(net)
26 |
27 |
28 | def update(self, obs, a, r, obsp, term):
29 | """
30 | Performs a single mini-batch update
31 | """
32 |
33 | self.source_net.zerograds() # reset gradient storage to zero
34 |
35 | # compute loss and qval output layer
36 | loss, qvals = self.forward_loss(obs, a, r, obsp, term)
37 |
38 | qvals.backward() # propagate the loss gradient through the net
39 | self.optimizer.update() # carry out parameter updates based on the distributed gradients
40 | if self.gpu:
41 | return loss, qvals.data.get()
42 | else:
43 | return loss, qvals.data
44 |
45 |
46 | def forward_loss(self, obs, a, r, obsp, term):
47 | """
48 | Computes the loss and gradients
49 | """
50 | if self.gpu:
51 | return self.forward_loss_gpu(obs, a, r, obsp, term)
52 | else:
53 | return self.forward_loss_cpu(obs, a, r, obsp, term)
54 |
55 |
56 | def forward_loss_gpu(self, obs, a, r, obsp, term):
57 | # unpack
58 | ohist, ahist = obs
59 | ophist, aphist = obsp
60 |
61 | # move to GPU
62 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist)
63 | ophist, aphist = self.to_gpu(ophist), self.to_gpu(aphist)
64 |
65 | # transfer inputs into Chainer format
66 | ohist, ophist = chainer.Variable(ohist), chainer.Variable(ophist, volatile = True)
67 | ahist, aphist = chainer.Variable(ahist), chainer.Variable(aphist, volatile = True)
68 |
69 | # get target Q
70 | target_q_all = self.target_net(ophist, aphist) # forward prop
71 | target_q_max = np.max(target_q_all.data.get(), axis=1) # max Q for each entry in mini-batch
72 |
73 | # compute the target values for each entry in mini-batch
74 | target_q_vals = r + self.discount * target_q_max * np.invert(term)
75 |
76 | # compute the source q-vals
77 | source_q_all = self.source_net(ohist, ahist) # forward prop
78 | source_q_vals = source_q_all.data.get()[np.arange(source_q_all.data.shape[0]), a]
79 |
80 | # compute the loss grads
81 | qdiff = source_q_vals - target_q_vals
82 |
83 | # distribute the loss gradient into the shape of the net's output
84 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32)
85 | dQ[np.arange(dQ.shape[0]), a] = qdiff
86 |
87 | # set as the output grad layer
88 | source_q_all.grad = self.to_gpu(dQ)
89 |
90 | # compute loss
91 | loss = np.mean(dQ**2)
92 |
93 | return loss, source_q_all
94 |
95 |
96 | def forward_loss_cpu(self, obs, a, r, obsp, term):
97 | # unpack
98 | ohist, ahist = obs
99 | ophist, aphist = obsp
100 |
101 | # transfer inputs into Chainer format
102 | ohist, ophist = self.chainer_var(ohist), self.chainer_var(ophist, volatile = True)
103 | ahist, aphist = self.chainer_var(ahist), self.chainer_var(aphist, volatile = True)
104 |
105 | # get target Q
106 | target_q_all = self.target_net(ophist, aphist)
107 | target_q_max = np.max(target_q_all.data, axis=1)
108 |
109 | # compute the target values for each entry in mini-batch
110 | target_q_vals = r + self.discount * target_q_max * np.invert(term)
111 |
112 | # compute the source q-vals
113 | source_q_all = self.source_net(ohist, ahist) # forward prop
114 | source_q_vals = source_q_all.data[np.arange(source_q_all.data.shape[0]),a]
115 |
116 | # compute the loss
117 | qdiff = source_q_vals - target_q_vals
118 |
119 | # distribute the loss gradient into the shape of the net's output
120 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32)
121 | dQ[np.arange(dQ.shape[0]), a] = qdiff
122 |
123 | # set as the output grad layer
124 | source_q_all.grad = dQ
125 |
126 | # compute loss
127 | loss = np.mean(dQ**2)
128 |
129 | return loss, source_q_all
130 |
131 |
132 | def forward(self, obs):
133 | """
134 | Returns the Q-values for the network input obs
135 | """
136 | # turn train off for bn, dropout, etc
137 | self.source_net.train = False
138 | if self.gpu:
139 | return self.forward_gpu(obs)
140 | else:
141 | return self.forward_cpu(obs)
142 |
143 |
144 | def forward_cpu(self, obs):
145 | """
146 | Performs forward pass on CPU, returns Q values
147 | """
148 | # unpack
149 | ohist, ahist = obs
150 | # transfer inputs into Chainer format
151 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True)
152 | # evaluate
153 | qvals = self.source_net(ohist, ahist)
154 | return qvals.data
155 |
156 | def forward_gpu(self, obs):
157 | """
158 |         Performs forward pass on GPU, returns Q values
159 | """
160 | # unpack
161 | ohist, ahist = obs
162 | # move to gpu
163 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist)
164 | # transfer inputs into Chainer format
165 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True)
166 | # evaluate
167 | qvals = self.source_net(ohist, ahist)
168 | return qvals.data.get()
169 |
170 | #################################################################
171 | #################### Utility Functions ##########################
172 | #################################################################
173 |
174 | def to_gpu(self, var):
175 | if var is None:
176 | return None
177 | return cuda.to_gpu(var)
178 |
179 | def chainer_var(self, var, volatile=False):
180 | if var is None:
181 | return None
182 | return chainer.Variable(var, volatile=volatile)
183 |
184 | def set_net(self, net):
185 | self.source_net = deepcopy(net)
186 | self.target_net = deepcopy(net)
187 | if self.gpu:
188 | cuda.get_device(0).use()
189 | self.source_net.to_gpu()
190 | self.target_net.to_gpu()
191 | self.optimizer.setup(self.source_net)
192 | self.target_net.train = False
193 |
194 |
195 | def params(self):
196 | ''' collect net parameters (coefs and grads) '''
197 |         return self.source_net.params()
198 |
199 |
200 | def set_params(self, params):
201 |
202 | self.gpu = params.get('gpu',False)
203 | self.learning_rate = params.get('learning_rate',0.00025)
204 | self.decay_rate = params.get('decay_rate',0.95)
205 | self.discount = params.get('discount',0.95)
206 | self.clip_err = params.get('clip_err',False)
207 | self.target_net_update = params.get('target_net_update',10000)
208 | self.double_DQN = params.get('double_DQN',False)
209 |
210 | # setting up various possible gradient update algorithms
211 | opt = params.get('optim_name', 'ADAM')
212 | if opt == 'RMSprop':
213 | self.optimizer = optimizers.RMSprop(lr=self.learning_rate, alpha=self.decay_rate)
214 |
215 | elif opt == 'ADADELTA':
216 | print("Supplied learning rate not used with ADADELTA gradient update method")
217 | self.optimizer = optimizers.AdaDelta()
218 |
219 | elif opt == 'ADAM':
220 | self.optimizer = optimizers.Adam(alpha=self.learning_rate)
221 |
222 | elif opt == 'SGD':
223 | self.optimizer = optimizers.SGD(lr=self.learning_rate)
224 |
225 | else:
226 | print('The requested optimizer is not supported!!!')
227 | exit()
228 |
229 | if self.clip_err is not False:
230 | self.optimizer.add_hook(chainer.optimizer.GradientClipping(self.clip_err))
231 |
232 | self.optim_name = params['optim_name']
233 |
--------------------------------------------------------------------------------
/chimp/learners/chainer_test.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a place holder for real unit testing.
3 | Right now we just overfit a simple control problem:
4 | - the agent tries to get to the top right corner (1,1) of a 2D map
5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1)
6 | - action 1 is optimal for all states
7 | """
8 |
9 | from chimp.learners.chainer_backend import ChainerBackend
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.learners.dqn_learner import DQNPolicy
12 |
13 | import numpy as np
14 |
15 | import chainer
16 | import chainer.functions as F
17 | import chainer.links as L
18 | from chainer import Chain
19 |
20 | settings = {
21 |
22 | # agent settings
23 | 'batch_size' : 32,
24 | 'print_every' : 500,
25 | 'save_dir' : 'results/nets_rocksample_belief_rmsprop',
26 | 'iterations' : 100000,
27 | 'eval_iterations' : 100,
28 | 'eval_every' : 1000,
29 | 'save_every' : 500,
30 | 'initial_exploration' : 500,
31 | 'epsilon_decay' : 0.00001, # subtract from epsilon every step
32 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
33 |     'epsilon' : 1.0, # Initial exploration rate
34 | 'learn_freq' : 1,
35 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
36 | 'model_dims' : (1,2),
37 |
38 | # simulator settings
39 | 'viz' : False,
40 |
41 | # replay memory settings
42 | 'memory_size' : 1000, # size of replay memory
43 | 'n_frames' : 1, # number of frames
44 |
45 | # learner settings
46 | 'learning_rate' : 0.00025,
47 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used
48 | 'discount' : 0.95, # discount rate for RL
49 | 'clip_err' : False, # value to clip loss gradients to
50 | 'clip_reward' : 1, # value to clip reward values to
51 |     'target_net_update' : 1000, # sync the target network with the source network every fixed number of iterations
52 |     'double_DQN' : False, # use Double DQN (based on the DeepMind paper)
53 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"'
54 | 'gpu' : False,
55 | 'reward_rescale': False,
56 |
57 | # general
58 | 'seed_general' : 1723,
59 | 'seed_simulator' : 5632,
60 | 'seed_agent' : 9826,
61 | 'seed_memory' : 7563
62 |
63 | }
64 |
65 | n_actions = 2
66 | o_dims = settings['model_dims']
67 | n_samples = settings['batch_size']
68 |
69 | class TestNet(Chain):
70 |
71 | def __init__(self):
72 | super(TestNet, self).__init__(
73 | #l1=F.Bilinear(settings["history_sizes"][0], settings["history_sizes"][1], 20),
74 | l1=F.Linear(o_dims[1], 20, bias=0.0),
75 | l2=F.Linear(20, 10, bias=0.0),
76 | bn1=L.BatchNormalization(10),
77 | lout=F.Linear(10, n_actions)
78 | )
79 | self.train = True
80 | # initialize avg_var to prevent divide by zero
81 | self.bn1.avg_var.fill(0.1),
82 |
83 | def __call__(self, ohist, ahist):
84 | h = F.relu(self.l1(ohist))
85 | h = F.relu(self.l2(h))
86 | h = self.bn1(h, test=not self.train)
87 | output = self.lout(h)
88 | return output
89 |
90 | def make_batch(n_samples, o_dims, n_actions):
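    # synthetic batch for overfitting: action 1 moves the observation toward (1,1),
    # and the reward is the sum of the current observation, so action 1 is optimal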
91 | obs = np.zeros((n_samples,)+o_dims, dtype=np.float32)
92 | obsp = np.zeros((n_samples,)+o_dims, dtype=np.float32)
93 | a = np.zeros(n_samples, dtype=np.int32)
94 | r = np.zeros(n_samples, dtype=np.float32)
95 | term = np.zeros(n_samples, dtype=np.bool)
96 | for i in xrange(n_samples):
97 | obs[i] = np.random.uniform(0.0, 1.0, o_dims)
98 | a[i] = np.random.randint(n_actions)
99 | obsp[i] = (obs[i] + 0.25) if a[i] == 1 else (obs[i] - 0.25)
100 | obsp[i] = np.clip(obsp[i], 0.0, 1.0)
101 | r[i] = np.sum(obs[i])
102 | return obs, a, r, obsp, term
103 |
104 |
105 | net = TestNet()
106 | custom_learner = ChainerBackend(settings)
107 | custom_learner.set_net(net)
108 |
109 | learner = DQNLearner(settings, custom_learner)
110 |
111 | policy = DQNPolicy(learner)
112 |
113 | obst, a, r, obsp, term = make_batch(10, o_dims, n_actions)
114 |
115 | for i in xrange(10):
116 | ohist = (obst[i], None)
117 | a = policy.action(ohist)
118 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None))
119 |
120 | print "TRAINING"
121 | for i in xrange(3000):
122 | obs, a, r, obsp, term = make_batch(n_samples, o_dims, n_actions)
123 | ohist = (obs, None)
124 | ophist = (obsp, None)
125 | #loss, q_all = custom_learner.forward_loss(ohist, a, r, ophist, term)
126 | loss, q_all = learner.update(ohist, a, r, ophist, term)
127 | if i % 500 == 0:
128 | print loss
129 |
130 |
131 | for i in xrange(10):
132 | ohist = (obst[i], None)
133 | a = policy.action(ohist)
134 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None))
135 |
136 |
137 |
--------------------------------------------------------------------------------
/chimp/learners/dqn_learner.py:
--------------------------------------------------------------------------------
1 | '''
2 | (Double) Deep Q-Learning Algorithm Implementation
3 | Supports double deep Q-learning on either GPU or CPU
4 |
5 | '''
6 |
7 | import numpy as np
8 | import pickle # used to save the nets
9 | from copy import deepcopy
10 |
11 | class DQNLearner(object):
12 |
13 | def __init__(self, settings, backend):
14 |
15 | """
16 | Functions that must be defined by the custom learner:
17 | - forward_loss(obs, a, r, obsp, term) # computes scores and loss
18 | - forward(obs) # computes scores
19 |         - update(obs, a, r, obsp, term) # update the params
20 | - get_net() # returns the network object
21 | - set_net(net) # sets the source and target nets and moves to gpu (if needed)
22 | Fields owned by the learner:
23 | - source_net: generates source Q-vals
24 | - target_net: generates target Q-vals
25 | """
26 |
27 | self.backend = backend
28 |
29 | self.clip_reward = settings.get('clip_reward', False)
30 | self.reward_rescale = settings.get('reward_rescale', False)
31 | self.r_max = 1 # keep the default value at 1
32 |
33 |
34 | def update(self, obs, a, r, obsp, term):
35 | r = self.pre_process_reward(r)
36 | return self.backend.update(obs, a, r, obsp, term)
37 |
38 | def forward_loss(self, obs, a, r, obsp, term):
39 | return self.backend.forward_loss(obs, a, r, obsp, term)
40 |
41 | def forward(self, obs):
42 | return self.backend.forward(obs)
43 |
44 | def copy_net_to_target_net(self):
45 | ''' update target net with the current net '''
46 | self.backend.target_net = deepcopy(self.backend.source_net)
47 |
48 | def save(self,obj,name):
49 | pickle.dump(obj, open(name, "wb"))
50 |
51 | def load(self,name):
52 | return pickle.load(open(name, "rb"))
53 |
54 | def save_net(self,name):
55 | ''' save a net to a path '''
56 | self.save(self.backend.source_net,name)
57 |
58 | def load_net(self,net):
59 | ''' load in a net from path or a variable'''
60 | if isinstance(net, str): # if it is a string, load the net from the path
61 | net = self.load(net)
62 | self.backend.set_net(net)
63 |
64 |
65 | def save_training_history(self, path='.'):
66 | ''' save training history '''
67 | train_hist = np.array([range(len(self.train_rewards)),self.train_losses,self.train_rewards, self.train_qval_avgs, self.train_episodes, self.train_times]).T
68 | eval_hist = np.array([range(len(self.val_rewards)),self.val_losses,self.val_rewards, self.val_qval_avgs, self.val_episodes, self.val_times]).T
69 | # TODO: why is this here and not in agent?
70 | np.savetxt(path + '/training_hist.csv', train_hist, delimiter=',')
71 | np.savetxt(path + '/evaluation_hist.csv', eval_hist, delimiter=',')
72 |
73 | def params(self):
74 | """
75 |         Returns an iterator over network parameters
76 | Note: different back-ends will return different param containers
77 | """
78 | # TODO: return a dictionary here?
79 |         return self.backend.params()
80 |
81 |
82 | def pre_process_reward(self, r):
83 | """
84 | Clips and re-scales the rewards
85 | """
86 | if self.clip_reward:
87 | r = np.clip(r,-self.clip_reward,self.clip_reward)
88 | if self.reward_rescale:
89 | self.r_max = max(np.amax(np.absolute(r)),self.r_max)
90 | r = r / self.r_max
91 | return r
92 |
93 |
--------------------------------------------------------------------------------
/chimp/memories/__init__.py:
--------------------------------------------------------------------------------
1 | ''' Implements Experience Replay Memory '''
2 |
3 | from replay_memory import ReplayMemoryHDF5
4 | from memory import ReplayMemory
--------------------------------------------------------------------------------
/chimp/memories/mem_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from replay_memory import ReplayMemoryHDF5
3 |
4 |
5 | settings = {
6 | 'save_dir' : 'results/test',
7 | 'seed_memory' : 1,
8 | 'history_sizes' : (5, 2, 0),
9 | 'memory_size' : 1000,
10 | 'model_dims' : (1,20),
11 | 'batch_size' : 32
12 | }
13 |
14 | mem = ReplayMemoryHDF5(settings)
15 |
16 | o_dims = settings['model_dims']
17 |
18 | for i in xrange(1000):
19 | obs = np.random.random(o_dims) + i # random obs
20 |     a = np.random.randint(10) + i  # 10 actions
21 | r = np.random.rand() + i
22 | obsp = np.random.random(o_dims) + i
23 | term = bool(np.random.binomial(1,0.1)) # 10% chance reach terminal state
24 | mem.store_tuple(obs, a, r, obsp, term)
25 |
26 | o,a,r,op,terms=mem.minibatch()
27 | #mem.close()
28 |
--------------------------------------------------------------------------------
/chimp/memories/memory.py:
--------------------------------------------------------------------------------
1 | '''
2 | An alternative replay memory that does not utilize HDF5 - less efficient
3 | '''
4 |
5 | import numpy as np
6 |
7 | class ReplayMemory(object):
8 |
9 | def __init__(self, settings):
10 |
11 | self.random_state = np.random.RandomState(settings['seed_memory'])
12 | self.memory_size = settings['memory_size']
13 | self.model_dims = settings['model_dims']
14 | self.n_frames = settings['n_frames']
15 | self.data = [np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32),
16 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32),
17 | np.zeros(self.memory_size, dtype=np.int32),
18 | np.zeros(self.memory_size, dtype=np.float32),
19 | np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32),
20 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32),
21 | np.zeros(self.memory_size, dtype=np.bool)]
22 | self.counter = 0
23 |
24 | # function to sample a mini-batch
25 | def minibatch(self, batch_size):
26 | # sampling a mini-batch of the given size with replacement
27 | ind = self.random_state.randint(0,min(self.counter,self.memory_size),batch_size)
28 | return self.data[0][ind], self.data[1][ind], self.data[2][ind], self.data[3][ind], self.data[4][ind], self.data[5][ind], self.data[6][ind]
29 |
30 | # function to store the observed experience and keep the count within the replay memory
31 | def store_tuple(self, s0, ahist0, a, r, s1, ahist1, episode_end_flag = False):
32 |
33 | # keep the most recent observations within the limit of the memory
34 | ind = self.counter % self.memory_size
35 |
36 | self.data[0][ind] = s0
37 | self.data[1][ind] = ahist0
38 | self.data[2][ind] = a
39 | self.data[3][ind] = r
40 |
41 | if not episode_end_flag:
42 | self.data[4][ind] = s1
43 | self.data[5][ind] = ahist1
44 |
45 | self.data[6][ind] = episode_end_flag
46 |
47 | self.counter += 1
48 |
--------------------------------------------------------------------------------
/chimp/memories/replay_memory.py:
--------------------------------------------------------------------------------
1 | ''' Implements class for reading/writing experiences to the replay dataset.
2 |
3 | We assume
4 | (1) Actions and rewards for the full history fit comfortably in memory,
5 | (2) The belief state representation for the full history does not,
6 | (3) A single sample of belief states fits comfortably in memory.
7 |
8 | For instance, if the replay dataset stores the last 1 million experiences,
9 | then the history of actions is 1 byte x 1 M = 1 MB. The same holds for the
10 | history of rewards. However, a modest belief state representation might be
11 | a dense vector with a maximum of 1,000 Float64 elements (typical state spaces
12 | are on the order of millions). In this case the full history of 1 million
13 | states would be (1,000 elem x 8 bytes x 1 M = 8 GB).
14 |
15 | N.B.!
16 | Each index of the memory holds a full (obs, action, reward, next_obs,
17 | end_of_game_flag) tuple: observations and next observations live in HDF5
18 | datasets on disk, while actions, rewards and terminals are kept in RAM.
19 | Minibatches are sampled as history windows that end at a sampled index.
20 | '''
21 |
22 | import numpy as np
23 | import h5py
24 | import os
25 |
26 | class ReplayMemoryHDF5(object):
27 | ''' Wrapper around a replay dataset residing on disk as HDF5. '''
28 |
29 | def __init__(self, settings, filename='memory.hdf5', overwrite=True, empty=-1):
30 |
31 | if not os.path.exists(settings['save_dir']):
32 | os.makedirs(settings['save_dir'])
33 |
34 | filename = settings['save_dir'] + '/' + filename
35 | self.random_state = np.random.RandomState(settings['seed_memory'])
36 | self.ohist_size, self.ahist_size, self.rhist_size = settings['history_sizes']
37 |
38 |         self.ahist_size = 1 if self.ahist_size == 0 else self.ahist_size
39 |         self.rhist_size = 1 if self.rhist_size == 0 else self.rhist_size
40 |
41 | self.max_size = max(settings['history_sizes'])
42 | self.batch_size = settings['batch_size']
43 |
44 | if overwrite:
45 | self.fp = h5py.File(filename, 'w')
46 | else:
47 | self.fp = h5py.File(filename, 'a')
48 |
49 | if all(x in self.fp for x in ('observations', 'actions', 'rewards', 'next_observations', 'terminals')):
50 | self.observations = self.fp['observations']
51 | self.memory_size = self.observations.shape[0]
52 |
53 | self.actions = np.empty(self.memory_size, dtype=np.int32)
54 | self.fp['actions'].read_direct(self.actions)
55 |
56 | self.rewards = np.empty(self.memory_size, dtype=np.float32)
57 | self.fp['rewards'].read_direct(self.rewards)
58 |
59 | self.next_observations = self.fp['next_observations']
60 |
61 | self.terminals = np.empty(self.memory_size, dtype=bool)
62 | self.fp['terminals'].read_direct(self.terminals)
63 |
64 | if self.memory_size != settings['memory_size']:
65 | print("Warning: dataset loaded from %s is of size %d, "
66 | "not %d as indicated in |settings|. Using existing size."
67 | % (filename, self.memory_size, settings['memory_size']))
68 |
69 | else:
70 | self.memory_size = settings['memory_size']
71 | obs_shape = settings['model_dims']
72 |
73 | self.observations = self.fp.create_dataset('observations', (self.memory_size,) + obs_shape, dtype=np.float32)
74 | self.next_observations = self.fp.create_dataset('next_observations', (self.memory_size,) + obs_shape, dtype=np.float32)
75 |
76 | self.fp.create_dataset('actions', (self.memory_size,), dtype='int32')
77 | self.fp.create_dataset('rewards', (self.memory_size,), dtype='float32')
78 | self.fp.create_dataset('terminals', (self.memory_size,), dtype=bool)
79 |
80 | self.actions = np.empty(self.memory_size, dtype=np.int32)
81 | self.rewards = np.empty(self.memory_size, dtype=np.float32)
82 | self.terminals = np.empty(self.memory_size, dtype=np.bool)
83 |
84 | self.observations.attrs['head'] = 0
85 | self.observations.attrs['valid'] = 0
86 |
87 | # index of current "write" location
88 | self.head = self.observations.attrs['head']
89 |
90 |         # number of valid experiences; valid indices are [0, self.valid)
91 | self.valid = self.observations.attrs['valid']
92 |
93 | # initialize histories
94 | self.ohist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32)
95 | self.ophist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32)
96 | self.ahist = np.zeros((self.batch_size, self.ahist_size), dtype=np.int32)
97 | self.rhist = np.zeros((self.batch_size, self.rhist_size), dtype=np.float32)
98 | self.thist = np.zeros((self.batch_size, self.ohist_size), dtype=np.bool)
99 |
100 | self._emptyint = np.int32(empty)
101 | self._emptyfloat = np.float32(empty)
102 |
103 | def minibatch(self):
104 | ''' Uniformly sample (o,a,r,o') experiences from the replay dataset.
105 |
106 |         Args:
107 |             None; the batch size is taken from settings['batch_size'].
108 | 
109 |         Returns:
110 |             Five numpy arrays that correspond to the o, a, r and o'
111 |             histories, and the terminal state indicators.
112 |         '''
113 | batch_size = self.batch_size
114 | if batch_size >= self.valid:
115 | raise ValueError("Can't draw sample of size %d from replay dataset of size %d"
116 | % (batch_size, self.valid))
117 |
118 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size
119 | max_hist = self.max_size
120 |
121 | indices = self.get_indices(batch_size)
122 |
123 | self.clear_history()
124 |
125 |         # TODO: can we get rid of this loop by sorting indices and then reshaping?
126 | for i in xrange(batch_size):
127 | # all end on the same index
128 | endi = indices[i]
129 | starti = endi - max_hist
130 |             # starting indices if no terminal states
131 | starto, starta, startr = endi-ohist_size, endi-ahist_size, endi-rhist_size
132 |
133 | # look backwards and find first terminal state
134 | termarr = np.where(self.terminals[starti:endi-1]==True)[0]
135 | termidx = starti
136 |             if termarr.size != 0:
137 | termidx = endi - (endi-starti - termarr[-1]) + 1
138 |
139 | # if starts before terminal, change start index
140 | starto = termidx if starto < termidx else starto
141 | starta = termidx if starta < termidx else starta
142 | startr = termidx if startr < termidx else startr
143 |
144 | ohl, ahl, rhl = (endi - starto), (endi - starta), (endi - startr)
145 |
146 | # load from memory
147 | self.ohist[i, ohist_size-ohl:] = self.observations[xrange(starto, endi)]
148 | self.ophist[i, ohist_size-ohl:] = self.next_observations[xrange(starto, endi)]
149 | self.ahist[i, ahist_size-ahl:] = self.actions[xrange(starta, endi)]
150 | self.rhist[i, rhist_size-rhl:] = self.rewards[xrange(startr, endi)]
151 | self.thist[i, ohist_size-ohl:] = self.terminals[xrange(starto, endi)]
152 |
153 | return self.ohist, self.ahist, self.rhist, self.ophist, self.thist
154 |
155 |
156 | def get_indices(self, batch_size):
157 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size
158 | max_hist = self.max_size
159 |
160 | # want to sample from valid history sets
161 | start_shift = self.random_state.randint(max_hist)
162 |
163 | # indices corresponding to ranges from which to sample
164 | indices = self.random_state.choice(xrange(1,self.valid/max_hist), size=batch_size, replace=False)
165 | # shift all the indices and offset
166 | indices *= max_hist
167 | indices += start_shift
168 |
169 | return indices
170 |
171 |
172 | def store_tuple(self, obs, action, reward, obsp, terminal):
173 |         ''' Stores an experience tuple into the replay dataset, i.e., a
174 |         tuple (obs, action, reward, obsp, terminal) where |obsp| is the observation
175 |         made when the agent takes |action| and receives |reward|,
176 |         while |obs| is the observation made prior to taking |action|.
177 |         The observation |obs| is assumed to be at index (self.head).
178 | 
179 |         Args:
180 |             obs: observation made at time t of shape provided by user (obs_shape)
181 |             action: index of action chosen
182 |             reward: float value of reward received after taking action,
183 |                 or None if the input action ended the game
184 |             obsp: observation made after taking |action|
185 |             terminal: indicates if obsp is terminal
186 |         '''
187 | self.actions[self.head] = action
188 | self.rewards[self.head] = reward
189 | self.terminals[self.head] = terminal
190 | self.observations[self.head] = obs
191 | self.next_observations[self.head] = obsp
192 |
193 | # update head and valid pointers
194 | self.head = (self.head + 1) % self.memory_size
195 | self.valid = min(self.memory_size, self.valid + 1)
196 |
197 | def clear_history(self):
198 | self.ohist.fill(self._emptyfloat)
199 | self.ophist.fill(self._emptyfloat)
200 | self.ahist.fill(self._emptyint)
201 | self.rhist.fill(0.0)
202 | self.thist.fill(False)
203 |
204 | def close(self):
205 | ''' Stores the memory dataset into the file when program ends. '''
206 | self.fp['actions'][:] = self.actions
207 | self.fp['rewards'][:] = self.rewards
208 | self.fp['terminals'][:] = self.terminals
209 | self.observations.attrs['head'] = self.head
210 | self.observations.attrs['valid'] = self.valid
211 | self.fp.close()
212 |
213 | def __del__(self):
214 | try:
215 | self.close()
216 | except:
217 | pass # already closed
218 |
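Given the history-window sampling above, each call to minibatch() returns stacked histories rather than single transitions; a sketch of the resulting shapes (illustrative values, assuming batch_size=32, history_sizes=(4, 1, 1) and model_dims=(84, 84)):

    o, a, r, op, term = mem.minibatch()
    # o.shape    == (32, 4, 84, 84)   observation history ending at the sampled index
    # a.shape    == (32, 1)           action history
    # r.shape    == (32, 1)           reward history
    # op.shape   == (32, 4, 84, 84)   next-observation history
    # term.shape == (32, 4)           terminal flags aligned with the observation history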
--------------------------------------------------------------------------------
/chimp/pre_trained_nets/mountain_car.net:
--------------------------------------------------------------------------------
1 | [binary content omitted: pickled Chainer network (TestNet) with Linear layers l1-l4 and lout and BatchNormalization layers bn1 and bn2, stored as float32 weights]
--------------------------------------------------------------------------------
/chimp/simulators/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Environment simulators.
3 |
4 | * Arcade Learning Environment for Atari game simulation
5 | * OpenAI Gym, MDP (cart pole, mountain car), and POMDP (tiger, rock sample) models
6 | 
7 | Required functions:
8 |     __init__, get_screenshot, act, reward, episode_over, reset_episode
9 | 
10 | Required attributes:
11 |     n_actions, model_dims
12 |
13 | '''
14 |
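A minimal stub satisfying this interface, using the method names the bundled simulators actually implement; this is an illustrative sketch, not part of the package:

    class ConstantSimulator(object):
        '''Toy simulator: two actions, a fixed observation, never terminates.'''

        def __init__(self, settings):
            self.n_actions = 2          # required attribute
            self.model_dims = (1, 2)    # shape of get_screenshot() output
            self.last_reward = 0.0

        def act(self, action_index):    # transition s -> s' given an action index
            self.last_reward = 1.0 if action_index == 0 else 0.0

        def reward(self):               # reward produced by the last act() call
            return self.last_reward

        def get_screenshot(self):       # current observation / state / belief
            return [[0.0, 0.0]]

        def episode_over(self):         # True once a terminal state is reached
            return False

        def reset_episode(self):        # start a new episode
            self.last_reward = 0.0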
--------------------------------------------------------------------------------
/chimp/simulators/atari/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Environment simulators.
3 |
4 | '''
5 |
6 | from atari import AtariSimulator
--------------------------------------------------------------------------------
/chimp/simulators/atari/atari.py:
--------------------------------------------------------------------------------
1 | from ale_python_interface import ALEInterface
2 | import pygame
3 |
4 | import numpy as np
5 | import scipy.misc as spm
6 |
7 |
8 | class AtariSimulator(object):
9 |
10 | def __init__(self, settings):
11 |
12 | '''Initiate Arcade Learning Environment (ALE) using Python interface
13 | https://github.com/bbitmaster/ale_python_interface/wiki
14 |
15 | - Set number of frames to be skipped, random seed, ROM and title for display.
16 | - Retrieve a set of legal actions and their number.
17 | - Retrieve dimensions of the original screen (width/height), and set the dimensions
18 | of the cropped screen, together with the padding used to crop the screen rectangle.
19 | - Set dimensions of the pygame display that will show visualization of the simulation.
20 | (May be cropped --- showing what the learner sees, or not --- showing full Atari screen)
21 | - Allocate memory for generated grayscale screenshots. Accepts dims in (height/width) format
22 | '''
23 |
24 | self.ale = ALEInterface()
25 | self.ale.setInt("frame_skip",settings["frame_skip"])
26 | self.ale.setInt("random_seed",settings["seed_simulator"])
27 | self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"])
28 |
29 | self.title = "ALE Simulator: " + str(settings["rom"])
30 | self.actions = self.ale.getLegalActionSet()
31 | self.n_actions = self.actions.size
32 |
33 | self.screen_dims = self.ale.getScreenDims()
34 | self.model_dims = settings['model_dims']
35 | self.pad = settings['pad']
36 |
37 | print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1]))
38 | print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1]))
39 |
40 | self.viz_cropped = settings['viz_cropped']
41 | if self.viz_cropped:
42 | self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2))
43 | else:
44 | self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2))
45 |
46 | # preallocate an array to accept ALE screen data (height/width) !
47 | self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8)
48 |
49 |
50 | def get_screenshot(self):
51 | '''returns a cropped snapshot of the simulator
52 | - store grayscale values in a preallocated array
53 | - cut out a square from the rectangle, using provided padding value
54 | - downsample to the desired size and transpose from (height/width) to (width/height)
55 | '''
56 |
57 | self.ale.getScreenGrayscale(self.screen_data)
58 | self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:]
59 |         self.frame = spm.imresize(self.tmp, self.model_dims[::-1], interp='nearest').T
60 |
61 | return self.frame
62 |
63 |
64 | def act(self,action_index):
65 | '''function to transition the simulator from s to s' using provided action
66 | the action that is provided is in form of an index
67 | simulator deals with translating the index into an actual action'''
68 |
69 | self.last_reward = self.ale.act(self.actions[action_index])
70 |
71 |
72 | def reward(self):
73 | '''return reward - has to be called after the "act" function'''
74 |
75 | return self.last_reward
76 |
77 |
78 | def episode_over(self):
79 | '''return a boolean indicator on whether the game is still running'''
80 |
81 | return self.ale.game_over()
82 |
83 |
84 | def reset_episode(self):
85 | '''reset the game that ended'''
86 |
87 | self.ale.reset_game()
88 |
89 |
90 | def init_viz_display(self):
91 | '''initialize display that will show visualization'''
92 |
93 | pygame.init()
94 | self.screen = pygame.display.set_mode(self.display_dims)
95 | if self.title:
96 | pygame.display.set_caption(self.title)
97 |
98 |
99 | def refresh_viz_display(self):
100 | '''if display is shut down, shut the game down
101 | else move the current simulator's frame (cropped or not cropped) into the pygame display,
102 | after expanding it 2x along x and y dimensions'''
103 |
104 | for event in pygame.event.get():
105 | if event.type == pygame.QUIT:
106 |                 exit()
107 |
108 | if self.viz_cropped:
109 | self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed
110 | else:
111 | self.surface = pygame.surfarray.make_surface(self.screen_data.T)
112 |
113 | self.screen.blit(pygame.transform.scale2x(self.surface),(0,0))
114 | pygame.display.flip()
115 |
116 |
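A hedged usage sketch for AtariSimulator; the settings keys mirror the constructor above, while the ROM name, padding and display choices are illustrative assumptions:

    import numpy as np
    from chimp.simulators.atari import AtariSimulator

    settings = {
        'rom': 'breakout.bin',       # assumed ROM file placed in rom_dir
        'rom_dir': 'roms',
        'frame_skip': 4,
        'seed_simulator': 123,
        'model_dims': (84, 84),      # cropped width/height fed to the learner
        'pad': 15,                   # vertical padding used when cropping
        'viz_cropped': True,
    }

    sim = AtariSimulator(settings)
    sim.init_viz_display()

    while not sim.episode_over():
        frame = sim.get_screenshot()            # (width, height) grayscale array
        a = np.random.randint(sim.n_actions)    # stand-in for a trained DQN policy
        sim.act(a)
        r = sim.reward()
        sim.refresh_viz_display()
    sim.reset_episode()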
--------------------------------------------------------------------------------
/chimp/simulators/gym/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Import OpenAI Gym Wrapper
3 | """
4 |
5 |
--------------------------------------------------------------------------------
/chimp/simulators/gym/gym_wrapper.py:
--------------------------------------------------------------------------------
1 | class GymWrapper():
2 |
3 | def __init__(self, env):
4 |
5 | self.env = env
6 | self.last_reward = 0.0
7 | self.current_state = None
8 | self.terminal_flag = False
9 | self.n_actions = env.action_space.n
10 | self.model_dims = env.observation_space.shape
11 |
12 | def act(self, action):
13 | """
14 | Transitions to the next state and computes the reward
15 | """
16 | state, reward, done, info = self.env.step(action)
17 | self.last_reward = reward
18 | self.current_state = state
19 | self.terminal_flag = done
20 | def reward(self):
21 | return self.last_reward
22 |
23 | def get_screenshot(self):
24 | return self.current_state
25 |
26 | def episode_over(self):
27 | """
28 |         Checks whether the current episode has terminated
29 | """
30 | return self.terminal_flag
31 |
32 | def reset_episode(self):
33 | self.current_state = self.env.reset()
34 |
35 | def simulate(self, nsteps):
36 | """
37 |         Runs the environment forward for up to nsteps steps using randomly sampled actions
38 | """
39 |
40 | self.reset_episode()
41 |
42 | rtot = 0.0
43 | # run the simulation
44 | for i in xrange(nsteps):
45 | self.env.render()
46 | state = self.get_screenshot()
47 | a = self.env.action_space.sample()
48 | self.act(a)
49 | r = self.reward()
50 | rtot += r
51 | if self.episode_over():
52 | break
53 | return rtot
54 |
55 |
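A short sketch of wrapping an OpenAI Gym environment (assumes the gym package is installed; MountainCar-v0 is used as an example environment id):

    import gym
    from chimp.simulators.gym.gym_wrapper import GymWrapper

    env = gym.make('MountainCar-v0')
    sim = GymWrapper(env)

    rtot = sim.simulate(500)    # random rollout of at most 500 steps
    print rtot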
--------------------------------------------------------------------------------
/chimp/simulators/mdp/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Import MDP models and simulator
3 | """
4 |
5 |
--------------------------------------------------------------------------------
/chimp/simulators/mdp/cart_pole.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #################################################################
4 | # Implements the simulator class for pole cart MDP
5 | #################################################################
6 |
7 | class CartPole():
8 |
9 | def __init__(self):
10 | self.actions = np.array([-1,1])
11 | self.n_actions = 2
12 |
13 | self.state_shape = (1,4) # x, xdot, theta, thetadot
14 |
15 | self.gravity = 9.8
16 | self.mass_cart = 1.0
17 | self.mass_pole = 0.3
18 | self.total_mass = self.mass_cart + self.mass_pole
19 | self.length = 0.7
20 | self.polemass_length = self.mass_pole * self.length
21 | self.force_mag = 10.0
22 | self.tau = 0.02
23 |
24 | self.term_deg = 0.2094384
25 |
26 |
27 | def transition(self, s, a):
28 | if self.isterminal(s):
29 | return s.copy()
30 | x, xdot, theta, thetadot = s[0], s[1], s[2], s[3]
31 |
32 | sint = np.sin(theta)
33 | cost = np.cos(theta)
34 |
35 | force = self.actions[a] * self.force_mag
36 |
37 | temp = (force + self.polemass_length * thetadot**2 * sint) / self.total_mass
38 | thetaacc = (self.gravity * sint - cost * temp) / (self.length * (4.0/3.0 - self.mass_pole * cost**2 /
39 | self.total_mass))
40 | xacc = temp - self.polemass_length * thetaacc * cost / self.total_mass
41 |
42 | sp = np.zeros(4, dtype=np.float32)
43 | sp[0] = x + self.tau * xdot
44 | sp[1] = xdot + self.tau * xacc
45 | sp[2] = theta + self.tau * thetadot
46 | sp[3] = thetadot + self.tau * thetaacc
47 |
48 | return sp
49 |
50 | def reward(self, s, a):
51 | r = 0.0
52 | if self.isterminal(s):
53 | r = -1.0
54 | return r
55 |
56 |
57 | def isterminal(self, s):
58 | if (s[0] < -2.4 or s[0] > 2.4 or s[2] < -self.term_deg or s[2] > self.term_deg):
59 | return True
60 | return False
61 |
62 |
63 | def initial_state(self):
64 | s = np.zeros(4, dtype=np.float32)
65 | s[0] = 2.2 * np.random.rand() - 1.1
66 | s[1], s[2], s[3] = 0.0, 0.0, 0.0
67 | return s
68 |
69 |
--------------------------------------------------------------------------------
/chimp/simulators/mdp/mdp_simulator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #################################################################
4 | # Implements the simulator class for MDPs
5 | #################################################################
6 |
7 | class MDPSimulator():
8 |
9 | def __init__(self, model):
10 | """
11 |         Implements the MDP simulator:
12 |         this serves as a wrapper for MDP problem types
13 | """
14 |
15 | self.model = model # problem instance
16 |
17 |         # initialize
18 | self.current_state = model.initial_state()
19 | self.last_action = 0
20 | self.last_reward = 0.0
21 |
22 | self.model_dims = model.state_shape
23 |
24 | self.n_actions = model.n_actions
25 |
26 | def act(self, action):
27 | """
28 |         Transitions the model forward by applying the given action
29 | """
30 | mdp = self.model
31 |
32 | self.last_reward = mdp.reward(self.current_state, action)
33 | self.current_state = mdp.transition(self.current_state, action)
34 | if self.episode_over():
35 | self.last_reward += mdp.reward(self.current_state, action)
36 |
37 | def reward(self):
38 | return self.last_reward
39 |
40 | def get_screenshot(self):
41 | return self.current_state
42 |
43 | def episode_over(self):
44 | return self.model.isterminal(self.current_state)
45 |
46 | def reset_episode(self):
47 | self.current_state = self.model.initial_state()
48 | self.last_reward = 0.0
49 |
50 |     def n_actions(self):  # NOTE: shadowed by the n_actions attribute set in __init__
51 |         return self.model.n_actions
52 |
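A minimal rollout sketch pairing MDPSimulator with the CartPole model defined earlier; import paths are assumed from the package layout and the random policy is a stand-in for a trained agent:

    import numpy as np
    from chimp.simulators.mdp.cart_pole import CartPole
    from chimp.simulators.mdp.mdp_simulator import MDPSimulator

    sim = MDPSimulator(CartPole())

    sim.reset_episode()
    rtot = 0.0
    for t in xrange(200):
        s = sim.get_screenshot()                # (x, xdot, theta, thetadot)
        a = np.random.randint(sim.n_actions)    # random action index
        sim.act(a)
        rtot += sim.reward()
        if sim.episode_over():
            break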
--------------------------------------------------------------------------------
/chimp/simulators/mdp/mountain_car.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #################################################################
4 | # Implements the mountain car MDP
5 | #################################################################
6 |
7 | class MountainCar():
8 |
9 | def __init__(self,
10 | term_r = 10.0,
11 | nonterm_r = -1.0,
12 | height_reward = True,
13 | discrete = False,
14 | discount = 0.95):
15 |
16 | self.actions = np.array([-1.0, 0.0, 1.0])
17 | self.n_actions = 3
18 |
19 | self.state_shape = (1,2) # x and v
20 |
21 | self.term_r = term_r
22 | self.nonterm_r = nonterm_r
23 |
24 | self.vmin, self.vmax = (-0.07, 0.07)
25 | self.xmin, self.xmax = (-1.2, 0.6)
26 |
27 | self.height_reward = height_reward
28 |
29 | self.discrete = discrete
30 | self.xgrid = 10
31 | self.vgrid = 10
32 | self.discrete_x = np.linspace(self.xmin, self.xmax, self.xgrid)
33 | self.discrete_v = np.linspace(self.vmin, self.vmax, self.vgrid)
34 |
35 |
36 | def transition(self, s, a):
37 | """
38 | Returns a next state, given a state and an action
39 | """
40 | sp = np.zeros(2, dtype=np.float32)
41 |         # velocity update: v' = v + 0.001*a - 0.0025*cos(3*x)
42 | sp[1] = s[1] + 0.001 * self.actions[a] - 0.0025 * np.cos(3 * s[0])
43 | sp[1] = self.vclip(sp[1])
44 | sp[0] = self.xclip(s[0] + sp[1])
45 |
46 | return sp
47 |
48 |
49 | def reward(self, s, a):
50 | """
51 | Rewarded for reaching goal state, penalized for all other states
52 | """
53 | r = s[0] if (self.height_reward and s[0] > 0.0) else 0
54 | if s[0] >= self.xmax:
55 | r += self.term_r
56 | else:
57 | r += self.nonterm_r
58 | return r
59 |
60 |
61 | def isterminal(self, s):
62 | if s[0] >= self.xmax:
63 | return True
64 | return False
65 |
66 | def initial_state(self):
67 | xi = np.random.uniform(self.xmin, self.xmax*0.9)
68 | vi = 0.0
69 | return np.array([xi, vi], dtype=np.float32)
70 |
71 |
72 |
73 | #################################################################
74 | ########################## UTILITIES ############################
75 | #################################################################
76 |
77 | def clip(self, val, lo, hi):
78 | return min(hi, max(val, lo))
79 |
80 | def vclip(self, val):
81 | return self.clip(val, self.vmin, self.vmax)
82 |
83 | def xclip(self, val):
84 | return self.clip(val, self.xmin, self.xmax)
85 |
86 | def find_nearest(self, vals, target):
87 | idx = (np.abs(vals - target)).argmin()
88 |         return vals[idx]
89 |
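The model can also be stepped directly, without the MDPSimulator wrapper; a small sanity-check sketch using random actions (illustrative only):

    import numpy as np
    from chimp.simulators.mdp.mountain_car import MountainCar

    mc = MountainCar()
    s = mc.initial_state()
    for t in xrange(1000):
        a = np.random.randint(mc.n_actions)   # index into [-1.0, 0.0, 1.0]
        r = mc.reward(s, a)
        s = mc.transition(s, a)
        if mc.isterminal(s):
            break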
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Environment simulators.
3 |
4 | '''
5 |
6 | from models.simulator import POMDPSimulator
7 | from models.simulator_momdp import MOMDPSimulator
8 | from models.tiger import TigerPOMDP
9 | from models.rock_sample import RockSamplePOMDP
10 |
11 | from models.tools.belief import DiscreteBelief
12 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/__init__.py:
--------------------------------------------------------------------------------
1 | # dummy file
2 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/rock_sample.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from copy import deepcopy
3 | from tools.belief_momdp import MOMDPBelief
4 | import math
5 | import itertools
6 |
7 | #################################################################
8 | # Implements the Rock Sample POMDP problem
9 | #################################################################
10 |
11 | class RockSamplePOMDP():
12 |
13 | # constructor
14 | def __init__(self,
15 |                  xs=7, # size of grid x dim
16 |                  ys=7, # size of grid y dim
17 |
18 | rocks={(2,4):False, (3,4):True, (5,5):False, # (2,0):False, (0,1):True, (3,1):False, (6,3):True,
19 | (1,6):True},
20 |
21 | seed=1, # random seed
22 | rbad=-10.0, rgood=10.0, rexit=10.0, rbump=-100.0, # reward values
23 | d0=20, # quality of rover observation,
24 | h_conf=0.5, # confidence level before moving in heuristic policy
25 | discount=0.99):
26 |
27 | self.random_state = np.random.RandomState(seed) # used for sampling
28 | self.discount = discount
29 |
30 |         self.xs = xs - 1 # max x index of the grid
31 |         self.ys = ys - 1 # max y index of the grid
32 |
33 | self.rocks = rocks # dictionary mapping rock positions to their types (x,y) => good or bad
34 | self.rock_pos = [k for k in sorted(rocks.keys())]
35 | self.rock_types = [rocks[k] for k in sorted(rocks.keys())]
36 | self.rock_map = {(k):i for (i, k) in enumerate(sorted(rocks.keys()))}
37 | k = len(rocks)
38 | self.k = k # number of rocks
39 |
40 | self.rbad = rbad
41 | self.rgood = rgood
42 | self.rbump = rbump
43 | self.rexit = rexit
44 |
45 | # states: state is represented by the rover position and the rock types
46 | self.rover_states = [(j,i) for i in range(xs) for j in range(ys)] # fully observable vars
47 | rs = itertools.product(*(xrange(2) for i in xrange(k)))
48 | self.rock_states = [[bool(j) for j in i] for i in rs]
49 | self.n_rock_states = len(self.rock_states)
50 | self.n_rover_states = len(self.rover_states)
51 |
52 | # actions: total of 5+k
53 | self.ractions = [0, # move left
54 | 1, # move right
55 | 2, # move up
56 | 3, # move down
57 | 4] # sample
58 | for i in range(k):
59 |             self.ractions.append(5+i) # check rock i
60 |
61 | # observations
62 | self.robs = [0, # none
63 | 1, # good
64 | 2] # bad
65 |
66 | # pre-allocate state variables
67 | self.rover_state = np.zeros(2) # rover (x,y) position
68 | self.rock_state = np.zeros(k, dtype=np.bool) # (good, bad) type for each rock
69 |
70 | self.d0 = d0
71 | self.h_conf = h_conf
72 |
73 | self.action_vectors = [[-1, 0], [1, 0], [0, 1], [0, -1]]
74 |
75 | # belief and observation dimensions
76 | self.xdims = 2
77 | self.odims = 1
78 |
79 | #################################################################
80 | # Setters
81 | #################################################################
82 | def set_discount(self, d):
83 | self.discount = d
84 |
85 | def set_rewards(self, rs, rg, rb, re, rm):
86 | self.rsample = rs
87 | self.rgood = rg
88 | self.rbad = rb
89 | self.rexit = re
90 |
91 | #################################################################
92 | # S, A, O Spaces
93 | #################################################################
94 | def fully_obs_states(self):
95 | return self.rover_states
96 |
97 | def partially_obs_states(self):
98 | return self.rock_states
99 |
100 | def actions(self):
101 | return self.ractions
102 |
103 | def observations(self):
104 | return self.robs
105 |
106 | #################################################################
107 | # Reward Function
108 | #################################################################
109 | def reward(self, x, y, a):
110 | # Rewarded:
111 | # sampling good or bad rocks
112 | # exiting the map
113 | # trying to move off the grid
114 | rocks = self.rocks
115 | xpos, ypos = x
116 |
117 | # if in terminal state, no reward
118 | if self.isterminal(x, y):
119 | return 0.0
120 | # if exit get exit reward
121 | if a == 1 and xpos == self.xs:
122 | return self.rexit
123 | # if trying to move off the grid
124 | if (a == 0 and xpos == 0) or (a == 2 and ypos == self.ys) or (a == 3 and ypos == 0):
125 | return self.rbump
126 | # if trying to sample
127 | if a == 4:
128 | # if in a space with a rock
129 | if x in rocks:
130 | # if rock is good
131 | if rocks[x]:
132 | return self.rgood
133 | # if rock is bad
134 | else:
135 | return self.rbad
136 | return 0.0
137 |
138 | #################################################################
139 | # Distribution Functions
140 | #################################################################
141 |     # rover moves deterministically: distribution is just the position of rover
142 | def fully_obs_transition(self, x, y, a, dist):
143 | xpos = x[0]
144 | ypos = x[1]
145 | # going left
146 | if a == 0 and xpos > 0:
147 | xpos -= 1
148 | # going right
149 | elif a == 1 and xpos < (self.xs+1):
150 | xpos += 1
151 | # going up
152 | elif a == 2 and ypos < self.ys:
153 | ypos += 1
154 | # going down
155 | elif a == 3 and ypos > 0:
156 | ypos -= 1
157 | dist[0] = xpos
158 | dist[1] = ypos
159 | return dist
160 |
161 | # the positions of rocks don't change, good rocks turn bad after sampling
162 | def partially_obs_transition(self, x, y, a, dist):
163 | # fill the distribution with our y var
164 | for i in xrange(len(y)):
165 | dist[i] = y[i]
166 | # if a rock is sampled it becomes bad
167 | if a == 4:
168 | rocks = self.rocks
169 | # if we are on a rock state change type to bad
170 | if x in rocks:
171 | ri = self.rock_map[x]
172 | self.rock_types[ri] = False
173 | rocks[x] = False
174 | dist[ri] = False
175 | return dist
176 |
177 |     # sample the transition distribution
178 | def sample_fully_obs_state(self, d):
179 | # deterministic transition
180 | return (d[0], d[1])
181 |
182 | def sample_partially_obs_state(self, d):
183 | # rock states do not change
184 | return d
185 |
186 |     # returns the observation distribution of o from the (x,y,a)
187 | def observation(self, x, y, a, dist):
188 | prob = 0.0
189 | # if the action checks a rock
190 | if self.is_check_action(a):
191 | xpos = x[0]
192 | ypos = x[1]
193 |
194 | ri = self.act2rock(a) # rock index
195 | rock_pos = self.rock_pos[ri] # rock position
196 | rock_type = y[ri] # rock type
197 |
198 | r = math.sqrt((xpos - rock_pos[0])**2 + (ypos - rock_pos[1])**2)
199 | eta = math.exp(-r/self.d0)
200 | p_correct = 0.5 + 0.5 * eta # probability of correct measure
201 |
202 | dist.fill(0.0)
203 | # if rock is good
204 | if rock_type == True:
205 | dist[1] = p_correct
206 | dist[2] = 1.0 - p_correct
207 | # rock is bad
208 | else:
209 | dist[1] = 1 - p_correct
210 | dist[2] = p_correct
211 | else:
212 | dist.fill(0.0)
213 | dist[0] = 1.0
214 | return dist
215 |
216 |
217 |     # sample the observation distribution
218 | def sample_observation(self, d):
219 | oidx = self.categorical(d)
220 | return self.robs[oidx]
221 |
222 | def fully_obs_transition_pdf(self, d, x):
223 | if d[0] == x[0] and d[1] == x[1]:
224 | return 1.0
225 | else:
226 | return 0.0
227 |
228 |     # rock types transition deterministically: pdf is 1 if y matches d, 0 otherwise
229 | def partially_obs_transition_pdf(self, d, y):
230 | if y == d:
231 | return 1.0
232 | else:
233 | return 0.0
234 |
235 | # pdf for observation prob
236 | def observation_pdf(self, d, dval):
237 |         assert dval < 3, "Attempting to retrieve pdf value larger than observation size"
238 | return d[dval]
239 |
240 | # numpy categorical sampling hack
241 | def categorical(self, d):
242 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0]
243 |
244 |
245 | #################################################################
246 | # Create functions
247 | #################################################################
248 | def create_fully_obs_transition_distribution(self):
249 | td = np.array([0,0]) # position of rover
250 | return td
251 |
252 | def create_partially_obs_transition_distribution(self):
253 | return deepcopy(self.rock_types)
254 |
255 | def create_observation_distribution(self):
256 | od = np.zeros(3) + 1.0/3 # none, good, bad
257 | return od
258 |
259 | def create_belief(self):
260 | return MOMDPBelief(self.n_rock_states)
261 |
262 | def initial_belief(self):
263 | return MOMDPBelief(self.n_rock_states)
264 |
265 | def initial_fully_obs_state(self):
266 | # returns a (0, y) tuple
267 | return (0, self.random_state.randint(self.xs+1))
268 |
269 | def initial_partially_obs_state(self):
270 | for (i, k) in enumerate(sorted(self.rocks.keys())):
271 | t = bool(self.random_state.randint(2))
272 | self.rock_types[i] = t
273 | self.rocks[k] = t
274 | return deepcopy(self.rock_types)
275 |
276 |
277 | #################################################################
278 | # Misc Functions
279 | #################################################################
280 | def isterminal(self, x, y):
281 | xpos, ypos = x
282 | if xpos > self.xs:
283 | return True
284 | return False
285 |
286 | def index2action(self, ai):
287 | return ai
288 |
289 | def is_check_action(self, a):
290 | return True if a > 4 else False
291 |
292 | def act2rock(self, a):
293 | return a - 5
294 |
295 | def n_xstates(self):
296 | return len(self.rover_states)
297 |
298 | def n_ystates(self):
299 | return len(self.rock_states)
300 |
301 | def n_actions(self):
302 | return len(self.ractions)
303 |
304 |     def n_obsevations(self):
305 |         return 3 # observations are none, good, bad
306 |
307 |
308 | #################################################################
309 | # Policies
310 | #################################################################
311 |
312 | def heuristic_policy(self, sc):
313 | # takes in a screen shot, [x, b] array
314 | x = (sc[0], sc[1]) # x and y pos
315 | b = np.array(sc[2:]) # belief
316 | return self.heuristic(x, b)
317 |
318 | def heuristic(self, x, b):
319 | # if we are not confident, keep checking randomly
320 | if b.max() < self.h_conf:
321 | return self.random_state.randint(5, 5+self.k)
322 | else:
323 | ri = b.argmax() # index of highest confidence rock state
324 | y = self.rock_states[ri] # rock state
325 | # find closest good rock
326 | c = float('inf')
327 | ci = -1
328 | for (i, t) in enumerate(y):
329 | # if rock is good
330 | if t:
331 |                     # if on the rock, sample it
332 | if x == self.rock_pos[i]:
333 | return 4
334 | xrover = x[0]
335 | yrover = x[1]
336 | xrock, yrock = self.rock_pos[i]
337 | dist = math.sqrt((xrock-xrover)**2 + (yrock-yrover)**2)
338 | if dist < c:
339 | c = dist
340 | ci = i
341 | if ci > -1:
342 | return self.move_to(x, self.rock_pos[ci])
343 | # if no good rocks left move right
344 | return 1
345 |
346 | # action to move rover from origin o to target t
347 | def move_to(self, o, t):
348 | # vector components
349 | v = [t[0] - o[0], t[1] - o[1]]
350 | sa = float('inf')
351 | ai = 1
352 | # move in the direction that minimizes angle between action and target
353 | for (i, a) in enumerate(self.action_vectors):
354 | ang = angle(v, a)
355 | if ang < sa:
356 | sa = ang
357 | ai = i
358 | return ai
359 |
360 | def dotproduct(v1, v2):
361 | return sum((a*b) for a, b in zip(v1, v2))
362 |
363 | def length(v):
364 | return math.sqrt(dotproduct(v, v))
365 |
366 | def angle(v1, v2):
367 | return math.acos(dotproduct(v1, v2) / (length(v1) * length(v2)))
368 |
369 |
370 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/rock_test.py:
--------------------------------------------------------------------------------
1 | from rock_sample import RockSamplePOMDP
2 |
3 | pomdp = RockSamplePOMDP()
4 |
5 | x = pomdp.initial_fully_obs_state()
6 | y = pomdp.initial_partially_obs_state()
7 |
8 | tdx = pomdp.create_fully_obs_transition_distribution()
9 | tdy = pomdp.create_partially_obs_transition_distribution()
10 | od = pomdp.create_observation_distribution()
11 |
12 | for a in range(pomdp.n_actions()):
13 | print "Action ", x, y, a
14 | tdx = pomdp.fully_obs_transition(x, y, a, tdx)
15 | tdy = pomdp.partially_obs_transition(x, y, a, tdy)
16 | od = pomdp.observation(x, y, a, od)
17 | x = pomdp.sample_fully_obs_state(tdx)
18 | y = pomdp.sample_partially_obs_state(tdy)
19 | o = pomdp.sample_observation(od)
20 | print "Observation ", x, y, o
21 |
22 | b = pomdp.initial_belief()
23 |
24 | x = (1,1)
25 | a = 6
26 |
27 | od = pomdp.observation(x, y, a, od)
28 | o = pomdp.sample_observation(od)
29 |
30 | b.update(pomdp, x, a, o)
31 |
32 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/simulator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #################################################################
4 | # This file implements a pomdp simulator using the interface
5 | # defined in the README
6 | #################################################################
7 |
8 | class POMDPSimulator():
9 |
10 | # constructor
11 | def __init__(self, pomdp, robs=False):
12 | self.pomdp = pomdp
13 | self.current_state = pomdp.initial_state()
14 | self.current_action = None
15 | self.current_observation = np.array([-1])
16 | self.current_belief = pomdp.initial_belief()
17 | self.current_reward = 0.0
18 |
19 | self.robs = robs # returns observation or belief
20 |
21 | self.tdist = pomdp.create_transition_distribution()
22 | self.odist = pomdp.create_observation_distribution()
23 |
24 | self.n_actions = self.pomdp.n_actions()
25 | self.n_states = self.pomdp.n_states()
26 |
27 | if not robs:
28 | self.model_dims = self.pomdp.belief_shape
29 | else:
30 | self.model_dims = self.pomdp.observation_shape
31 |
32 | #@profile
33 | # progress single step in simulation
34 | def act(self, ai):
35 | pomdp = self.pomdp
36 | s = self.current_state
37 | b = self.current_belief
38 | tdist = self.tdist
39 | odist = self.odist
40 |
41 | a = pomdp.index2action(ai)
42 |
43 | r = pomdp.reward(s, a)
44 |
45 | tdist = pomdp.transition(s, a, tdist)
46 | s = pomdp.sample_state(tdist)
47 |
48 | odist = pomdp.observation(s, a, odist)
49 | o = pomdp.sample_observation(odist)
50 |
51 | b.update(pomdp, a, o)
52 |
53 | self.current_reward = r
54 | self.current_state = s
55 | self.current_observation = o
56 |
57 | # returns the current simulator belief
58 | def get_screenshot(self):
59 | if self.robs:
60 | return np.array([self.current_observation])
61 | else:
62 | return self.current_belief.new_belief()
63 |
64 | # returns the current reward
65 | def reward(self):
66 | return self.current_reward
67 |
68 | # check if reached terminal states
69 | def episode_over(self):
70 | return self.pomdp.isterminal(self.current_state)
71 |
72 | def reset_episode(self):
73 | pomdp = self.pomdp
74 | self.current_state = pomdp.initial_state()
75 | self.current_belief = pomdp.initial_belief()
76 |
77 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/tiger.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from copy import deepcopy
3 | from tools.belief import DiscreteBelief
4 |
5 | #################################################################
6 | # Implements the Tiger POMDP problem
7 | #################################################################
8 |
9 | class TigerPOMDP():
10 |
11 | # constructor
12 | def __init__(self,
13 | seed=999, # random seed
14 | rlisten=-1.0, rtiger=-100.0, rescape=10.0, # reward values
15 | pcorrect=0.85, # correct observation prob
16 | discount=0.95): # discount
17 |
18 | self.random_state = np.random.RandomState(seed)
19 | self.rlisten = rlisten
20 | self.rtiger = rtiger
21 | self.rescape = rescape
22 | self.pcorrect = pcorrect
23 | self.discount = discount
24 |
25 | # transition arrs
26 | self.tstates = [0, 1] # left, right
27 |
28 | # actions
29 | self.tactions = [0, 1, 2] # open left, open right, listen
30 |
31 | # observations arrs
32 | self.tobs = [0, 1] # observed on the left, observed on the right
33 |
34 | # belief and observation shape
35 | self.belief_shape = (2,1)
36 | self.observation_shape = (1,1)
37 |
38 | #################################################################
39 | # Setters
40 | #################################################################
41 | def set_discount(self, d):
42 | self.discount = d
43 |
44 | def set_rewards(self, rl, rt, re):
45 | self.rlisten = rl
46 | self.rtiger = rt
47 | self.rescape = re
48 |
49 | def set_listen_prob(self, pc):
50 | self.pcorrect = pc
51 |
52 | #################################################################
53 | # S, A, O Spaces
54 | #################################################################
55 | def states(self):
56 | return self.tstates
57 |
58 | def actions(self):
59 | return self.tactions
60 |
61 | def observations(self):
62 | return self.tobs
63 |
64 | #################################################################
65 | # Reward Function
66 | #################################################################
67 | def reward(self, s, a):
68 | r = 0.0
69 | rt = self.rtiger
70 | re = self.rescape
71 | if a == 2:
72 | r += self.rlisten
73 | elif a == 1:
74 | r = (r + rt) if s == 1 else (r + re)
75 | else:
76 | r = (r + rt) if s == 0 else (r + re)
77 | return r
78 |
79 | #################################################################
80 | # Distribution Functions
81 | #################################################################
82 |     # returns the transition distribution of s' from the (s,a) pair
83 | def transition(self, s, a, dist):
84 | if a == 0 or a == 1:
85 | dist[0] = 0.5
86 | dist[1] = 0.5
87 | elif s == 0:
88 | dist[0] = 1.0
89 | dist[1] = 0.0
90 | else:
91 | dist[0] = 0.0
92 | dist[1] = 1.0
93 | return dist
94 |
95 |     # sample the transition distribution
96 | def sample_state(self, d):
97 | sidx = self.categorical(d)
98 | return self.tstates[sidx]
99 |
100 |     # returns the observation distribution of o from the (s,a) pair
101 | def observation(self, s, a, dist):
102 | p = self.pcorrect
103 | if a == 2:
104 | if s == 0:
105 | dist[0] = p
106 | dist[1] = 1.0 - p
107 | else:
108 | dist[0] = 1.0 - p
109 | dist[1] = p
110 | else:
111 | dist[0] = 0.5
112 | dist[1] = 0.5
113 | return dist
114 |
115 |     # sample the observation distribution
116 | def sample_observation(self, d):
117 | oidx = self.categorical(d)
118 | return self.tobs[oidx]
119 |
120 | # pdf should be in a distributions module
121 | def transition_pdf(self, d, dval):
122 |         assert dval < 2, "Attempting to retrieve pdf value larger than state size"
123 | return d[dval]
124 |
125 | def observation_pdf(self, d, dval):
126 |         assert dval < 2, "Attempting to retrieve pdf value larger than observation size"
127 | return d[dval]
128 |
129 | # numpy categorical sampling hack
130 | def categorical(self, d):
131 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0]
132 |
133 | #################################################################
134 | # Create functions
135 | #################################################################
136 | def create_transition_distribution(self):
137 | td = np.array([0.5, 0.5])
138 | return td
139 |
140 | def create_observation_distribution(self):
141 | od = np.array([0.5, 0.5])
142 | return od
143 |
144 | def create_belief(self):
145 | return DiscreteBelief(self.n_states())
146 |
147 | def initial_belief(self):
148 | return DiscreteBelief(self.n_states())
149 |
150 | def initial_state(self):
151 | return self.random_state.randint(2)
152 |
153 | #################################################################
154 | # Misc Functions
155 | #################################################################
156 |
157 | def isterminal(self, s):
158 | # no terminal state in model
159 | return False
160 |
161 | def index2action(self, ai):
162 | return ai
163 |
164 | def n_states(self):
165 | return 2
166 |
167 | def n_actions(self):
168 | return 3
169 |
170 | def n_obsevations(self):
171 | return 2
172 |
173 | #################################################################
174 | # Policies
175 | #################################################################
176 |
177 | def optimal_policy(self):
178 | def pol(b):
179 | if b[0] < 0.04:
180 | return 0
181 | elif b[0] > 0.96:
182 | return 1
183 | else:
184 | return 2
185 | return pol
186 |
187 |
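A short sketch driving the Tiger problem through POMDPSimulator with the hand-coded optimal_policy above; import paths are assumed from the package layout:

    from chimp.simulators.pomdp.models.tiger import TigerPOMDP
    from chimp.simulators.pomdp.models.simulator import POMDPSimulator

    pomdp = TigerPOMDP()
    sim = POMDPSimulator(pomdp)       # get_screenshot() returns the belief
    pol = pomdp.optimal_policy()

    rtot = 0.0
    for t in xrange(100):
        b = sim.get_screenshot()      # belief over (tiger left, tiger right)
        a = pol(b)                    # 0: open left, 1: open right, 2: listen
        sim.act(a)
        rtot += sim.reward()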
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # init file
2 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/tools/belief.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from copy import deepcopy
3 |
4 | #################################################################
5 | # Implements Belief and Belief Updater
6 | #################################################################
7 |
8 | class DiscreteBelief():
9 |
10 | def __init__(self, n):
11 | self.bold = np.zeros(n) + 1.0/n
12 | self.bnew = np.zeros(n) + 1.0/n
13 | self.n = n
14 |
15 | def __getitem__(self, idx):
16 | return self.bnew[idx]
17 |
18 | def __setitem__(self, idx, val):
19 | self.bold[idx] = val
20 | self.bnew[idx] = val
21 |
22 | def update(self, pomdp, a, o):
23 |
24 | # swap pointers
25 | (bnew, bold) = (self.bold, self.bnew)
26 |
27 | sspace = pomdp.states()
28 |
29 | td = pomdp.create_transition_distribution()
30 | od = pomdp.create_observation_distribution()
31 |
32 | # old belief is now new, new is fresh
33 | bnew.fill(0.0)
34 |
35 | for (i, sp) in enumerate(sspace):
36 | # get the distributions
37 | od = pomdp.observation(sp, a, od)
38 | # get the prob of o from the current distribution
39 | probo = pomdp.observation_pdf(od, o)
40 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero
41 | if probo == 0.0:
42 | continue
43 |             b_sum = 0.0 # belief for state sp
44 | for (j, s) in enumerate(sspace):
45 | td = pomdp.transition(s, a, td)
46 | pp = pomdp.transition_pdf(td, sp)
47 | b_sum += pp * bold[j]
48 | bnew[i] = probo * b_sum
49 | norm = sum(bnew)
50 | for i in range(self.length()):
51 | bnew[i] /= norm
52 | (self.bnew, self.bold) = (bnew, bold)
53 | return self
54 |
55 | def length(self):
56 | return self.n
57 |
58 | def empty(self):
59 | self.bold.fill(0.0)
60 | self.bnew.fill(0.0)
61 |
62 | def empty_old(self):
63 | self.bold.fill(0.0)
64 |
65 | def empty_new(self):
66 | self.bnew.fill(0.0)
67 |
68 | def old_belief(self):
69 | return self.bold
70 |
71 | def new_belief(self):
72 | return self.bnew
73 |
74 |
--------------------------------------------------------------------------------
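
The update() above is the standard discrete Bayes filter: b'(s') is proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s), renormalized at the end. A minimal usage sketch with the Tiger model from models/tiger.py follows; the package-style import paths are assumed from the directory layout, and TigerPOMDP's state space and transition model are defined earlier in tiger.py. Per the observation model shown above, only action 2 (listen) yields informative observations, indexed 0/1.

from chimp.simulators.pomdp.models.tiger import TigerPOMDP
from chimp.simulators.pomdp.models.tools.belief import DiscreteBelief

pomdp = TigerPOMDP(seed=1)
b = DiscreteBelief(pomdp.n_states())   # uniform belief over the two tiger positions

# listen (action 2) and suppose observation 0 is received
b.update(pomdp, 2, 0)
print(b.new_belief())                  # posterior over the tiger's position after the listen/observe step
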
/chimp/simulators/pomdp/models/tools/belief_momdp.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from copy import deepcopy
3 |
4 | #################################################################
5 | # Implements Belief and Belief Updater
6 | #################################################################
7 |
8 | class MOMDPBelief():
9 |
10 | def __init__(self, n):
11 | self.bold = np.zeros(n) + 1.0/n
12 | self.bnew = np.zeros(n) + 1.0/n
13 | self.n = n
14 |
15 | def __getitem__(self, idx):
16 | return self.bnew[idx]
17 |
18 | def __setitem__(self, idx, val):
19 | self.bold[idx] = val
20 | self.bnew[idx] = val
21 |
22 | def update(self, pomdp, x, a, o):
23 |
24 | # swap pointers
25 | (bnew, bold) = (self.bold, self.bnew)
26 |
27 | yspace = pomdp.partially_obs_states()
28 |
29 | tdp = pomdp.create_partially_obs_transition_distribution()
30 | od = pomdp.create_observation_distribution()
31 |
32 | # old belief is now new, new is fresh
33 | bnew.fill(0.0)
34 |
35 | # iterate
36 | for (i, yp) in enumerate(yspace):
37 | # get the distributions
38 | od = pomdp.observation(x, yp, a, od)
39 | # get the prob of o from the current distribution
40 | probo = pomdp.observation_pdf(od, o)
41 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero
42 | if probo == 0.0:
43 | continue
44 |             b_sum = 0.0 # belief for state sp
45 | for (j, y) in enumerate(yspace):
46 | tdp = pomdp.partially_obs_transition(x, y, a, tdp)
47 | pp = pomdp.partially_obs_transition_pdf(tdp, yp)
48 | b_sum += pp * bold[j]
49 | bnew[i] = probo * b_sum
50 | norm = sum(bnew)
51 |         for i in range(self.length()):
52 | bnew[i] /= norm
53 | (self.bnew, self.bold) = (bnew, bold)
54 | return self
55 |
56 | def length(self):
57 | return self.n
58 |
59 | def empty(self):
60 | self.bold.fill(0.0)
61 | self.bnew.fill(0.0)
62 |
63 | def empty_old(self):
64 | self.bold.fill(0.0)
65 |
66 | def empty_new(self):
67 | self.bnew.fill(0.0)
68 |
69 | def old_belief(self):
70 | return self.bold
71 |
72 | def new_belief(self):
73 | return self.bnew
74 |
75 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/models/tools/distributions.py:
--------------------------------------------------------------------------------
1 | #################################################################
2 | # Implements distributions for POMDP models
3 | #################################################################
4 |
5 | import numpy as np
6 | from copy import deepcopy
7 |
8 | class Categorical():
9 |
10 | def __init__(self, n):
11 | self.indices = np.zeros(n, dtype=np.int64)
12 | self.weights = np.zeros(n) + 1.0/n
13 | self.n = n
14 |
15 | def __getitem__(self, idx):
16 | return (self.indices[idx], self.weights[idx])
17 |
18 |     def __setitem__(self, idx, val):
19 |         self.indices[idx], self.weights[idx] = val  # val is an (index, weight) pair, mirroring __getitem__
20 |
21 | def sample(self):
22 | idx = self.quantile(np.random.rand())
23 | return self.indices[idx]
24 |
25 |
26 | def quantile(self, p):
27 |         # inverse-CDF lookup: smallest index whose cumulative weight reaches p
28 |         pv = self.weights
29 |         i = 0
30 |         v = pv[0]
31 |         while v < p and i < self.n - 1:
32 | i += 1
33 | v += pv[i]
34 | return i
35 |
36 |
--------------------------------------------------------------------------------
/chimp/simulators/pomdp/sim_loop.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | from models.tiger import TigerPOMDP
4 | from models.simulator import POMDPSimulator
5 |
6 | #####################################################################
7 | # This is a sample simulation loop for the DRL framework using POMDPs
8 | #####################################################################
9 |
10 | # initialize pomdp
11 | pomdp = TigerPOMDP(seed=1)
12 |
13 | # initialize and pass the pomdp into simulator
14 | sim = POMDPSimulator(pomdp) # state and initial belief automatically initialized
15 |
16 | sim.n_states # number of states (the DQN input layer size)
17 |
18 | opt = pomdp.optimal_policy()
19 |
20 | steps = 50000
21 |
22 | rtot = 0.0
23 |
24 | for i in xrange(steps):
25 |     # get the current state (for POMDPs this is the belief)
26 | s = sim.get_screenshot()
27 | # pick random action
28 | #ai = np.random.randint(sim.n_actions)
29 |     # pick the optimal action
30 | ai = opt(s)
31 |
32 | # progress simulation
33 | sim.act(ai)
34 |
35 | # get reward and next states
36 | r = sim.reward() # real valued reward
37 | sp = sim.get_screenshot() # pomdp state, this is a belief
38 |
39 | print "Step: ", i
40 | #print "Action ", ai, " Reward: ", r, " Screen Shot: ", sp
41 | #print "Current State: ", sim.current_state, " Current Belief: ", sim.current_belief.bnew, "\n"
42 |
43 | rtot += r
44 |
45 | # check if reached terminal state
46 | if sim.episode_over():
47 | sim.reset_episode()
48 |
49 | print "Total reward: ", rtot
50 |
--------------------------------------------------------------------------------
/chimp/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Dummy File
3 | """
4 |
--------------------------------------------------------------------------------
/chimp/utils/distributions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | #################################################################
4 | # Implements helper functions
5 | #################################################################
6 |
7 | def categorical(p, rng):
8 | """
9 |     Draws a single categorical sample (an index) from the distribution p
10 | """
11 | return np.argmax(rng.multinomial(1,p))
12 |
13 | def softmax(z):
14 | """
15 |     Computes softmax values for each Q-value in z
16 | """
17 | # TODO: extend to multi-dimensional input?
18 | ex = np.exp(z - np.max(z))
19 | return ex / np.sum(ex)
20 |
--------------------------------------------------------------------------------
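
softmax() and categorical() are the two pieces that StochasticDQNPolicy (in policies.py below) chains together to sample actions in proportion to their Q-values. A quick sketch of that combination, with made-up Q-values for three actions:

import numpy as np
from chimp.utils.distributions import softmax, categorical

rng = np.random.RandomState(0)
q_values = np.array([1.0, 2.0, 0.5])   # hypothetical Q-values for three actions
probs = softmax(q_values)              # non-negative, sums to 1, ordered like the Q-values
action = categorical(probs, rng)       # index drawn with probability probs[index]
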
/chimp/utils/policies.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from chimp.utils.distributions import *
4 |
5 | #################################################################
6 | # Implements DQN controllers
7 | #################################################################
8 |
9 |
10 | class DQNPolicy():
11 | """
12 | Class that handles policies generated by the DQN
13 | """
14 |
15 | def __init__(self, learner):
16 | self.learner = learner
17 |
18 | def action(self, obs):
19 | """
20 |         Returns the action with the highest Q-value given observation obs
21 | """
22 | q_vals = self.learner.forward(obs)
23 | return np.argmax(q_vals)
24 |
25 |
26 | class StochasticDQNPolicy():
27 | """
28 | Generates actions stochastically according to Q-vals
29 | Network output is turned into probs using softmax
30 | """
31 |
32 | def __init__(self, learner, seed=None):
33 | self.learner = learner
34 | self.rng = np.random.RandomState(seed)
35 |
36 | def action(self, obs):
37 | """
38 | Returns the action according to probs generated by taking softmax over Qs
39 | """
40 | q_vals = self.learner.forward(obs)
41 | q_probs = softmax(q_vals)
42 | return categorical(q_probs[0], self.rng)
43 |
44 |
45 | class EpsGreedyPolicy():
46 | """
47 | Epsilon greedy policy
48 | """
49 |
50 | def __init__(self, policy, n_actions, eps, seed=None):
51 |         self.policy = policy
52 | self.n_actions = n_actions
53 | self.eps = eps
54 | self.rng = np.random.RandomState(seed)
55 |
56 | def action(self, obs):
57 | if self.rng.rand() < self.eps:
58 | return self.rng.randint(self.n_actions)
59 | else:
60 | return self.policy.action(obs)
61 |
62 |
63 | class RandomPolicy():
64 |
65 | # constructor
66 | def __init__(self, n_actions, rng = np.random.RandomState()):
67 | self.rng = rng
68 | self.n_actions = n_actions
69 |
70 | def action(self, obs):
71 | return self.rng.randint(self.n_actions)
72 |
73 |
74 | class OneStepLookAhead():
75 |
76 | # constructor
77 | def __init__(self, simulator, n_rollouts=100):
78 | self.simulator = simulator
79 |
80 | def action(self, obs):
81 | # run each action n_rollouts times, take the highest average
82 | pass
83 |
84 |
85 | class SingleAction():
86 | """
87 | Dummy single action policy
88 | """
89 |
90 | def __init__(self, a):
91 | self.a = a
92 |
93 | def action(self, obs):
94 | return self.a
95 |
--------------------------------------------------------------------------------
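
To see how these policy classes compose, here is a minimal sketch that wraps the greedy DQNPolicy in EpsGreedyPolicy. The _StubLearner below is a hypothetical stand-in for DQNLearner that only provides the forward() method DQNPolicy calls:

import numpy as np
from chimp.utils.policies import DQNPolicy, EpsGreedyPolicy

class _StubLearner(object):
    # hypothetical stand-in for DQNLearner; returns fixed Q-values for three actions
    def forward(self, obs):
        return np.array([[0.1, 0.7, 0.2]])

greedy = DQNPolicy(_StubLearner())
policy = EpsGreedyPolicy(greedy, n_actions=3, eps=0.05, seed=0)
print(policy.action(None))   # greedy action 1 most of the time; a uniformly random action with probability eps
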
/examples/atari_tutorial.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 |     "# be sure to have run ' python setup.py ' from the chimp directory"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# Training DeepMind's Atari DQN with Chimp"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {},
24 | "source": [
25 | "Load Chimp modules"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 2,
31 | "metadata": {
32 | "collapsed": false
33 | },
34 | "outputs": [],
35 | "source": [
36 | "from chimp.memories import ReplayMemoryHDF5\n",
37 | "\n",
38 | "from chimp.learners.dqn_learner import DQNLearner\n",
39 | "from chimp.learners.chainer_backend import ChainerBackend\n",
40 | "\n",
41 | "from chimp.simulators.atari import AtariSimulator\n",
42 | "\n",
43 | "from chimp.agents import DQNAgent"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "Load Python packages"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 3,
56 | "metadata": {
57 | "collapsed": true
58 | },
59 | "outputs": [],
60 | "source": [
61 | "%matplotlib inline \n",
62 | "import matplotlib.pyplot as plt\n",
63 | "\n",
64 | "import numpy as np\n",
65 | "import random\n",
66 | "import chainer\n",
67 | "import chainer.functions as F\n",
68 | "import chainer.links as L\n",
69 | "from chainer import Chain\n",
70 | "import os\n",
71 | "\n",
72 | "import pandas as ps"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "Set training parameters"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 4,
85 | "metadata": {
86 | "collapsed": true
87 | },
88 | "outputs": [],
89 | "source": [
90 | "settings = {\n",
91 | "\n",
92 | " # agent settings\n",
93 | " 'batch_size' : 32,\n",
94 | " 'print_every' : 5000,\n",
95 | " 'save_dir' : './results_atari',\n",
96 | " 'iterations' : 5000000,\n",
97 | " 'eval_iterations' : 5000,\n",
98 | " 'eval_every' : 50000,\n",
99 | " 'save_every' : 50000,\n",
100 | " 'initial_exploration' : 50000,\n",
101 | " 'epsilon_decay' : 0.000005, # subtract from epsilon every step\n",
102 | " 'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions\n",
103 |     "    'epsilon' : 1.0, # Initial exploration rate\n",
104 | " 'learn_freq' : 4,\n",
105 | " 'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)\n",
106 | " 'model_dims' : (84,84),\n",
107 | " \n",
108 | " # Atari settings\n",
109 | " 'rom' : \"Breakout.bin\",\n",
110 | " 'rom_dir' : './roms',\n",
111 | " 'pad' : 15, # padding parameter - for image cropping - only along the length of the image, to obtain a square\n",
112 | " 'action_history' : True,\n",
113 | "\n",
114 | " # simulator settings\n",
115 | " 'viz' : True,\n",
116 | " 'viz_cropped' : False,\n",
117 | "\n",
118 | " # replay memory settings\n",
119 | " 'memory_size' : 1000000, # size of replay memory\n",
120 | " 'frame_skip' : 4, # number of frames to skip\n",
121 | "\n",
122 | " # learner settings\n",
123 | " 'learning_rate' : 0.00025, \n",
124 | " 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used\n",
125 | " 'discount' : 0.99, # discount rate for RL\n",
126 | " 'clip_err' : False, # value to clip loss gradients to\n",
127 | " 'clip_reward' : 1, # value to clip reward values to\n",
128 | " 'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations\n",
129 | " 'optim_name' : 'RMSprop', # currently supports \"RMSprop\", \"ADADELTA\", \"ADAM\" and \"SGD\"'\n",
130 | " 'gpu' : True,\n",
131 | " 'reward_rescale': False,\n",
132 | "\n",
133 | " # general\n",
134 | " 'seed_general' : 1723,\n",
135 | " 'seed_simulator' : 5632,\n",
136 | " 'seed_agent' : 9826,\n",
137 | " 'seed_memory' : 7563\n",
138 | "\n",
139 | " }"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 |     "You may want to set a smaller number of iterations (like 100000) for illustration purposes. We set the GPU option to True; turn it off if your machine does not support it. Be sure to have the requested ROM in the indicated directory."
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 5,
152 | "metadata": {
153 | "collapsed": true
154 | },
155 | "outputs": [],
156 | "source": [
157 | "# set random seed\n",
158 | "np.random.seed(settings[\"seed_general\"])\n",
159 | "random.seed(settings[\"seed_general\"])"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 |     "We initialize the simulator first, as we need some of the information it provides, e.g., the number of actions."
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 6,
172 | "metadata": {
173 | "collapsed": false
174 | },
175 | "outputs": [
176 | {
177 | "name": "stdout",
178 | "output_type": "stream",
179 | "text": [
180 | "Original screen width/height: 160/210\n",
181 | "Cropped screen width/height: 84/84\n"
182 | ]
183 | }
184 | ],
185 | "source": [
186 | "simulator = AtariSimulator(settings)"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 |     "Here we define the convolutional network in the format required by Chainer, the deep learning library we use."
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 7,
199 | "metadata": {
200 | "collapsed": false
201 | },
202 | "outputs": [],
203 | "source": [
204 | "#Define the network\n",
205 | "class Convolution(Chain):\n",
206 | "\n",
207 | " def __init__(self):\n",
208 | " super(Convolution, self).__init__(\n",
209 | " l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),\n",
210 | " l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),\n",
211 | " l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),\n",
212 | " l4=F.Linear(3136, 512, wscale = np.sqrt(2)),\n",
213 | " l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)),\n",
214 | " )\n",
215 | "\n",
216 | " def __call__(self, ohist, ahist):\n",
217 | " if len(ohist.data.shape) < 4:\n",
218 | " ohist = F.reshape(ohist,(1,4,84,84))\n",
219 | " h1 = F.relu(self.l1(ohist/255.0))\n",
220 | " h2 = F.relu(self.l2(h1))\n",
221 | " h3 = F.relu(self.l3(h2))\n",
222 | " h4 = F.relu(self.l4(h3))\n",
223 | " output = self.l5(h4)\n",
224 | " return output\n",
225 | "\n",
226 | "net = Convolution()"
227 | ]
228 | },
229 | {
230 | "cell_type": "markdown",
231 | "metadata": {},
232 | "source": [
233 | "We then initialize the learner + chainer backend, replay memory, and agent modules."
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 8,
239 | "metadata": {
240 | "collapsed": false
241 | },
242 | "outputs": [],
243 | "source": [
244 | "backend = ChainerBackend(settings)\n",
245 | "backend.set_net(net)\n",
246 | "learner = DQNLearner(settings, backend)"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 9,
252 | "metadata": {
253 | "collapsed": false
254 | },
255 | "outputs": [],
256 | "source": [
257 | "memory = ReplayMemoryHDF5(settings)"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 10,
263 | "metadata": {
264 | "collapsed": false
265 | },
266 | "outputs": [],
267 | "source": [
268 | "agent = DQNAgent(learner, memory, simulator, settings)"
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "Now let the agent train."
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": null,
281 | "metadata": {
282 | "collapsed": true
283 | },
284 | "outputs": [],
285 | "source": [
286 | "agent.train()"
287 | ]
288 | },
289 | {
290 | "cell_type": "markdown",
291 | "metadata": {},
292 | "source": [
293 | "# Visualizing results"
294 | ]
295 | },
296 | {
297 | "cell_type": "markdown",
298 | "metadata": {},
299 | "source": [
300 | "First, let's visualize the training and evaluation results."
301 | ]
302 | },
303 | {
304 | "cell_type": "code",
305 | "execution_count": 11,
306 | "metadata": {
307 | "collapsed": false
308 | },
309 | "outputs": [],
310 | "source": [
311 | "train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n",
312 | "train_stats.columns = ['Iteration','MSE Loss','Average Q-Value']"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 12,
318 | "metadata": {
319 | "collapsed": true
320 | },
321 | "outputs": [],
322 | "source": [
323 | "eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n",
324 | "eval_stats.columns = ['Iteration','Total Reward','Reward per Episode']"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 13,
330 | "metadata": {
331 | "collapsed": false
332 | },
333 | "outputs": [
334 | {
335 | "data": {
336 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEPCAYAAACHuClZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXmcHGWd/z/fyTGTyTAJ5CIhCZNwJJAAAUSOoGk8EFbQ\nCCJEFMKiyyrnqiisyrGrrC7+WGEFFEUCIuoKIoqC4UiLIGdCIDeEMBNCJoeZkExmckwy398f33pS\n1dVV1dV19DH9fb9e/Zqu6u6qp5+pfj71vZ6HmBmKoiiKEoe6cjdAURRFqX5UTBRFUZTYqJgoiqIo\nsVExURRFUWKjYqIoiqLERsVEURRFiU2qYkJEY4noaSJaQkSLiOhya//1RLSGiBZYj9PSbIeiKIqS\nLpRmnQkR7Q9gf2ZeSERNAOYD+CSAcwF0MvMtqZ1cURRFKRn90zw4M68DsM56vo2IlgE4wHqZ0jy3\noiiKUjpKFjMhohYA0wC8aO26jIgWEtHPiGhIqdqhKIqiJE9JxMRycT0I4Epm3gbgDgATmXkaxHJR\nd5eiKEoVk2rMBACIqD+ARwE8xsy3erx+IIA/MvORHq/pxGGKoigRYOaShhJKYZn8HMBSp5BYgXnD\nWQAW+32YmfXBjOuvv77sbaiUh/aF9oX2RfCjHKQagCei6QDOB7CIiF4FwAD+HcBniWgagF4ArQAu\nSbMdfYHW1tZyN6Fi0L6w0b6w0b4oL2lncz0HoJ/HS4+neV5FURSltGgFfJUwe/bscjehYtC+sNG+\nsNG+KC+pB+DjQERcye1TFEWpRIgI3AcD8EoCZLPZcjehYtC+sNG+sNG+KC8qJoqiKEps1M2lKIrS\nx1A3l6IoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+\nsNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGai\nKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoP\nttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEps\nNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhU\nCeoPttG+sNG+sNG+KC8qJoqiKAmwdi1w883lbkX5SDVmQkRjAdwHYBSAXgA/ZebbiGhfAL8BcCCA\nVgCfYeYtHp/XmImiKFVBNgt8/evASy+VuyV9M2ayG8BXmHkKgBMBXEpEkwFcA+BJZp4E4GkA16bc\nDkVRlFTp6QF27ix3K8pHqmLCzOuYeaH1fBuAZQDGAvgkgHutt90LYGaa7egLqD/YRvvCRvvCptx9\nsWuXiklJIKIWANMAvABgFDOvB0RwAIwsVTsURVHSoKcH2LGj3K0oH/1LcRIiagLwIIArmXkbEbkD\nIb6BkdmzZ6OlpQUAMHToUEybNg2ZTAaAfSdSC9uZTKai2qPblbNtqJT2lGvb7CvX+V99NYutWwGg\n9OfPZrOYM2cOAOwdL0tN6kWLRNQfwKMAHmPmW619ywBkmHk9Ee0PYB4zH+bxWQ3AK4pSFfzyl8Dl\nlwMdHeVuSd8MwAPAzwEsNUJi8QcAs63nFwJ4pATtqGrcd6G1jPaFjfaFTbF9sXw58JOfJHd+jZmk\nCBFNB3A+gA8R0atEtICITgPwfQAfJaIVAD4M4HtptkNRFMXN/PnAHXckd7xaz+bSubkURalJfvxj\n4MorgW3bgAED4h/vRz8SN9fu3UC/fvGPF4e+6uZSFEWpODo7xTX15pvJHK+nR/7WqnVSUExI+BwR\nXWdtjyei96ffNMWJ+sZttC9stC9sTF/Mmwf8138Vfn9np/xdtCiZ8+/aJX9VTPy5A1K9Psva7gRw\ne2otUhSlaFpbgd/9rtytqAzmzwcWLiz8vs5OoLk5OTFRy6QwxzPzpQB2AAAzbwYwMNVWKXk4c+lr\nHe0LG9MXf/0rcPfd5W1LuTF90d5uWwlBbNsGnHBC8pZJrRYuhhGTHiLqB6uwkIhGQCZtVBSlQti0\nyXbb1Drt7eGsg85O4MQTgcWLkzmvWiaFuQ3AwwBGEtF3ATwL4KZUW6Xkob5xG+0LG9MXHR1yp13L\nmL4oRkyOPlren0Tf1XrMpOB0Ksz8SyKaD6kHIQAzmXlZ6i1TFCU0apnYtLcDI0YUfl9nJzB0KDB5\nMrBkCXD88fHOW+uWia+YENF+js0NAH7lfI2ZK2DSgNpB4wQ22hc2pi82bVLLxBkzaW4u/P7OTmCf\nfYCpUyVuEldM1DLxZz4kTkIAxgPYbD0fCmA1gAmpt05RlFB0dKhlAgDd3cDWreEG9G3bREyOOCKZ\nIHytWya+MRNmnsDMEwE8CeBMZh7OzMMAnAFgbqkaqAgaJ7DRvrAxfbFpE9DVBfTWcGpMNpvFunXy\nPEw2l7FMjjgimSB8rVsmYQLwJzDzn80GMz8G4KT0mqQo0fjgB2vX1bNpk/zt6ipvO8pNezswalT4\nALxaJskRRkzWEtG3iKjFenwTwNq0G6bkonECG7++ePllGUxqCdMXHR1AU1Ntu7oymQza24EDDyw8\noO/ZI/UgjY3AmDEyn9b69fHOv2sX0L+/ikkQswCMgKQHPwxZFXFW4CcUpcQwy+CwcWO5W1J6du6U\ngWz06NoWEwB7xaSQm2vbNmDwYIBIHklYJz09Yulo0aIPzNzBzFcC+CCADzDzlZrJVXo0TmDj1Rdm\n8Kg1Mclms9i0CdhvPxnIatXNB0hftLcDLS2FrQMTfDdMnRo/brJrl1iHapn4QERHENGrABYDWEJE\n84loavpNU5TwmLvBDRvK245y0NEBDBumbi4Aod1cJl5iSNIyUTHx5ycAvsLMBzLzgQC+CuCudJul\nuNGYiY1XXxgxSdIyufji+H70tMlkMti0ScRkn31qW0ycMZNdu8T16UcaYrJrl4pJIQYz8zyzwcxZ\nAINTa5GiRCANMfnLX4DXXkvueGmhbi6b9nbggAMkEG6yq7zo7BRLzjB1KrB0abzUarVMCrOKiL7t\nyOb6FoBVaTdMyUVjJjZefZGGmHR2AitXJne8NMhms+rmsjAxk9Gjgfr64EHdbZkMGSJ/46RWa8yk\nMP8Myeb6nfUYbu1TFADA3LnAXWV2fCYtJsxyl//WW8kcL03UzSXs3g28957MyzVwYHBGl1tMAEkT\n7u6Ofn61TArAzJuZ+QpmPgbAcQCus9Y0UUpIJcdM/v534LnnSnc+v5jJgAHJiUl3t7g8Kt0yMTGT\nqG6uww+XAH5f4LDDMhg+XNZfL2SZuLO5AGDQIGD79ujnV8ukAET0ABE1E9FgAIsALCWiq9NvmlIt\nrF1b/jviHTuAsWOTy+Yyg3I1W
CZR3VzbtwPLlgFvvJFe20qJcXEBxbu5ALVM4hLGzXU4M28FMBPA\nY5AJHj+faquUPCo5ZtLeXlox8YuZjBsnlklQFk9YOjuBkSOBVasqe74rU2cSxc1lZguoBsEMw9y5\nWey/vzwP4+ZyBuCB5CwTLVr0ZwARDYCIyR+YuQfWqouKAlSOZTJ0qAwiSbSls1PucpubK3+Klqhu\nrrXWpEh9RUw2bYpnmQwapJZJHMLWmbRC0oGfIaIDAWxNs1FKPpUcMym1mPjFTBoaJPiaRNzEDDYH\nHVTZg20mk4ns5mpvl6lEKj0uFJbm5kxsN5fGTKITJgB/GzMfwMz/xEIbgFNK0DYlQc46C3un506S\n3bslTlEJlkktigkQPZtr7Vop1qv07xcWZ8ykkJvLLwCvlkl0fMWEiD5n/f2K+wHgipK1UAEQP2by\nt78Ba9Yk0xYnGzZIFlUlxEzSEJODDy79nfvddwNtbeHeO29e9Lm52tuBD3yg74jJokXZslsmKibe\nmCr3fXweSpXADGzeDGzZkvyx29tlwO3sTCbwHRUjJiNHJpPRZe5cS22Z9PYC114LPPtsuPeblOiG\nhmhurmOOkZUJy21ZJkESMZOoYsIsVnotu7l8l+1l5p9Yf28sXXNqF3MxDhjg/XqcmElnp6zfkIaY\nrF0rs7S++aYMbIMGxT9mlL4wYtLQUN2Wyfz50v6wVuThh2cwbJg8j+LmOuAAYOJEyVo76qji21tJ\ndHVlQru5vLK54qQG9/TI9VpIxPoyYepMJhLRH4loIxFtIKJHiGhiKRpXSzz5JHDOOekc2xSlpSUm\nY8YkW339pz8Bny8y+Xz79uTdXE1NpbdMHn9cMsjCiolxcQHR3FyjR1dHXKgQvb0yKadJDS61ZaJi\nEi6b6wEA/wdgNIAxAH4L4FdpNqoWWb48eACJEzOJKiZ/+ANwzz3B7zEDUpJismEDsGSJ/+uljJkM\nGyYDVamqxB97DPjsZ8OLybx52b2WyeDBIiZh3Y3t7XIjUI64UNJ0dAD19VnU18t2lAr4OJbJrl1i\nDamYBNPIzL9g5t3W434ADWk3rNZobU1vwIoqJi+/LIISRBqWSWenuF2KicGkJSZEpRtsOzpkgaZZ\ns8KLyZYt2Csm/fvLYBZmQNyxQwbUYcP6hmXS3m73AxBtbq6kLBMtWvTnMSK6xpox+EAi+jqAPxPR\nfkS0X9oNrBVaW8Vl4UecmElUMdmyJdhCAERMkrZMOjtlQPRbS6QUdSbOO9dSDbZPPAF88IMiXmHF\nZP/9M3vdXEB4V9e6deISIpLvl7ZYMqfbh5IIktm7HWQhONd/d1KtlsnOnUAlTJDhG4B38Bnr7yWu\n/edBKuE1fpIAbW2SVWPucJKko0OOGUVM3nrLHqi9MK6SpMUEEOvE+MALkXQ2l/POtVRi8thjwOmn\nA6NGyY2FGaCCMAWLBpPRNWpU8OecNRkHH5z+93vzTeDYY0XEBqewGpLz+wDBg7pz/Xcn1Roz2bgR\n+Nzn0kn9L4YwRYsTAh4qJAnR2ioXo5+rK27MZPz4aGLS2wusWOH/nrTcXID/ABcmZhI3TdmZ7VMK\nN1dvrwTfTz9dZr3df/9w07gsXJjNEZOw/wdjUQJybbS3pzsIbtsmjwcfLPzeKDcD7e1AT09273aQ\nm8vLxQXEK1o0wt/QUHox2b49mSzKuAQVLX7d8fwc12s3pdmoWmPbNrmIJ04MdnVFZfNmYMKEaGIy\nbJi/q2v3bmnvyJHJi8n48WKZhMWISWMjUFcXb5Ej04ZSWiavvSZZXBOt27OxY8PdaW7dikhuLmNR\nAnITM3as3NAYVq0Cvv/90M0vSHe3iOTPfx78vrfflmu12PihO2YSZCH4iUmcosVyWiYVLyYQN5bh\nWtdrp4U5OBHdTUTrieh1x77riWgNES2wHqGO1Zdpa5N1q4cP9xeTuDGTqGJy0kmynKkXGzbID7h/\n/+TF5Kij/AfwoJgJkEzcxDnglMIyMS4uQ1gxGTgw4+nmKoTTMgHyXV133imV+EmxfTswfbpkLb75\npv/7VqwQ4Sn23FLNn9m7XcjNlZZl0r+/WJl79kQ7ThSqQUzI57nXth/3APiYx/5bmPkY6/F4yGP1\nWVpbpfBv2LB0LJOODjl+sWLy3nsiJn6WiXNAam5OTky2bRMxiWKZAMmLyZgx0hdxrZ0gHn8cOM1x\nWxVWTMy8XIawou6OMTitr927gfvvB955J7lZDbq7ZVbnz30OmDPH/31vvgkcdxxw++3FDcitrWLN\nGqK4uZKwTIhKb51Ug5iwz3Ovbe8DMD8LwGtVxrBiVBMYyyRITOLGTNKwTJyukqQtk2nTosVMgGTE\nxHn3Wlcn/VeMuBVDdzewYAHgNLjCisk772Rju7mA3IyuuXPl5qOxEfjHP/I/+9OfBguCF93dMuBd\ndBFw773+QrFyJXDeeSJ0f/xjuGMzi0WzcWN2775Cbi539TsQLwDvTJZQMcnnKCLaSkSdAI60npvt\nI2Ke9zIiWkhEPyOiITGPVfVUomXCLP74970PWL3aO3feaZkkLSaTJok1ENbt4BSTuBldZv13Z9ZR\nmq6ud9+V7CvngFBMzCRpN9ecOcDs2XKnv3p1/mefegq47jq5Gw/L9u0iTlOniog98YT3+958Ezjk\nEOCKK4Dbbgt37HXrZCAf4hhJosZM4k6nUujcaVDxYsLM/Zi5mZn3Yeb+1nOzHSd59Q4AE5l5GoB1\nAG6Jcaw+QWtrYcskbsxk7Fi54IMKuZx0d8uPo7FRgsJeGV0mkwtIXkyam0UAvayBtGMm3d0yIPR3\nJM6nWYuxfn1+Kq+XmPT2AjfdZK/82NsLdHdnsO++9nuKcXN5WSYdHWKZnHuuiMk77+R/9u23pb9/\n85tw3w+QPjV1Hf/8z/6B+JUrRdjOPlviK4sWFT72ihXA5Mm510XUbK6kLJNSFi4aq6/chKkzSRRm\ndv7Mfwog0JidPXs2WlpaAABDhw7FtGnT9l40xt1R7dttbRm0tADz52exbBkAJHv8jg4J0jY2ZvHn\nPwMzZxb+/JYtQENDFtmsTCa4dCmweXPu++fPz+LQQ6W9++wDvP22vD9uezs75XhDh2bxyCPA1KmF\nP79jh6TJrl8PjBiRwcaN0c8/ebKc3/n6pEnAH/+YxXHHJf//37Qpg1Gjcl8fOxZYuTK3P++/P4tv\nfhOYPj2DGTOARx/NoqEB6N/fPt6GDcC++waf76ST5P+7eHEWdXXyukz2mMUNNwCnn57B0KFAXV0W\nTz2Vf728/XYG3/secP31WRxwAHDKKfbrK1cCF1+cAVHu+bu7xQ2VzQLnnZfBNdcAjzySxZAhdvue\neiqL1lZg4sQMBg4EPvaxLK69Fnj00eDvs3y5/H+cr9fXA6tXe1+P27bl/38BuX42bwai/P56eoAt\nW+R89fUZq5Aw/OfjbG/fnsF772Uxe/YcANg7XpYcZk71AaAFwCLH9v6O5/8G4IGAz3ItMH
Ik89q1\nzA89xDxzpvd75s2bF+nY3d3M9fXMvb3MEyYwv/lmuM8tXco8aZI8v/565m9+M/89H/848yOPyPOn\nnmKeMSNSE3Po7WWuq2PetYv58suZb7kl/z1efTFqlPQhM/PPf8584YXR2/DGG8wHHZS775lnmE84\nofhj7dlT+D233858ySW5+3btYh4wgLmnx953//2yb/Zsu52jR8/L+dyPfsT8pS8Fn6+tjfmAA/L3\njxnDPG4c8+OPy/b3v8/81a/mvqezk7mhQb7XlCnMc+farz30EDMR8+LF+ce+4Qbm666zt08/nfnh\nh3Pfs3Il84EH2tvr1jEPHcq8aVPw97nqKuabb869Ln77W+azzvJ+/7e/zXzjjfn7OzqYhwwJPpcf\nDz5on2/KFObXXot2nCjceivzZZfl7rPGztTHd+cjzHQqkSGiBwD8HcChRLSaiC4C8N9E9DoRLQQw\nwxKUmmX7dolljBqVTsxk82apQyASn3LYuMmWLZJ9AwCHH+4dhE8jAG/W5xgwQFwvYYPeSbq5vFJH\nTR8Uk920ZAkwZUrhz5ipTZwMGCCp4s7VMV95BbjsMuDhh6WNHR3iDnQS5v/gdnEZDjpIAuMf+Yhs\ne7m5jEu2rg742teAm2+W/c8/D1xyiXwPrwQAp5sLkGr4+fNz32PiJYZRo4AjjwQWLgz+PsuXS4zN\nSRQ3V1Ixk1IXLlZ8zAQAiKgfEc2LenBm/iwzj2HmemYez8z3MPMFzHwkM09j5pnM7DMDU23Q1iY/\n2rq6dGImHR3Y61MvRkzee88OaE6Z4p0enEYA3vlDnzjRO6MrTMwkTgDea7AZNkx+sO++G/44P/uZ\nDHSFBNErZgLkx01eeQU44wxZHfGhh+RaaWnJ5HymqalwNpc7LdgwaRJwwQVSXAgA48blB+BbWyWz\nDZDZjZcular2s84C7rtPjuE1ILv9+n5icvDBufsGDy4cx/CKmUTJ5ho4UNKid+8OPp8Xms1VQEyY\neQ+AXs24Sg9zpwekY5l0dNgV0sVaJkZMDjlEBhXnD2T3bkkbNYNgGmLiZZl43eUzS9vM9ONxLRO/\nwcZPVL3YuVNqNd73PuCFF4LfG0ZM9uyRO/RjjpFMqzlz8mtMgHD/B3cml+EHPwCuv97e9srmMhXq\ngAyeV1wh6/DceKMUXTY2etfjmGwugxET5/9z5cpcywQoHBTfvl2+j2mTIUo2F1H0WhPN5go3a/A2\nAIusavbbzCPthtUKbW2StQTIwNDR4T1gmmBbsSQhJgMHyo/VmdG1YYO4YUzGUxpiMmGCiK2zJuEz\nnwFuvDGb85mdO6WNddbVPHJkfDHxGmyKEZPf/14KL887LxkxWbFCXEhDhwJnnilT1c+fD2zfns35\nTBw315AhuRN6jh4tNwzOFGCnmADApZeK2+1f/kW2Bw/2FhO3m2vsWPnrtLy8LJNBg4Izo1auFAu2\nf//c30iQm8uvAt6cL4qYqGUSTkx+B+DbAJ4BMN/xUBLA1JgA9hTWSa7HnYSYAPkDqfvutqlJBgyT\nthoV50A+aJAI7Nq1sr1qlbh33PM2uWc1HjzYpM3Gb4MTv9iRF3ffDXzhC8AJJyQjJq+8IlYOINfJ\nrFmSXuuOmYRxc/lZJm7695d2OV17bjEZPBiYOTN3209MnAMeUb6ry8syaWgIHty94iVANMsEiD6l\nilom4WYNvhey0uILzHyveaTftNrA6eYC/F1dcWImSYmJcyB11pgAYhU0Nha3bKwX7h+6M25y++3y\ngx0zJpPzGbeYEMVzdcW1TN5+WyraZ84Ut9TSpf4DInPucrNO3GJy7LH2a7NnSzuPPTaT85k4lokX\nbleXW0zcDB7sPRi73VxArpjs3i3ncR+7kKVg4iVAcTETPzGJ6uYqt2Xi7ttyEGYN+DMBLATwuLU9\njYgKrL+nhMXp5gJk4E8ybmKyuYB4YnL44bkDqVcQNwlXl5eYrFolIjVnjszt5BYsr/VWgsTkhhuC\n7z793CBhM7ruuQc4/3xp06BB8rkFC/zPBXjHaJxiMn++bZkAwNFHA0ccIa5GJ2HFJIxlAkgQ3mR0\nMRcWE7+YidvNBeSKSVubCKr7/1jIzeVnmRTK5vLqb3O+JGImpSxarBrLBMANAN4P4D0AYOaF0AWx\nEsPp5gL8LZM4MZMo2VxuMZk6FXjxRdvl4bZMgGTExD2QmwkI778fmDFDrINly7I5n/ETE6+Mrp4e\n4LvfBd54w78NfoNNmIyuPXtETC6+2N4X5Oryc3EBtpjs3i1T1B9zjP0aEfDII1LY6cS4uYIEL6yb\nC8i1TDZbs+w5K+7dhI2ZACImr7wibXWnBRsKubmclonzNxLHMoni5iq3ZVItYtLDzO4hKKZnXAHk\ngtu0KfeHnXRGl9vNtXVruM8560wA4LDDgC99SQa0Bx/0dpUkZZk4B3Lj5rrtNuDyy+U19+BSjGWy\nerUMzkHzXgUNNoVcXXPnyv/zyCPtfVHFZMwYGfiXLAEOOCA/PjJhQv4gYhIR/Aaz3btFFEaO9P8O\nTpxiYqwS9wqFTvzExGvAGztWjrVmjT2NipsgS8FM8Jh0zESzuaIRRkyWENFnAfQjokOI6H8hhYhK\nTFavlh+UyesHKidm4qwzAeRH/61vyUyu114LPPBAadxcBx0EPPqo9FEmI2LS3JzJ+YyXmHjVSAD2\n/FpRxcTt7nMzd67UXDiJKiYNDSLojz2W6+Jy4nVdBP0f1q8X15jzmgvC6eYq5OIC/GMmXpaJMwjv\nZ5kEubna2+V1Yyk5+8LPzbVnjwz0fjGGJCwTLVr053IAUwDsBPArAFsBXJVmo/oSu3b5++7dwXcg\nfcskqpvL8P73A6++CnzlK8Dxx+e+5jWIrV9fXBGYV8ykq0vqGYi8s5W8xGTyZPGnuzFi4jWBoV8b\nnLgTEdy88oqsx+Fk4kRpo5eABYkJIDcbv/+9v5h4EZTRtXp1eBcX4G2ZBFFMzATIFRMvyyTIzeUX\nLwH8rYNt26R//KwrtUyiEyabq5uZvwngwwBOYeZvMnMJw0vVze9+B3zxi96vuYPvQDoxkyTFBJAf\n43/8R/4g6CUm558P/OUv4dvrHshHjpTMpfPPt8+9Zk025zPFisnhh6fj5nIWFjohEuvkxRfzP+M1\nlYqTsWPlc85MLide14X7/9DVJZbkpz4FfOxjwCc+4X8+N04xcVa/+1GMmwuwxcQrLRgIHtyd8RIg\nXMwkKPgOaMwkDmGyuY4jokUAXocUL75GRD6XtuLmnXfkjs4Ld/AdqHzLJAgvMWlrE3dEWNwDOZEE\ntM1dbdiYyaRJEmR3B6JXrgROOSVYTIKK2oIyut54Q8TPK0Dt5+oKY5kQSfZWWNz/h3/9V+AnP5FU\n5dWrc6vcC7HvvnLXvXVreDeXW0yY/adJP/ZY4KWXvNOCgWA3V5Bl0q+fnNe9CFfQjYI5n1om0Qjj\n5robwJeZuYWZWwBcClmOt2r5xz/kx10K2tu9f
feAv5vLXZQHRIuZ7N4tP2wTuA0rJmZhLHfAtxDu\nQYxZMp+KmSer0I+9qQmoq8vk7PMSkyFD5L3uzKuVKyX2Usgy8bt7Dcromj/f34KIIyaTJ/v3idd1\n4XZzPfsscNddwIUX5iZVhIHInvDx7bfzb37ceMVMdu2SAkjn+jAGEzP0SgsGgt1cbsvE2Rd+y+cW\nur7UMolOGDHZw8x/MxssS/FGmAqtcli/Xu6GSvEPX7tWgtleWVReboMkLRMTRDfTjAweLN+50Ap5\nXV3ygzB3WmFxi8nmzXKhry9iKs8gqwAIHzMB8l1de/bIgPjBDwavb15owPFzdQWJyXHHSazJ3feF\nxOSoo2TOq2Jw/h82bpSbEy8XUljGjxcL08uSduMVMwka7EwQ3iteAgRbCkGWCeA9qBe6vtQyiU4Y\nMfkrEf2EiDJENIOI7gCQJaJjiOiYgp+uQLZulYGkrS39cxkXj9+KdWnGTJwuLkB+uM3NhdOD3WnB\nYXGLibl7L9YyCfJpNzXZi3QZworJmjWSyTRypNxFmroJrzYEDTh+GV3OKU/cNDfLjcPrr+fuLyQm\nH/848P/+n//rhWImRuDqwvzSfRg3Dnj5ZTlu0P8G8HZz+QXfDccd5y8Kfm6uHTsk3uT8/bj7wiuj\nK4ybKwns5HRkAAAgAElEQVTLpFRFi8yVIyZhVlo8yvrr9rQeDYABfCjRFpUA80N7+21YKwWmR3u7\nuLJWr5Y7WsPOneJuO+CA3PcnaZm4xQSwXV3u2WadRImXAPlismaN/MCSdHN5TUnuJyaTJuVOTums\nZRg3Ttrn7h9mGQyDBs0pU0Q4nPgF350cd1z+tCiFAvBRcFpvL7+cn11WLOPHA9ls4XgJEE1Mrr7a\nP+PPz81lrG4v15khqpurGEvaUC7LZOdOOW+cm4WkCJPNdUrAo+qEBLAHvNbW9M/V3i4ptO64SVtb\nfo0JID+Q7u58d0iUmEmQmAThrjEJi5dlcsQRxf04C/3YBw4EiDI5d5xhLROnmHitsQ5I3zc0BNdh\nTJkis/Y6CQq+G445JndiQ1OpXuhuP4hCdSZJiMm4cbL4VVgxcd/ZF7pzbmryt4T93E5eAuXuCz8x\nCervSpo1+L33JAU/iEqxSoBwbq4+h9MySfs8vb0y+AQtMuSESAYkryB8sUQVk6Qsk3fflQE0ScvE\nq9Zkxw7vH1QhMfFyPRYabACxLN56K/f6CYqXOD/nFBPj4gqqKI+C8/8Q5HoLy/jx0sdhxMQrZlLI\nMgnCz80V5phR3FxRA/BprLS4ejXw298Gv0fFpMxs3SqDbNpiYibUM24uJ0GZMV6urigxE+ckj4ZS\nismaNcC0adIOd4qmF8x2UVkQ/ftn88TEyzIZP1760bw3jGVSaLABZMC56CKZxdgQRkyOOgpYtswe\naArFS8LgdV0YsX33XRnk3BmDxTJ+vPwNIyYNDXJO5/87jpj4ubm6usQKcuLui1IG4NOwTLq6CrdF\nxaTMdHbKD7tUYuK1Yl1QAVhQ3KS3N7zV4pzk0VBqy+TAA8WFESYO1N0tP8QgPzggP54wYlJXJ1lM\nZlLHpMQEAL78ZZnF2LQjjAXQ2CjTw5jgfRJi4oX5PxgXV1zLxyxkFUZMzGqFTuskzoBXjJvLTSlT\ng9OImfQZMSGis4IepWxk0nR2ykR8pRCTMWP8lz8txjIx/uBnn5W7/TAXa7ndXGvWSILByJHh4iaF\n7hoNo0ZlcgYrPzEBbFcXs7imDjpI9psAvJuwYtLSImux339/uOC7wRk3SSL4HhQz8ZraJQqDBsl1\n7Je+68YdN4nr5ooaM4mazVVJlkl3d/AM0FUhJgDOtB4XQwoXz7cePwPwz+k3LT06O2VQ6eqKv5hT\nEGaq77Fj5bnT9A+qJg6yTNatE1//PSHKRsOIybp1wCWX5L4nydTgAw6Qu+8wcZMw8QrAO2ZSSEza\n2yU91wwkQTGTMGICyHxht90mGWOFgu8GZ9wkLcvE9M/LL8ePlxhefz2cZQLkWyZxxKS+XgZq9wqe\nXm4ur88WG4CvNMsECD5WVYgJM1/EzBcBGADgcGY+m5nPhkz6WGQ5W2WxdasMqi0t6Vonxs1VXy+D\n+rp19mtBBWBBMZN//EPubm+6qfAFG0ZMnnsOuPfeXKFLwjLZvl1+DKauI6yYhBnId+wIFzMBbDFx\nT3Fu3Fzuu74wMRtDJiNZXzffXDheYkhaTPzqTLZuTc4yAYJTyd2404PjDHhE8r91B+G9BCpMzKTa\nLBMguD1VISYOxjGzc3al9QDGp9SekmAuqAkTSiMmQK6rq6tLfux+Lo4gy2TTJuC002SxKqd10tsL\nfP3r4sc3hBGTBQvkwncWcEZNDR40SOoFenrEKhkzRgaDsG6usGISNmYC2LUmbjFpbpaYitvlV4xl\nQiRrrMyZE15Mpk2TmMmuXenGTJYskcE26RqWMLjFJI5lAngP8JWczZVU0WJfFJOniOgvRDSbiGYD\n+BOAJ9NtVrqYC6oUlolZQMopJm1tEpj2KzTyWrrX+IM3bRKxuf562zrp7QUuuwx44gkRFBOgDysm\nAwfmptBGtUxM2m5np4iJCdwW4+YKM5BPnJgJLSaHHirTm69Yke/z94qbFCMmgCwjvO++4S2AwYPl\nJmbJkmTExG9urs7O5KySYkkyZgKEt0zC1pkEzTmnlkl0whQtXgbgx5BK+KMA3MXMl6fdsDQplWXi\nXB7VvS5E0BxHhSyTYcOkEHLqVODuu0VIFi4E/vpX4Oyzgf/8T3nv5s3B2VzM4nL5+MdzK8Wjiglg\nu7pM8B1I3s1VTMykqUlcbU8/nS8mXnGTYsWksVHiCaecEv4zJgifRvU7YLc/qXhJsSSZzQV4D/Bx\nYiZpTKeSRszEXON9QkyIqB8RzWPmh5n536zHw6VqXFqYGXEnTEi3Ct7PzVVoXYhCMZPhw2Xf9dcD\nV14pQvL44/KdbrwR+MUvJB22kJisXSt/P/zhZCwTwBYTp2WStJtr06bwMRNA4iavvOItJnEtE3Oc\nYtJvjz1WLMI0YyZAeS2Tcri5kpibq7ExvmWSVNFin7JMmHkPgF4iiji0VCalsEy2b5dBzgzmSVsm\ngFgnc+bYQgLIwH311bJee0ODfYEbnGKyYIHcJbsrxZO2TMK6ucKmBhcTMwHsSQRNWrDBS0zCtiEO\nxx4LPPOMJD2kcS6T8FEuyyRpMQnr5nITxTKpr88vugxDmtlchcQkTt8mSZiYyTbIolh3E9Ft5pF2\nw9LELSbOjJ7f/ha49db45zBWibljjWuZuGMmhvPPz/cBX3klsGpVfrwE8BcTt5srSmow4G+ZJOnm\nOvLI8DETQL7fsGH5VppzfXNnG+LMlRWGadNkga0kplLxipkQyfcKk6qcBu6YSancXIViJrt3yyPo\nWiGKFjdxWib9+0scs1hBctOnLBOL3wH4NoBnAMx3PKqS3l57Vth995UguLOi/M47ZZAthp07gY9+\nNHfm
U2e8BCjeMuno8C5WcouJFw0Nkq7qNY2Gl5iMGSN9snmznLNQkDIIp5g4Yybr1wcXXwHF1ZmE\nLVoEpED18MPz9yfl5iqWffaRxIA0MrkM5bxbTbLOBEgum8tcX4UEPIqYOC0Tv4W5iqWrS8anoBhO\nVYkJM9/r9ShF49Kgq0s638wK63R1rVsnQWz30rOFWL0aePLJ3JlknfESQOIc3d3iRilkmdTXy8O5\n7kg2m0VPj3w+jNXw6U8DTz2Vv7+pSQbf3bttMSGyU2i3bcvtn2LxcnOZH7DX2uBOwg7kbW3FxUxO\nPhmYOzd/f7nEBBBXVxLB9yhztqVNudxchepMtm4N978tNj3YWCDO30xSYjJsWB+yTIjoECJ6kIiW\nEtEq8yhF49LAPVg4xeShh+RusdDiUW7M5597zt7nFhOz/OmSJXKRmSC6H6NG5QetzVxbYdcu8Fop\nkUi+/1tvSV8YUTNiErXGxLDPPnKMDRtyv38YV1cadSaAXfjmppxictJJ8SdgrFSSLFoEksvmKub6\nKsYycVolfueOgin6DWpLd3cViQlkvfc7IUv1ngLgPgD3p9moNAkSk//7P5kNtljLpLVVLmynmKxd\na9eYGMaPF8unpaWwqT16tL1KIyD+4DAurjAMGQLMm2dbJYAdhI8TfAekb1eulHY6f2BJislJJxUX\nM/FjyBBxezrrbkolJl/6EnDLLfGPE2Wdm7RJus4krJur0NxcYf+3xVomzniJIYnCxTBiUlWWCYBB\nzPwUAGLmNma+AcDH021WeviJydq1wKJF4h4q1jJpbQXOOivYMgFETJ55pvA62kC+mADh4iVhcIqJ\nIUkxWb7cDr4bvCwtN1HqTJjlx1xfX3xbifILF4uZTiUOdXWFZ0euVpKOmSSVzdUXLZNqE5OdRFQH\n4E0iuoyIPgWgBD+3dDA1JgYjJg8+CHziE/LPi2KZfPSjcoGbgclPTJ59NtyEeW4xyWaziYpJNpsr\nJsbNlYSYLFuWvxxxGMskbFru4sV2zGTnTrP6YrT2ul1dpbJMkqIaYiZpuLm6u4tfz6TUlklYMVm7\n1js5pasLGDGib4nJlQAaAVwB4FgAnwNwYZqNShM/y+Q3vwHOPTd/5tswmBmATzoJ+PvfZZ+fmGzZ\nEt0ycRYsxmHIEBnYnWJyyCGSTrxpU3wxWbs23zJJK2YS1cVlcFbBh1n/XSlMKYoWu7qiZXOVyjIJ\nW7jILPVAzjovQ18Ukw5m3sbMa6yZhM9m5hdSb1lKuC+olhYZRJcvl0pw40IplMbqxGRnTZ9uu7r8\nYiZANMsk6ZjJ4MEiIAazZsXChdFrTAC7b92WSVg3V5iB/NRTM4mJyWmnAd/9rvwPu7oKr/9eadRC\nzCSpubmKEZNSWSZtbfI7dy9419srQrHffn1LTH5ORG8R0a+J6FIiOiLswa1Cx/VE9Lpj375ENJeI\nVlgTSJa0ut59QTU2ygA9c6ZcEP37y8VbKI3VsH271GeMHm2Lyc6dch73wG/EpNwxk+ZmKZxzD5qT\nJgEvvhjfMgGiubmKdUP09sYXk3PPBb7yFeBDH5JMu2pycVUqpZibK2ydSVQ3V6liJs8/L3/ds1eb\nLK1CLreqEhNmngHgMAD/C2AogD8RUciFY3EPgI+59l0D4ElmngTgaQDXhm9ufNwxEwA44gjg85+3\nt816EGFoa5Mgbl2d1A4sWyZpt6NG5afwmjmcwlgmY8akFzPZd1/g6KPz90+eLBMQJiEmxbq5inEx\n/e1v2b13j3HFBJBp5K+8Evinf6o+F1elx0z27ImeIGFwi0lPj9xIuK0Br5hJKdxccSyTFywfj3u8\nManPhdpSSWJSMJ+EiE4G8AHrMRTAowD+FubgzPwsEbmz6T8JYIb1/F4AWYjAlASvC2ru3NwAbnNz\n+LiJswCxoUHWlv/97/PjJYD80+fPDzfNxejR9kSMhqRiJpdd5r1/0iQZnNOwTAq5uYp1MZkq+CTE\nBBAxYZZ5zpR4OMXEzB0VZ9oYt5vLWCWFjunl5nJfl14UG4CPa5lMnpxvmVSjmIRxc2UBzARwF4AM\nM3+ZmX8V45wjmXk9ADDzOgAjYxyraLzExH1RFhOEd6+YOH26ZIa54yUGL4vAi/32kwvaXEhJxkxG\nj/YWu8mT5W853FzFZFFlMpm9sa2kxAQArroKeOyxZI5VKio9ZpLEYOceUP1cXF51JqVIDY5qmWzf\nLq7VD32ob1gmYcRkOID/AHAigMeJ6Eki+s8E21BEqDs+YS6oYtxc7nm2pk8HXn3Ve7AuBiKZbsO5\n1G9SYuJHEmIybJjMg+V2Fw0bJpXxzvnLnBPhFTtbbxpiAsSfeFHJjZnEDb4D+QNqmOp3ILqbKynL\npFDR4oIFwGGHye+8L1gmBd1czPyeNX3KOABjAZyEeGvAryeiUcy8noj2BxAYlp09ezZarNF66NCh\nmDZt2t47EOMjLWb7rbeA008Pfn9zcwadneGO99JLwL/8i70tWWAZjB4drX3O7cGDs/jTn4DLLssg\nm82ivV1qQaZMif79g7aXLcuisREYMiTe8ZYsyX+9Xz+gqSmLP/wBOOusDJYvB048MYv77gPOPDNj\nWYJZZLOFjw8ATU0ZPPNMFtu3Aw0N6fRHNWwvXLgQV111VcW0BwBmzMigtxd48sks3n0XaGyMd7yG\nhgx27LC399svg8bG/Pf/8Ic/zBkfXn89a82+LdttbVmsWmVv+51v0KAMNmwI376engwGDsx9vb5e\nzh90Pd9/fxbjxsl4s3Jl7utdXcCuXVksXw5s3+5/fgnUy/gwx1qzuyVMhk8aMHPgA8AqAH8G8O8A\nTgYwsNBnXJ9vAbDIsf19AN+wnn8DwPcCPstJc+qpzI89Fvyez36W+b77wh3v/e9n/vvfc/cdcgjz\nXXdFa5+TmTOZH3xQnj/11Dzu14955874xw3iqquY165N59hTpzK/9po8P+ss5qYm5rvvlu1slvkD\nHwh3nHnz5vGppzI//jjzww8zf/KT6bS3Gpg3b165m+BJczPz5s3M8+czH310vGM9/TTzjBn29vPP\ny+/Ojbsvli5lPvRQe/v44+Wzhfjxj5m/+MXw7fvTn5hPPz1336WXMt96a/DnPv1p5vvvZ77nHuYL\nLsh97ZFHmM84g3nBAuajjvL+/K5dzP36eb9mjZ2hx+kkHmHcXAcz8z8x803M/Cwz7yr8EYGIHgDw\ndwCHEtFqIroIwPcAfJSIVgD4sLVdMsKYusUE4L2mk7/6apmpNi7O9OCjj85g8OB832zS/M//xHfR\n+WHiJi++CLz0EnD77cCvrOhbsTGTwYPTcXNVG5UYMwHsuEkabi6v6nfAu87E6eYKO2twEjGTMEWL\nzz8PnHBC7rIQhjBurkpycQEh3FwADiaiOwGMYuapRHQkgE8w83cKfZCZP+vz0keKaWSShI2ZhBGT\nri55n3sq8S9+MXr7nDjFJO14SSkwGV3f+Y4sOXzOOcAVV0hcq
NhpTJzFpbUsJpWKiZskJSbO+EOY\n6negdNOpRMnmWrNGXp84UZJ4vALwTU3VJSZhLJOfQmpBegCAmV8HcF6ajUqTsJZJmAB8W5tMI55W\n0NYpJnPnZqteTEaOBO67TwRl9mz5IZx5pmS/FSMm2Ww2tQB8teGMI1USJj04iQGvoSFcNpe7LwZW\ncDbX888DJ54oY0chy8RP2KpRTBqZ+SXXvt2e76wCvIoW3YS1TAqtmBgXp5hs3Vr9lsnIkVLT893v\n2jPmzpolrq6olkmti0mlYsSklG4uN043F3P4jMFSVMC/8IK4uAAZj9xism1b9bm5wojJP4joIFgp\nvET0aQDtwR+pTMyStEmlBhdaMTEuTjEZMyaTSMFiOWlpkbuxT33K3veRj0iGWjFTmZg6kySLFquV\nWomZhHFzecVMzIC+fbsM+GGm/S/F3FzGMgHEMilUZ+I1V2A1ismlAH4CYDIRvQvgKgBfSrVVKbFz\np0xxUiiIHTYA7y5YTJq+FjM57zzg6adz3YIDBwJnny2zNqtl0ncwMZNSurncDBhgT71SjOWb9nom\nO3cCr70mswUDwW6u/v1lVohdHmlPlbTKIhBubq5VzPwRACMATGbmk5m5NfWWpUDYC6pS3FwjR8ps\norJee/XHTOrqvAf+WbPkh6Exk+Kp9JhJkm4uc3ceNmZSV2cLSjFikvZKiytWSKzVtKe+Xr6bU3yc\nhZl+bjczVU2lEGYN+BFERMzcxcydRDSLiBaXonFJEyZeAoQPwKft5urXT+biWr9e7lyqXUz8+MAH\nZPqZYiZZVDGpbJIUk/79RRjM7AlhK+AB20KoJMtk8+bcOfa8gvDO7+jXnqpxcxHRWUT0DwCvA3iH\niD5BRAsAfAbABaVqYJIkbZmk7eYCbFdXfX31x0z86NcPuOMOEZUwmJiJikllx0yScnMBua6usHNz\nAXZGVzksEz8x2bo1f8oidxC+GsUkKBx1A4ATmHklER0D4EUAn2LmR0vSshQoRkwKWSadnfLPHDEi\nmbb5YcSkL8RMgvjkJ4t7v4pJZWMG5O7u/OUIomAG1Obm4qwdk9FVasskqGhxy5Z8D4k7CF+NYhLk\n5trNzCsBgJkXAFhRzUIChL+gwgTgjVWS9sSARkza2qo/ZpIU2WxWK+AtaiFmAuRmdPm5ubz6Ioqb\nq6FBBMg5EWkQUSwTLzHpy5bJSCL6imN7qHObmW9Jr1npEDZmYtxczP5iUQoXF2CLSV+oM0kStUwq\nm8GDZeqcUrq5vHC6ucL89gH5zZs1VMLEZoqNmXiNQ+44rVtMvNxulSYmQZbJTwHs43i4t6uOsHcn\n/fvLxRDkN21rs5fhTRMjJp2dGRUTC42Z2FR6zCRJyyRKzCSKmwsoLm6SRMzEyzIxCSlVb5kw842l\nbEgpKOaCMtaJ353Ju++GW7UtLqNHAw8/LHdLlZQGWG5UTCobZ8wkaTdX2Ap4wB7Uw07y6Dxf2LhJ\nsZbJli35i+cVskyqQUzCFC32GYoVk6Ag/Nq1pROTxYtlLRBduEkwdSZaAV/5MZM03Fx+FfBefREl\nmwsobkqVNGImZjqVoLaomJSRsDEToHAQvpSWybp14dtdKwwcKJXNW7fWtphUKuVwc3kR1c1VzJQq\nxa60WEhMentzCxLVMqlAori5/CiVmJjp7Q88MJP+yaqETCYDInF1/eMftS0mtRQzKeTm8ouZRLFM\ninFzJVVnYjwhsnqoFGoGtaVPiIlVd1J1FHNBFaqCL5WY1NcD++2HPluwGIemJqm/qWUxqVRMzKSU\nbi4v4ri5jGVyxx2AtTKyJ1FiJkGWiTv1uU+LCap0osekLJPOTslBd99dpMXo0cCOHdnSnKwKML7x\npiZxCdSymFR6zCRpNxdz+Lm5gHhurq4u4JprgJtukiQYP4pdabFQanC1ikmYlRbzYOaE1hIsLcXE\nTIIC8MYqKVVAfMyY0glXNWF+cLUsJpVKWm6unh753bktAT+iurkaG4GvfU1muFiwQObg8ztGEnUm\nhSyTbdvyj1N1YuLj0toCoI2Zq2qRrGLdXH6WSalcXIbRo2UNeEUwvnGTh1/LYlLpMZOk3VxBkzwm\nNTcXIEtMT50K/PKXIiyTJwNLlwLHH5//Xi/LpH9/sZr37JG555wUqjPxEpONG/PPW3ViAuAOAMdA\nJnwkAFMBLAEwhIi+xMxzU2xfoiSVGlxqMbnuOrVMvDBiUl9f3nYo+TQ2yu9t4MD8wTQKxtVTrKUT\n1c116612ABwApkzxFxMvy4TItk6c7d25U0TGfc32BTdXmJjJWgBHM/P7mPlYAEcDWAXgowD+O83G\nJU21WiYHHQQsXpwt3QkrHGfMpL6+dO7GSqRSYyYDBsgjqUJb4+YKEpOk5uYCcoUEAA4/XFYD9cLL\nMnGe24lxcbmv2SDLxK/OpOoWxwJwKDPv7UZmXgpZJGtVes1Kh2JjJpUiJoo3TU217eKqdMyys0lg\n3FzFVL8DMshv3x5+ni0/pkzxFxMvywQIFhM3++wjcZHe3typVIC+ZZksIaI7iWiG9bgDwFIiqgfQ\nk3L7EmP3bvmnh+38oNTgcohJpfrGy4EzZlLrYlLJ18XgwclaJiZm4ndMvzqTTZukLXEsWOPm8sLP\nMjFLFzvxipcA4gpsbBRB6cturtkAVkLWfr8K4uKaDRGSU9JqWNJ0dsrgE/aCUsuk8lExqWwaG5MX\nkygxk02binNxedHSIgWyXmOCn2UyfLic24lXjYnBuLqcU6kAfUtMTgfwI2b+lPX4ATN3M3MvM3sk\nrFUmxfpMKykAD1Sub7wcOGMmtS4mlXxdJO3mMjETP3eV39xcGzfGn46ors7O6HLjZ5kMH56fhRXk\najfekL5smZwJ4A0i+gURnUFEkWpTyk0x8RLAPwC/e7dcIGaaE6V8qJhUNqV2c3lRXy8WRVzLBPB3\ndflZJiNGyLmdBI1DxjIJIyZ79shYVEmZjAXFhJkvAnAwgN8CmAXgLSL6WdoNS5oolomXmKxfL4tU\nhS2aSopK9o2XGo2Z2FTydZGGmAS5uYJiJkmIiV9GV5Bl4iUmfmn+QZaJe9JJY5VUUiZjqOlUmLkH\nwGMAfg1gPoCZaTYqDYoVE78A/Lvv5q9FoJSHwYNVTCqZxsbSurm8GDgwWcvES0yCYiZuMQkTMwlj\nmVSaiwsIISZEdDoRzQHwJoCzAfwMQNU5eaJaJsy5+8sVfK9k33ipMX0xfrzU4NQylXxdlNrN5Vdn\nUuzCWH74ubmSipkMGeJtmXjVmVSimISJf1wA4DcALmFmn9lmKp9iYyYDBsiUCM51BQDN5Kokjj/e\nuyJZqQzipuM6iZPNBSQjJs6MLufxio2Z+I0fzc193DJh5lnM/HsjJER0MhHdnn7TkqVYywTwDsKX\nS0wq2TdearQvbCq5L9LK5iomZmIshiTExCuji7k4N1dQzKSQm8vpJalKMQEAIjqaiG4molYA/wlg\neaqtSoGNG2VdkGLw
CsKrZaIo4UgrAF9MzCRJywTId3Xt3i0eDC8LrNiYiV8Avn9/EbIeR4l4VYkJ\nER1KRNcT0TIAPwTQBoCY+RRm/t+StTAh3noLOPjg4j7jFYQv1drvbirZN15qtC9sKrkvLr4Y+MIX\nkjlWnJgJkJyYuDO6/OIlQLSYibFMnNOpAPmurqoSE4j1cSyAU5l5BjP/CMCe0jQred56q/hgrVom\nihKdlhZZByQJwri5vEjSzQXkZ3T5ubgA8YRs2SLWiyFKAB6ofjE5C0A3gGeI6MdE9CHIFPRVycqV\nxVsmXlXwGjMpP9oXNrXSFw0NYgW4pxpx4ldnAqTn5gqyTPr1A4YOBTZvtvcVqjPxmk4FyK81qSox\nsYLu50HWL3kGwL8BGGlN+nhqqRqYBJs3yz99xIjiPucOwJd6uV5FUQSzPkhHR/myuYD8ObqCLBMg\nP24Spc4EqH7LBADAzF3M/AAznwlgLIBXAXwj7omJqJWIXiOiV4nopbjHC8LES4pNU3S7uUq9XK+T\nSvaNlxrtC5ta6ouGBqlmLyZmkrSbq64OOPRQYMUK2Q6yTAC5gXXGTQrNzbVli7crz11rUpVi4oSZ\nNzPzXcz84QTO3Qsgw8xHM/P7EzieL1HiJUB+AF7jJYpSPgYNsqeTD0vSlgkATJpki0kxlsnOnZLe\n6zef1pAhMl1TfX3+6pR9wjJJESrV+aPESwB/y6Qc1IpvPAzaFza11BeDBgHvvVf83FxAemJSyDJx\niomJl/h5NoYMkfHGSyzdYrJ+PTByZLT2p0U5xYQBPEFELxPRF9M8UZS0YEAtE0WpJMw8bFGyueJO\nQe9k8mRguVVpV4xlEhQvAeT79e8fTkxaWyV+U0mUczr56czcTkQjIKKyjJmfdb9p9uzZaLF6bejQ\noZg2bdreOxDjIy20vXJlBp//fPj3m+21a7N44w0AkO2XXspi7Fh7u9jjxdl2+oNLcb5K3jb7KqU9\n5dxeuHAhrrrqqoppT5rbu3fL9uDB3q//8Ic/zBsfduwAgAz22Se59kyalMGKFbK9dCkwcKD/+7ds\nAXbskO2nn85aVon3+//61ywaG72/36BBwPz5WQwZItutrcDmzVlks/ZvYc6cOQCwd7wsOcxc9geA\n6wF8xWM/J8GYMcyrVxf/uV//mvmcc+ztM85gfvDBRJpUNPPmzSvPiSsQ7QubWuqLk09mBph7erxf\n95lTZWIAAAzeSURBVOqLPXuYv/CFZNvR2ck8aJAc+29/Y54+3f+9997L/LnPmfYxz5gRfOwJE5jf\n9778/RdeyPzzn9vbo0czv/OO/3GssbOk43hZ3FxE1EhETdbzwQBOBbDY671eK4wVQ3e3pBNGcU85\n3VxPPQXMnw/MmBGvPVExdy+K9oWTWuqLhgZxW/X38ad49UVdHfDTnybbjqYmWdNo9eriYyaF3G1D\nhhR2c+3YIYkIo0dHa39alMvNNQrAw0TEVht+ycxzvd64Zg1wyCHRT7RqlVTh1kWQTROA37gRuPBC\n4N575eJQFKX0DBqU3FxfcXEG4QvFTExqcKGYCRAsJqZocfVqYNy4/IyvclMWy4SZ32bmaSxpwUcw\n8/f83vvOO/HOtXJl9DUvTN73RRcB558PfPSj8doSB2e8oNbRvrCppb4YNCg4LbiUfTFpkgThw9SZ\nFGOZNDfnz8sF5FomlRh8B8qbzRWKuGISNZMLEMtkyRK5s/jOd+K1Q1GUeDQ0VI5lMnmyWCbFZHMF\nTaVi8LNMnEWLKiYRScIyiSomw4aJi+yBB0q/5rubWvKNF0L7wqaW+qKQm6uUfWHcXIUsk6YmEZzt\n28NbJoViJiomESm3mytq9byiKMlSyM1VSoybq5BlQmS7uuLGTFRMYlJONxdQnnm4vKgl33ghtC9s\naqkvCrm5StkX48ZJNX5HR7BlAtiurjCWyXnnycNNNYhJOYsWQxFHTHbtkqr1Aw9Mrj2KopSHSsrm\nqquTLNPFiwu7wJ1iUihmcuSR3vurQUz6tGXS1gaMHVv+eEcS1JJvvBDaFza11BeF3Fyl7otJk4BF\ni5K1TPwwYlKpNSZAFYjJrl35qx2GJU68RFGUyqKSLBNAMrrCWCZmGvowMRM/jJhUao0JUAViMm5c\ndOskbrykkqgl33ghtC9saqkvzjkHuPpq/9dL3ReTJslCVqWyTLq7K9fFBfRxMVHLRFH6DqNGyQBe\nKZi2JBkz8cPUmaiYxEAtE6GWfOOF0L6w0b6wKUfMBChtzETFJAZRxaSrC3jpJf/sCEVRlDg0NckE\nsmFiJmvWyHO/VRYLoWKSAFHF5Mc/Bj74Qalg7wvUkm+8ENoXNtoXNuXoi0mTwlkmb70Vb4EuFZME\niCIm3d3AD34AXHddOm1SFEUBgOnTC6fpDh8ObNgQPV4CVIeYkKyjUpkQES9ezDj7bHuZzDDccgvw\n3HPAQw+l1zZFUZQw7Nol7q2jjwYWLIh2jJ4eOcaAAXKzXCg1mIjAzCWdv6PiK+CNZcLsPbXJr34l\nud7XXSed3d0N3Hwz8PjjpW+roiiKm4EDxcUVx801YIBU3VdqjQlQBW6u5mZZWW3zZu/Xv/c94Mkn\ngeOPl+ni77oLOPFE4KijStvOtFHfuI32hY32hU0l98Xw4fHEBBBXV6W6uIAqsEwA2zrZb7/c/YsX\ny0RrbW3APfcAmQywZw/w9NNlaaaiKIonw4fHi5kAUmuiYhITIyZua+OBB4BZs8T8u/hiyd56+mlg\n2rTytDNNtJ7ARvvCRvvCppL7YsQItUwqAq+Mrt5eEZNHHrH3HXJIvPXiFUVR0qAW3FwVHzMBvMXk\n+edlBtFaKUqsZH9wqdG+sNG+sKnkvhg3TqaDiUNzc2VPD1U1lslTT+Xu++UvgfPPr5zFqxRFUfy4\n/vr4Y9Uf/yjuskql4utMmBlPPw189avACy9I+m9PDzBmjEyX0lcq3BVFUZJC60x8OPFE8RUecwww\nZ45Ukx56qAqJoihKpVAVMZNBg4Df/Q741reAM84ALr9cXFy1RCX7g0uN9oWN9oWN9kV5qQoxAcTf\nOGsW8NprwMyZwHnnlbtFiqIoiqEqYiaKoihKeMoRM6kay0RRFEWpXFRMqgT1B9toX9hoX9hoX5QX\nFRNFURQlNhozURRF6WNozERRFEWpSlRMqgT1B9toX9hoX9hoX5QXFRNFURQlNhozURRF6WNozERR\nFEWpSsomJkR0GhEtJ6I3iOgb5WpHtaD+YBvtCxvtCxvti/JSFjEhojoAPwLwMQBTAMwiosnlaEu1\nsHDhwnI3oWLQvrDRvrDRvigv5bJM3g/gTWZuY+YeAL8G8MkytaUqeO+998rdhIpB+8JG+8JG+6K8\nlEtMDgDgXIh3jbVPURRFqUI0AF8ltLa2lrsJFYP2hY32hY32RXkpS2owEZ0A4AZmPs3avgYAM/P3\nXe/TvGBFUZQIlDo1uFxi0g/ACgAfBtAO4CUAs5h5WckboyiKosSmLGvAM/MeI
roMwFyIq+1uFRJF\nUZTqpaIr4BVFUZTqoCID8NVe0EhEdxPReiJ63bFvXyKaS0QriOgvRDTE8dq1RPQmES0jolMd+48h\notetfvihY/9AIvq19ZnniWi847ULrfevIKILHPtbiOgF67VfEVHqVikRjSWip4loCREtIqIrargv\n6onoRSJ61eqPm2q1LxznriOiBUT0B2u7JvuCiFqJ6DXr2njJ2ld9fcHMFfWACNxKAAcCGABgIYDJ\n5W5Xkd/hZADTALzu2Pd9AF+3nn8DwPes54cDeBXicmyxvruxGF8EcJz1/M8APmY9/xKAO6zn5wL4\ntfV8XwBvARgCYKh5br32GwDnWM/vBHBJCfphfwDTrOdNkDjZ5FrsC+tcjdbffgBeADC9VvvCOt+/\nAbgfwB9q9TdinWsVgH1d+6quL0py0RTZsScAeMyxfQ2Ab5S7XRG+x4HIFZPlAEZZz/cHsNzr+wF4\nDMDx1nuWOvafB+BO6/njAI63nvcDsMH9HsdFcK71fCOAOkcfP16GPvk9gI/Uel8AaIQknRxeq30B\nYCyAJwBkYItJrfbF2wCGufZVXV9UopurrxY0jmTm9QDAzOsAjLT2u7/vu9a+AyDf3eDsh72fYeY9\nALYQ0X5+xyKiYQA2M3Ov41hjEvpeoSCiFoi19gLkR1JzfWG5dV4FsA5AlpmXokb7AsD/ALgagDNo\nW6t9wQCeIKKXiegL1r6q64uyZHMpAHJ/RHEJk09e0pzznBMTNQF4EMCVzLyN8uuHaqIvrB/n0UTU\nDOAvRJRB/nfv831BRB8HsJ6ZF1p94Eef7wuL6czcTkQjAMwlohWowuuiEi2TdwGMd2yPtfZVO+uJ\naBQAENH+ADZY+98FMM7xPvN9/fbnfIakZqeZmTvg03fMvAnAEJIJNt3HShUrcPcggF8w8yPW7prs\nCwMzb4X4tN+H2uyL6QA+QUSrAPwKwIeI6BcA1tVgX4CZ262/GyGu4PejGq+LUvgEi/Qf9oMdgB8I\nCcAfVu52RfgeLQAWOba/D8vXCe+A2kAAE5AbUHvBurAIMvicZu3/MuyA2nnwDqiZ50Ot134D2x96\nJ4B/LVE/3AfgFte+musLAMNhBzcHAXgGUrRbc33h6pcZsGMm/11rfQGJnzVZzwcDeA7AqdV4XZTs\noimyg0+DZP68CeCacrcnQvsfALAWwE4AqwFcZP2znrS+11zzT7Pef611USwDcKpj/7EAFln9cKtj\nfz2A/7P2vwCgxfHabGv/GwAucOyfAMn2eMO6UAaUoB+mA9gDuSF4FcAC63+7Xw32xRHW938VwGsA\nvmbtr7m+cPWLU0xqri+sc5rfxyJY41019oUWLSqKoiixqcSYiaIoilJlqJgoiqIosVExURRFUWKj\nYqIoiqLERsVEURRFiY2KiaIoihIbFROlJiCiTuvvgUQ0K+FjX+vafjbJ4ytKNaBiotQKpqBqAoDP\nFvNBawqKIP4950TMJxdzfEXpC6iYKLXGfwE42VqU6UprJt//Jlm4aiERfREAiGgGET1DRI8AWGLt\ne9ia2XWRmd2ViP4LwCDreL+w9nWakxHRzdb7XyOizziOPY+IfmstcPSLEveBoiSOzhqs1BrXAPgq\nM38CACzxeI+ZjyeigQCeI6K51nuPBjCFmVdb2xcx83tE1ADgZSJ6iJmvJaJLmfkYxznYOvbZAI5k\n5iOIaKT1mb9a75kGmWdpnXXOk5j572l+cUVJE7VMlFrnVAAXWOuMvAiZE+kQ67WXHEICAFcR0ULI\n/EZjHe/zYzpkVlww8wYAWQDHOY7dzjKf0ULIxKCKUrWoZaLUOgTgcmZ+Imcn0QwAXa7tD0FWrNtJ\nRPMANDiOEfZchp2O53ugv0WlylHLRKkVzEDeCWAfx/6/APiyte4KiOgQImr0+PwQyOpzO4loMmQp\nU8Mu83nXuf4G4FwrLjMCwAcgy/UqSp9D74aUWsFkc70OoNdya81h5lutJYUXEBFBFiGa6fH5xwH8\nKxEtgUwL/rzjtbsAvE5E85n58+ZczPwwEZ0AmXK+F8DVzLyBiA7zaZuiVC06Bb2iKIoSG3VzKYqi\nKLFRMVEURVFio2KiKIqixEbFRFEURYmNiomiKIoSGxUTRVEUJTYqJoqiKEpsVEwURVGU2Px/UXYY\nagMKQHAAAAAASUVORK5CYII=\n",
337 | "text/plain": [
338 | ""
339 | ]
340 | },
341 | "metadata": {},
342 | "output_type": "display_data"
343 | }
344 | ],
345 | "source": [
346 | "plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode'])\n",
347 | "plt.xlabel(\"Iteration\")\n",
348 | "plt.ylabel(\"Avg. Reward per Episode\")\n",
349 | "plt.grid(True)\n",
350 | "#plt.savefig(settings['save_dir'] + '_' + \"evaluation_reward.svg\", bbox_inches='tight')\n",
351 | "plt.show()\n",
352 | "plt.close()"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 14,
358 | "metadata": {
359 | "collapsed": false
360 | },
361 | "outputs": [
362 | {
363 | "data": {
364 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEPCAYAAACKplkeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8HfO9//HXR1zqmjQOSoNQd0qooi7NdneK0ouqKuJX\ndagq2iI4qtVDizrIT+q4p3Vrm5NWtYpQWW5NhCRbIiRBxDUkEknkInL5nD++szJr773W3mvvPWvN\nzF7v5+Mxj71m1qyZz/7stddnzfc78x1zd0RERJKwWtoBiIhIz6GiIiIiiVFRERGRxKioiIhIYlRU\nREQkMSoqIiKSmLoUFTO73czeN7OJJcs+bWYjzWyqmT1iZr3rEYuIiNROvY5U7gQOb7VsMPCYu28P\nPA5cVKdYRESkRqxeFz+a2ZbA39x912h+CjDQ3d83s88ABXffoS7BiIhITaTZp7Kxu78P4O7vARun\nGIuIiCQgSx31Gi9GRCTnVk9x3++b2SYlzV+zKq1oZio4IiJd4O5Wz/3V80jFoqnoAWBQ9PgU4K/t\nvdjdNblz2WWXpR5DViblQrlQLhxw5s+v9Fz91euU4nuBfwHbmdmbZnYq8GvgUDObChwczUsHZsyY\nkXYImaFcxJSLmHKRrro0f7n7dyo8dUg99i8iIvWRpY56qcKgQYPSDiEzlIuYchFTLtJVt+tUusPM\nPA9xiojUmxnMnw8bbFDuOcN7cEe9JKBQKKQdQmYoFzHlIqZcpEtFRUREEqPmLxGRDJs1CxYuhK23\nLv+8mr9ERKRqRx0Fn/tc2lFUT0UlZ9ReHFMuYspFrKflYsGCtCPoHBUVEZEaW7o07QjqR30qIiI1\nNG0abL89dPUjbIcdYOrUyq9Xn4qISAOZOzftCOpLRSVnelp7cXcoFzHlIqZcpEtFRUREEqM+FRGR\nGhozBr70JfWpiIhIghrle7GKSs6ovTimXMSUi1hWc3HPPWlHUB8qKiIidfDqq2lHUB8qKjnT1NSU\ndgiZoVzElItY1nNhBpMnV7/+1Km1i6UWVFREROosb4WiM1RUciar7cVpUC5iykVMuUiXioqIiCRG\nRSVnst5eXE/KRUy5iCkX6VJRERGRxKio5Izai2PKRUy5iDVyLubOhdGj041BRUVEpIcYPBj23Tfd\nGFRUckbtxTHlIqZcxPKci+9/H5Ys6frrV65MLpauUlEREcmI226D6dPTjqJ7VFRyppHbi1tTLmLK\nRSyrufjgg+S3mcXbFKuoiIjUwdChyW9zs82S32Z3pV5UzOwiM5tsZhPN7B4zWzPtmLIsz+3FSVMu\nYspFrJFykcVbFadaVMxsS+D7wO7uviuwOvDtNGMSEZGuS/tIZQHwCbCuma0OrAO8m25I2ZbV9uI0\nKBcx5SKW91xYXe/TmLzV09y5u39oZtcCbwKLgZHu/liaMYmIJOGtt2DRos6/Lu93iEy7+Wtr4Dxg\nS2AzYD0z+06aMWVdI7UXd0S5iCkXsVrn4h//gK99reP1DjwQdtyxpqFkUqpHKsCewDPuPhfAzP4M\n7Avc23rFQYMG0b9/fwD69OnDgAEDVr15ioe7mte85jVf6/nrry/w6KPw1FNNHHBA5fWXLg3z48eH\neWiKfhZ48UX4+tfLb3/ffQvstBOMHh2vX/r61utDgaeegiOPbGLmzAIwjEGDWPV5WXfuntoE7AZM\nAj4FGDAMOKvMei7BqFGj0g4hM5SLmHIRq3UuTjrJHdxPOaX99fr1C+uNHh1+Fj/GwH3EiPKvKa63\n1lptl7W3/vz5Yf5732u5bvTZWdfP9VSbv9z9BeD3wDjgBUJhuSXNmEREpOvSbv7C3a8Brkk7jryI\nD3lFuYgpFzHlIl1pn1IsIpI5CxbAihXJb3fmzOS3mTUqKjlT7KQT5aKUchFLIhe9e8OVV3Y/ltZO\nPjn5bWaNioqISBmvv578Nmtx9JM1Kio5o/bimHIRUy5iykW6VFRERMrI2pXtp50Gy5enHUXHVFRy\nRm3nMeUiplzEekIuyhW022+HOXPqH0tnqaiIiHRBtUcyQ4bUNo6sUVHJGbUXx5SLmHIRSyoXXWn+\ncocbbmi57L77EgkHgNGjYcSIys8//XRy++oqFRURkYR88gmce27l50eN6t72Tz0VvvnNys9Pndq9\n7SdBRSVnekJ7cVKUi5hyEVMu0qWiIiJSRlebvxqdikrOqO08plzElItYI+ciC3eNVFEREamzIUPg\nnXfSjqI2VFRyRu3FMeUiplzEkspFLZu/nngCttwS3n675zWZqaiIiNTA/PntP79iBWy+OTz+eJh/\n4IHax1QPKio508jtxa0pF7FGysX558MVV1R+vl65KHeEUbrso4+q286CBeHnMce0v+28UFERkVz5\nzW/g6qtrv580P9iXLcvvlfgqKjmjtvOYchFTLmK1yMW0abDXXuFxe8WmK2dfVXrNww8nt616UlER\nEenA00/Dc8/Bgw/C3XeHZe01f734YvXbzkIhSJKKSs40Utt5R5SLmHIRq+XYX0cdFT8uFpdynngi\nkRByafW0AxARScO994YzsE46KZntffnLcOSR4fGYMdW/Tkcqkiq1nceUi1ij5aK9D+Jqc3HSScne\nM/6pp+DPfw6Phw/v/vYeeqjtsjwUIBUVEZEyunP219Kl1a+bh0LRGSoqOaO285hyEVMuYmnmIu3r\nS7JQoFRURCRzVqyAlSvTjqI+slAIkqSikjN5aDtfuhQee6z2+8lDLuqlp+Vil13guOMqP59En0ot\nPsy7ep1Kc3P39jt3bvdenyQVFUncvffCoYemHYXk2ZQp4da5STr0UJg1q/r1O9OU9dZbnX9N0VFH\nwe67d/51pbbaquX87NnpHQGpqORMHtrO69VskYdc1ItyESuXi7vuCkfPEybEy1asiB+ffHIYU6zU\nu+/Gj6dNa3+fW2zR+Thr6eij09u3iookrppvSJ05O0Ya1+c/Dx98UP3606bBlVe2Xd7RqcN33QV3\n3NFyWekFjFddVX0MWfDss+ntO/WiYma9zWy4mb1sZpPNbO+0Y8qyrLSdf/ABnHhi11//qU/Fo7N2\nVVZykQU9NRcvvtjxUUKpm26CSy4pVHy+mi88771X/f5a09lfGSgqwA3AP9x9R2A34OWU45EqjB0b\n+k7KqfaNraMVqUa5D+qufniWe93HH7ec33TTrm0bki8qrTvgP/ww2e3XQqpFxcw2AA5w9zsB3H25\nu3fz+2vPloe283p9W8pDLupFuQjCe6+pw/VKP5x32qn0tS2tWNG593PSX5RGjEh2e/WQ9pHKVsAH\nZnanmY03s1vMbO2UY5JuysIhuPQcnfn239F7r/j8tdfGy15/PfycM6ftSSalnfnV6MzoxLWQhf+9\ntAeUXB3YAzjL3Z83s+uBwcBlrVccNGgQ/fv3B6BPnz4MGDBg1bezYntyI8yXtp2nGc/EiVD8Rtj6\n+ZdfLsbY/vY6er6j+eKyLP190
ppvbm7m3HPPzUw8ScwX3x8TJhRYvrz0aKzAsmXx86WvDx+q11Mo\ntP18KF3/jTfieYifHzas5Xzr58vNt95+Z1/flflCoWU+AFaubGLZsgIwLFren1S4e2oTsAkwvWR+\nf+BvZdZzCUaNGpV2CO7u/uCD7pX+LL/7XeXnisB91qzuxZCVXGRBT8sFuG+6afj55JNtn+vbt+Wy\nxx93X7jQ/dhj3WFU2e2B+2OPuS9Z4n7JJfGy0uk//qPl/NKl5dcrTq23n/R0663V7/eqq8qti3ud\nP9dTbf5y9/eBt8xsu2jRwcBLKYaUeXloOw/fAjvW3UP1POSiXvKWi//6L/j975Pb3kEHwXXXwf33\nQ/Eb/UUXwW67tVzvjjtg7bUrv/duvjm5mOrttdfSjiBIu08F4EfAPWbWTDj7q8xZ5pK2xYthk02q\nW/f222sbi+TfpZfCz34WHrvDRx9VXrdcn8qSJeHaklKt+z8eeYSomTaWlQ/eak2eXP26Tz5Zuzg6\no8OiYmZXm9kGZraGmf3TzGab2XeTCsDdX3D3L7r7AHf/urvPT2rbPVFpf0I9zZoVpu2373jdao9A\nunv6ZVq5yKI85+Kee2CDDTr3miVLwgWNr7wCb7/d+tkCUP79FfpistGhXY35nfg0nDKldnF0RjVH\nKod5OM33KGAGsA1wfruvkB6rMxeiiVSjOG5WJcXicPDBbZtWt9su3HGxtUceiR//5Cfx4/HjuxZj\nWjozVllWVHP2V3GdI4Hh7j7f8lLme6CstJ3/539Wfk7XqdRfnnNR7fvl8cfhmWfaLi+eEhxr4uyz\nw1EMwH//d9f32ZFly5K7HXE5Dz5Yu23XSjVF5e9mNgVYApxpZhsBH3fwGunhSgfmE+mO0g94s7YX\n/JU2Y7XX91KqWFAqWby4uu10dJ3KmmtWt51G0mHzl7sPBvYF9nT3ZcBi4JhaBybl5aHtvNpvgd39\ntpiHXNRLnnMxeHDL+fY6p884o/Jzr75afFTocJ+/+U2HqwDhgsgsmj0bFi1KO4ryqumoXwf4AXBT\ntGgzYM9aBiU9yy9+kb9RXiU9rb9sVHtCx913Jx/LOeckv80kbLwxnHBC2lGUV01H/Z3AJ4SjFYB3\ngP+qWUTSrjy2nf/853BZmzESWlq8GL7znc5tN4+5qJWekIvWTU3Fo4Tbb4ezz+7MlpoSigj+/OfE\nNpW4tme9ZUM1ReVz7n41sAzA3RcD6qlvYE8/3f7z5Zq1Ovq2+dprcN99XY9J8u+UU1rOf/JJ+HnP\nPXDjjfWPR7qmmqLySTTIowOY2ecADVqekiy0nR9wQNoRBFnIRVakmYuPPur6qa+HHRY/Hjs2/Oz+\nmVmF7m5AuqGaonIZ8DCwuZndA/wTuKCmUUmP090LHSW7vvGN6kdbaO3RR9su0xUL+VbN2V+PAl8H\nBgH3Ec4CK9Q2LKkka23n3/pWy/mPP67fUBhZycWQIdCrV3LbO+IIeP/9zr0mjVy4w/Ll8M475Z//\n5JPqT90tbi8ZTUltSLqgmrO/vgzsDHwELAB2ipZJD3PvvW3vJ9GR4cNbznfnVqx59fzznc9bex55\nBMaNS257tTJ4MKy7bjxfvD30T38abjf9rW/BFluEZW+80XHRKD4/enTysUr9VNP8dX7JdCnwN+Dn\nNYxJ2lHLtvMTT4QZM2qz7Vo0f7WXi5Ur430uXx7OHurJTXBp9KlMmBCORl6KxhXv3Tv8vPZa2Gij\ncMOqOXNg4EDo3x8eeqjl61sPuVI84vn737sbWaG7G8iFji7wTEs1zV9Hl0yHArsAObhTsjSqW24J\nzVHFYcwnTgxnD11+ebpx5c348e0X4vhiw1jpUUaxGbQ4eu6RR4YhTcKNsNpqfa94ad/ChWlHUJ55\nJ7++WRj4a7K771SbkMru0zsbp3SeGZx1VjiT509/avncjBmw1VblX1f6p3n9ddh665bLzcKH/PLl\nLfd1xhlh+PNNN4VJk2DXXbt3NLFyZRjVtW/feNknn7QcSmPmzPDPuM02Xd9PayefHIZhT+otahbG\nfPrKV5LZXrXGjg2F4uij244a3Pp3GzcO9tQl0DlguHtdT33ocOwvM/v/RKcTE45sBgA5G+tTqjVs\nWPeGfzj22Pjx7NmhGaS1b34z/Pyf/wlTUh/GvXvDcce1XNZ6bKYdd4R580IB0llGsXnzYO+9w+OX\nytwm74wzwt/qpJNCs1Zzc33jk/yopk/leWBcNI0GLnT3xO6nIp1T67bzSh/w3/te5de8/npoupgz\np+VNkbbZJtz3AlpeLd16wMBSf/tby1jefTc+zF91g9RI61wsXAh33ll52xA+PAF237399ert3Xe7\nV+S6+7647rr48cCBbZ+/+eYQ391356GgFNIOoKF1eKTi7r+rRyCSnuXLYY01wuNyp4Duumtonqqk\n2NzV2oIFsM468XxTEzzxRNv1zOL7X3z1q/GpqsWYSm2ySTJnmL3wQhhGfcYMeOAB+OMfu7/N7rj4\n4vDzF78IP4tXk9fLaiVfL2fPru++pYepdPN6YBIwscw0CZjY3o3vk55CmJK0I45wf/JJ9/XXLx4D\nxJO7+8qV7n/8Y9vnaj1df337z//jH+V/n+7ssztOOqnr2xg8uLqYHnkkLLvppnjZkCHuc+d2Pe6i\nZ56p/99YU70m3L1+n9Uedlvxg3zL9qa6BtnV/1hpY/ly9xdfDAWjvTeju3uvXmn/Q7QfX9GwYcls\n81e/cl+xovM5raaozJvnPmZMy2Wvv95xTIsXuy9Z4t63b9vfHdxvvbXz8Za65Zb0/5aaajnh7hkp\nKlmaVFRio0aN6tbrq30zFgpp/zO0P111lfuIEaM6PKrp7DR/fuXcfelL7suWxfNz54bXrLNO+Llg\ngfsee7ifemrb155zTljn2Wfdp01zHzu2+ph22aXlfOnfcujQ7r0v0v471mYalYEYsjLh7hkrKsA+\nwHPAQsIQ+CuABXUNsvQ/qcFV++Fx/fXuV14Zzy9f3vHRSf6mUYlvc+hQ90WLyue0uI67+wknuD/1\nVOXtVHptUlPpNj/6yP3vfx/ls2aF5ffeG5aVWrHC/aWXWi57/PG0/375eV/kd8Lds1dUnge2ASYA\nvYBTgV/VNcjif5FUrfTDZ+jQtN/Y+Zquu65lLleudB83rnPbKO3rqEWMX/lKy/nNNw8/S/vAHnww\nxH3ZZfGyK690nzq1uqY3TT1hwt3rW1Q6vPjRzJ539z3NbKK77xotm+DudTspUxc/ds7cubDhhuHx\nO+/AZz+bbjx5c8EF4eyz6dPDTZrOPTcMGtlZO+4YznbbeOPkYxSpTv0vfqymqDwJHALcBrwHzAQG\nuftutQ9vVQwqKpFCoVBxRNp77oFp08JFbEceWd+40lGg1iPSjhoFBx5Y010kpIBG5y0qoFwUZfCK\neuAkwkWSPwTOAzYHvlHLoKRrvqtLUhNXOkKAiHSs4pGKmZ0P3Ofuqd8JWUcqlZ1xBuyzD5
[remainder of base64-encoded PNG omitted: Avg. Q-Value vs. Iteration training curve]\n",
365 | "text/plain": [
366 | ""
367 | ]
368 | },
369 | "metadata": {},
370 | "output_type": "display_data"
371 | }
372 | ],
373 | "source": [
374 | "plt.plot(train_stats['Iteration'], train_stats['Average Q-Value'])\n",
375 | "plt.xlabel(\"Iteration\")\n",
376 | "plt.ylabel(\"Avg. Q-Values\")\n",
377 | "plt.grid(True)\n",
378 | "#plt.savefig(settings['save_dir'] + '_' + \"training_q_values.svg\", bbox_inches='tight')\n",
379 | "plt.show()\n",
380 | "plt.close()"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {},
386 | "source": [
387 | "# Evaluating the best policy"
388 | ]
389 | },
390 | {
391 | "cell_type": "markdown",
392 | "metadata": {},
393 | "source": [
394 | "Let's load the network that collected the highest reward per game episode"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 15,
400 | "metadata": {
401 | "collapsed": true
402 | },
403 | "outputs": [],
404 | "source": [
405 | "best_iteration_index = np.argmax(eval_stats['Reward per Episode'])\n",
406 | "best_iteration = str(int(eval_stats['Iteration'][best_iteration_index]))"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 16,
412 | "metadata": {
413 | "collapsed": false
414 | },
415 | "outputs": [
416 | {
417 | "data": {
418 | "text/plain": [
419 | "'4500000'"
420 | ]
421 | },
422 | "execution_count": 16,
423 | "metadata": {},
424 | "output_type": "execute_result"
425 | }
426 | ],
427 | "source": [
428 | "best_iteration "
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 17,
434 | "metadata": {
435 | "collapsed": true
436 | },
437 | "outputs": [],
438 | "source": [
439 | "agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p')"
440 | ]
441 | },
442 | {
443 | "cell_type": "code",
444 | "execution_count": 18,
445 | "metadata": {
446 | "collapsed": false
447 | },
448 | "outputs": [],
449 | "source": [
450 | "r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True)"
451 | ]
452 | },
453 | {
454 | "cell_type": "code",
455 | "execution_count": 19,
456 | "metadata": {
457 | "collapsed": false
458 | },
459 | "outputs": [
460 | {
461 | "data": {
462 | "text/plain": [
463 | "17.6"
464 | ]
465 | },
466 | "execution_count": 19,
467 | "metadata": {},
468 | "output_type": "execute_result"
469 | }
470 | ],
471 | "source": [
472 | "r_per_episode"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": null,
478 | "metadata": {
479 | "collapsed": true
480 | },
481 | "outputs": [],
482 | "source": []
483 | }
484 | ],
485 | "metadata": {
486 | "kernelspec": {
487 | "display_name": "Python 2",
488 | "language": "python",
489 | "name": "python2"
490 | },
491 | "language_info": {
492 | "codemirror_mode": {
493 | "name": "ipython",
494 | "version": 2
495 | },
496 | "file_extension": ".py",
497 | "mimetype": "text/x-python",
498 | "name": "python",
499 | "nbconvert_exporter": "python",
500 | "pygments_lexer": "ipython2",
501 | "version": "2.7.11"
502 | }
503 | },
504 | "nbformat": 4,
505 | "nbformat_minor": 0
506 | }
507 |
--------------------------------------------------------------------------------
/examples/mountain_car_test.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a place holder for real unit testing.
3 | Right now we just overfit a simple control problem:
4 | - the agent tries to get to the top right corner (1,1) of a 2D map
5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1)
6 | - action 1 is optimal for all states
7 | """
8 |
9 | from chimp.learners.chainer_backend import ChainerBackend
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.utils.policies import DQNPolicy
12 |
13 | from chimp.simulators.mdp.mountain_car import MountainCar
14 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator
15 |
16 | import numpy as np
17 | import pickle
18 | import pylab as p
19 |
20 | import chainer
21 | import chainer.functions as F
22 | import chainer.links as L
23 | from chainer import Chain
24 |
25 | settings = {
26 |
27 | # agent settings
28 | 'batch_size' : 32,
29 | 'print_every' : 1000,
30 | 'save_dir' : 'results',
31 | 'iterations' : 2000000,
32 | 'eval_iterations' : 100,
33 | 'eval_every' : 1000,
34 | 'save_every' : 20000,
35 | 'initial_exploration' : 50000,
36 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step
37 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
38 | 'epsilon' : 1.0, # Initial exploration rate
39 | 'learn_freq' : 1,
40 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
41 | 'model_dims' : (1,2),
42 |
43 | # simulator settings
44 | 'viz' : False,
45 |
46 | # replay memory settings
47 | 'memory_size' : 100000, # size of replay memory
48 | 'n_frames' : 1, # number of frames
49 |
50 | # learner settings
51 | 'learning_rate' : 0.00001,
52 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used
53 | 'discount' : 0.95, # discount rate for RL
54 | 'clip_err' : False, # value to clip loss gradients to
55 | 'clip_reward' : False, # value to clip reward values to
56 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations
57 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper)
58 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
59 | 'gpu' : False,
60 | 'reward_rescale': False,
61 |
62 | # general
63 | 'seed_general' : 1723,
64 | 'seed_simulator' : 5632,
65 | 'seed_agent' : 9826,
66 | 'seed_memory' : 7563
67 |
68 | }
69 |
70 | class TestNet(Chain):
71 |
72 | def __init__(self):
73 | super(TestNet, self).__init__(
74 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0),
75 | l2=F.Linear(20, 10, bias=0.0),
76 | bn1=L.BatchNormalization(10),
77 | l3=F.Linear(10, 10),
78 | l4=F.Linear(10, 10),
79 | bn2=L.BatchNormalization(10),
80 | lout=F.Linear(10, simulator.n_actions)
81 | )
82 | self.train = True
83 | # initialize avg_var to prevent divide by zero
84 | self.bn1.avg_var.fill(0.1),
85 | self.bn2.avg_var.fill(0.1),
86 |
87 | def __call__(self, ohist, ahist):
88 | h = F.relu(self.l1(ohist))
89 | h = F.relu(self.l2(h))
90 | h = self.bn1(h, test=not self.train)
91 | h = F.relu(self.l3(h))
92 | h = F.relu(self.l4(h))
93 | h = self.bn2(h, test=not self.train)
94 | output = self.lout(h)
95 | return output
96 |
97 |
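# car_sim below is a small hand-rolled evaluation loop (a sketch of how a trained DQN
# policy can be rolled out, assuming the MDPSimulator interface used in this repo):
# it resets the episode, queries the greedy policy for at most nsteps steps, accumulates
# the reward, and records the (position, velocity) trace for plotting.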
98 | def car_sim(nsteps, simulator, policy, verbose=False):
99 | mdp = simulator.model
100 |
101 | # re-initialize the model
102 | simulator.reset_episode()
103 |
104 | rtot = 0.0
105 | xpos = np.zeros(nsteps)
106 | vel = np.zeros(nsteps)
107 | # run the simulation
108 | input_state = np.zeros((1,2), dtype=np.float32)
109 | for i in xrange(nsteps):
110 | state = simulator.get_screenshot()
111 | input_state[0] = state
112 | a = policy.action((input_state,None))
113 | simulator.act(a)
114 | r = simulator.reward()
115 | rtot += r
116 | xpos[i], vel[i] = state
117 | if simulator.episode_over():
118 | break
119 | return rtot, xpos, vel
120 |
121 |
122 | mdp = MountainCar()
123 | simulator = MDPSimulator(mdp)
124 |
125 | net = pickle.load(open("../chimp/pre_trained_nets/mountain_car.net", "rb"))
126 | backend = ChainerBackend(settings)
127 | backend.set_net(net)
128 | learner = DQNLearner(settings, backend)
129 |
130 | policy = DQNPolicy(learner)
131 |
132 | r, xtrace, vtrace = car_sim(300, simulator, policy, verbose=True)
133 |
134 | p.plot(xtrace); p.plot(10.0*vtrace)
135 | p.show()
136 |
--------------------------------------------------------------------------------
/examples/run_atari.py:
--------------------------------------------------------------------------------
1 | # Be sure to have run 'python setup.py' from the Chimp root directory first.
2 |
3 |
4 | # # Training DeepMind's Atari DQN with Chimp
5 |
6 | # First, we load all the Chimp modules.
7 |
8 | from chimp.memories import ReplayMemoryHDF5
9 |
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.learners.chainer_backend import ChainerBackend
12 |
13 | from chimp.simulators.atari import AtariSimulator
14 |
15 | from chimp.agents import DQNAgent
16 |
17 |
18 | # Then we load Python packages.
19 |
20 | import matplotlib.pyplot as plt
21 |
22 | import numpy as np
23 | import chainer
24 | import chainer.functions as F
25 | import chainer.links as L
26 | from chainer import Chain
27 | import os
28 |
29 | import pandas as ps
30 |
31 |
32 | # Finally, we set training parameters in a params dictionary that will be passed to the modules.
33 |
34 | # Define training settings
35 |
36 | settings = {
37 |
38 | # agent settings
39 | 'batch_size' : 32,
40 | 'print_every' : 10000,
41 | 'save_dir' : './results_atari',
42 | 'iterations' : 5000000,
43 | 'eval_iterations' : 5000,
44 | 'eval_every' : 50000,
45 | 'save_every' : 50000,
46 | 'initial_exploration' : 50000,
47 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step
48 | 'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions
49 | 'epsilon' : 1.0, # Initial exploration rate
50 | 'learn_freq' : 4,
51 | 'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
52 | 'model_dims' : (84,84),
53 |
54 | # Atari settings
55 | 'rom' : "Breakout.bin",
56 | 'rom_dir' : './roms',
57 | 'pad' : 15, # padding used when cropping the frame to a square, applied only along the longer image dimension
58 | 'action_history' : True,
59 |
60 | # simulator settings
61 | 'viz' : True,
62 | 'viz_cropped' : False,
63 |
64 | # replay memory settings
65 | 'memory_size' : 500000, # size of replay memory
66 | 'frame_skip' : 4, # number of frames to skip
67 |
68 | # learner settings
69 | 'learning_rate' : 0.00025,
70 | 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used
71 | 'discount' : 0.99, # discount rate for RL
72 | 'clip_err' : False, # value to clip loss gradients to
73 | 'clip_reward' : 1, # value to clip reward values to
74 | 'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations
75 | 'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
76 | 'gpu' : True, # train on the GPU (set to False to run on the CPU only)
77 | 'reward_rescale': False,
78 |
79 | # general
80 | 'seed_general' : 1723,
81 | 'seed_simulator' : 5632,
82 | 'seed_agent' : 9826,
83 | 'seed_memory' : 7563
84 |
85 | }
86 |
87 |
88 | # set random seed
89 | np.random.seed(settings["seed_general"])
90 |
91 |
92 | # initialize the simulator
93 |
94 | simulator = AtariSimulator(settings)
95 |
96 | # Define the network
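# Rough shape walk-through for the default 84x84 frames with a 4-frame history
# (a sketch, assuming the preprocessing configured above):
#   l1: 8x8 conv, stride 4 -> 32 x 20 x 20
#   l2: 4x4 conv, stride 2 -> 64 x 9 x 9
#   l3: 3x3 conv, stride 1 -> 64 x 7 x 7   (flattened: 64*7*7 = 3136, the input size of l4)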
97 | class Convolution(Chain):
98 |
99 | def __init__(self):
100 | super(Convolution, self).__init__(
101 | l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
102 | l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
103 | l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
104 | l4=F.Linear(3136, 512, wscale = np.sqrt(2)),
105 | l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)),
106 | )
107 |
108 | def __call__(self, ohist, ahist):
109 | if len(ohist.data.shape) < 4:
110 | ohist = F.reshape(ohist,(1,4,84,84))
111 | h1 = F.relu(self.l1(ohist/255.0))
112 | h2 = F.relu(self.l2(h1))
113 | h3 = F.relu(self.l3(h2))
114 | h4 = F.relu(self.l4(h3))
115 | output = self.l5(h4)
116 | return output
117 |
118 | net = Convolution()
119 |
120 |
121 | # initialize the learner + chainer backend, replay memory, and agent modules
122 |
123 | backend = ChainerBackend(settings)
124 | backend.set_net(net)
125 | learner = DQNLearner(settings, backend)
126 |
127 | memory = ReplayMemoryHDF5(settings)
128 |
129 | agent = DQNAgent(learner, memory, simulator, settings)
130 |
131 | # launch training
132 |
133 | agent.train()
134 |
135 |
136 | # Visualizing results
137 |
138 | train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None)
139 | train_stats.columns = ['Iteration','MSE Loss','Average Q-Value']
140 |
141 | eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None)
142 | eval_stats.columns = ['Iteration','Total Reward','Reward per Episode']
143 |
144 |
145 | plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode'])
146 | plt.xlabel("Iteration")
147 | plt.ylabel("Avg. Reward per Episode")
148 | plt.grid(True)
149 | plt.savefig(settings['save_dir'] + '_' + "evaluation_reward.svg", bbox_inches='tight')
150 | #plt.show()
151 | plt.close()
152 |
153 |
154 | plt.plot(train_stats['Iteration'], train_stats['Average Q-Value'])
155 | plt.xlabel("Iteration")
156 | plt.ylabel("Avg. Q-Values")
157 | plt.grid(True)
158 | plt.savefig(settings['save_dir'] + '_' + "training_q_values.svg", bbox_inches='tight')
159 | #plt.show()
160 | plt.close()
161 |
162 |
163 | # Evaluating the best policy
164 |
165 | # load the network that collected the highest reward per game episode
166 |
167 | best_iteration_index = np.argmax(eval_stats['Reward per Episode'])
168 | best_iteration = str(int(eval_stats['Iteration'][best_iteration_index]))
169 |
170 | agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p')
171 |
172 |
173 | # evaluate policy performance
174 |
175 | r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True)
176 |
177 | print(r_per_episode)  # average reward per episode of the loaded policy
178 |
179 |
180 |
--------------------------------------------------------------------------------
/examples/run_cartpole.py:
--------------------------------------------------------------------------------
1 | """
2 | This is a place holder for real unit testing.
3 | Right now we just overfit a simple control problem:
4 | - the agent tries to get to the top right corner (1,1) of a 2D map
5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1)
6 | - action 1 is optimal for all states
7 | """
8 |
9 | # Memory
10 | from chimp.memories import ReplayMemoryHDF5
11 |
12 | # Learner (Brain)
13 | from chimp.learners.dqn_learner import DQNLearner
14 | from chimp.learners.chainer_backend import ChainerBackend
15 |
16 | # Agent Framework
17 | from chimp.agents import DQNAgent
18 |
19 | # Simulator
20 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator
21 | from chimp.simulators.mdp.cart_pole import CartPole
22 |
23 | # Rollout Policy
24 | from chimp.utils.policies import RandomPolicy
25 |
26 | import numpy as np
27 | import pickle
28 | import pylab as p
29 |
30 | import chainer
31 | import chainer.functions as F
32 | import chainer.links as L
33 | from chainer import Chain
34 |
35 | settings = {
36 |
37 | # agent settings
38 | 'batch_size' : 32,
39 | 'print_every' : 1000,
40 | 'save_dir' : 'results/cartpole-1',
41 | 'iterations' : 10000,
42 | 'eval_iterations' : 200,
43 | 'eval_every' : 1000,
44 | 'save_every' : 1000,
45 | 'initial_exploration' : 10000,
46 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step
47 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
48 | 'epsilon' : 1.0, # Initial exploration rate
49 | 'learn_freq' : 1,
50 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
51 | 'model_dims' : (1,4),
52 |
53 | # simulator settings
54 | 'viz' : False,
55 |
56 | # replay memory settings
57 | 'memory_size' : 10000, # size of replay memory
58 | 'n_frames' : 1, # number of frames
59 |
60 | # learner settings
61 | 'learning_rate' : 0.00001,
62 | 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used
63 | 'discount' : 0.99, # discount rate for RL
64 | 'clip_err' : False, # value to clip loss gradients to
65 | 'clip_reward' : False, # value to clip reward values to
66 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations
67 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper)
68 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
69 | 'gpu' : False,
70 | 'reward_rescale': False,
71 |
72 | # general
73 | 'seed_general' : 1723,
74 | 'seed_simulator' : 5632,
75 | 'seed_agent' : 9826,
76 | 'seed_memory' : 7563
77 |
78 | }
79 |
80 | mdp = CartPole()
81 | simulator = MDPSimulator(mdp)
82 |
83 | class CartNet(Chain):
84 |
85 | def __init__(self):
86 | super(CartNet, self).__init__(
87 | l1=F.Linear(4, 20, bias=0.0),
88 | l2=F.Linear(20, 10, bias=0.0),
89 | bn1=L.BatchNormalization(10),
90 | l3=F.Linear(10, 10),
91 | l4=F.Linear(10, 10),
92 | bn2=L.BatchNormalization(10),
93 | lout=F.Linear(10, simulator.n_actions)
94 | )
95 | self.train = True
96 | # initialize avg_var to prevent divide by zero
97 | self.bn1.avg_var.fill(0.1),
98 | self.bn2.avg_var.fill(0.1),
99 |
100 | def __call__(self, ohist, ahist):
101 | h = F.relu(self.l1(ohist))
102 | h = F.relu(self.l2(h))
103 | h = self.bn1(h, test=not self.train)
104 | h = F.relu(self.l3(h))
105 | h = F.relu(self.l4(h))
106 | h = self.bn2(h, test=not self.train)
107 | output = self.lout(h)
108 | return output
109 |
110 |
111 | def pole_sim(nsteps, simulator, policy, verbose=False):
112 | mdp = simulator.model
113 |
114 | # re-initialize the model
115 | simulator.reset_episode()
116 |
117 | rtot = 0.0
118 | xpos = np.zeros(nsteps)
119 | thetas = np.zeros(nsteps)
120 | # run the simulation
121 | input_state = np.zeros((1,4), dtype=np.float32)
122 | for i in xrange(nsteps):
123 | state = simulator.get_screenshot()
124 | input_state[0] = state
125 | #a = policy.action((input_state,None))
126 | a = policy.action(state)
127 | simulator.act(a)
128 | r = simulator.reward()
129 | rtot += r
130 | xpos[i], thetas[i] = state[0], state[2]
131 | print state, r
132 | if simulator.episode_over():
133 | break
134 | return rtot, xpos, thetas
135 |
136 |
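# PoleCartHeuristic is a trivial baseline policy: it pushes the cart in the direction
# the pole is leaning (state index 2 is the pole angle), which should keep the pole up
# longer than random actions. It is only used by the commented-out rollout at the bottom
# of this file, as a quick sanity check against the learned DQN policy.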
137 | class PoleCartHeuristic():
138 |
139 | def __init__(self):
140 | self.a = 0
141 |
142 | def action(self, state):
143 | if state[2] > 0:
144 | return 1
145 | else:
146 | return 0
147 |
148 |
149 | net = CartNet()
150 |
151 | # Initialize Learner with a Chainer backend
152 | backend = ChainerBackend(settings)
153 | backend.set_net(net)
154 | learner = DQNLearner(settings, backend)
155 |
156 | # Initialize memory
157 | memory = ReplayMemoryHDF5(settings)
158 |
159 | # Initialize Agent Framework
160 | agent = DQNAgent(learner, memory, simulator, settings)
161 |
162 | # Start training
163 | agent.train(verbose=True)
164 |
165 | #policy = RandomPolicy(simulator.n_actions)
166 | #policy = PoleCartHeuristic()
167 |
168 | #r, xs, ts = pole_sim(100, simulator, policy, verbose=True)
169 |
170 | #p.plot(xs); p.plot(10.0*ts)
171 | #p.show()
172 |
--------------------------------------------------------------------------------
/examples/run_mountain_car.py:
--------------------------------------------------------------------------------
1 | """
2 | File to initialize training.
3 | Contains settings, network definition for Chainer.
4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training.
5 | """
6 |
7 | import numpy as np
8 |
9 | import chainer
10 | import chainer.functions as F
11 | import chainer.links as L
12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils
13 | from chainer import Link, Chain, ChainList
14 |
15 | # Memory
16 | from chimp.memories import ReplayMemoryHDF5
17 |
18 | # Learner (Brain)
19 | from chimp.learners.dqn_learner import DQNLearner
20 | from chimp.learners.chainer_backend import ChainerBackend
21 |
22 | # Agent Framework
23 | from chimp.agents import DQNAgent
24 |
25 | # Simulator
26 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator
27 | from chimp.simulators.mdp.mountain_car import MountainCar
28 |
29 | # Rollout Policy
30 | from chimp.utils.policies import RandomPolicy
31 |
32 |
33 | settings = {
34 |
35 | # agent settings
36 | 'batch_size' : 32,
37 | 'print_every' : 1000,
38 | 'save_dir' : 'results/mountain_car',
39 | 'iterations' : 200000,
40 | 'eval_iterations' : 100,
41 | 'eval_every' : 1000,
42 | 'save_every' : 20000,
43 | 'initial_exploration' : 50000,
44 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step
45 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
46 | 'epsilon' : 1.0, # Initial exploration rate
47 | 'learn_freq' : 1,
48 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
49 | 'model_dims' : (1,2),
50 |
51 | # simulator settings
52 | 'viz' : False,
53 |
54 | # replay memory settings
55 | 'memory_size' : 100000, # size of replay memory
56 | 'n_frames' : 1, # number of frames
57 |
58 | # learner settings
59 | 'learning_rate' : 0.00001,
60 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used
61 | 'discount' : 0.95, # discount rate for RL
62 | 'clip_err' : False, # value to clip loss gradients to
63 | 'clip_reward' : False, # value to clip reward values to
64 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations
65 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper)
66 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
67 | 'gpu' : False,
68 | 'reward_rescale': False,
69 |
70 | # general
71 | 'seed_general' : 1723,
72 | 'seed_simulator' : 5632,
73 | 'seed_agent' : 9826,
74 | 'seed_memory' : 7563
75 |
76 | }
77 |
78 | mdp = MountainCar()
79 | simulator = MDPSimulator(mdp)
80 |
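# MountainCar exposes a 2-dimensional state (position, velocity), which is why
# 'model_dims' is set to (1,2) above; the network below maps that state to one
# Q-value per available action.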
81 | class CarNet(Chain):
82 |
83 | def __init__(self):
84 | super(CarNet, self).__init__(
85 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0),
86 | l2=F.Linear(20, 10, bias=0.0),
87 | bn1=L.BatchNormalization(10),
88 | l3=F.Linear(10, 10),
89 | l4=F.Linear(10, 10),
90 | bn2=L.BatchNormalization(10),
91 | lout=F.Linear(10, simulator.n_actions)
92 | )
93 | self.train = True
94 | # initialize avg_var to prevent divide by zero
95 | self.bn1.avg_var.fill(0.1),
96 | self.bn2.avg_var.fill(0.1),
97 |
98 |
99 | def __call__(self, ohist, ahist):
100 | h = F.relu(self.l1(ohist))
101 | h = F.relu(self.l2(h))
102 | h = self.bn1(h, test=not self.train)
103 | h = F.relu(self.l3(h))
104 | h = F.relu(self.l4(h))
105 | h = self.bn2(h, test=not self.train)
106 | output = self.lout(h)
107 | return output
108 |
109 |
110 | net = CarNet()
111 |
112 | # Initialize Learner with a Chainer backend
113 | backend = ChainerBackend(settings)
114 | backend.set_net(net)
115 | learner = DQNLearner(settings, backend)
116 |
117 | # Initialize memory
118 | memory = ReplayMemoryHDF5(settings)
119 |
120 | # Initialize Agent Framework
121 | agent = DQNAgent(learner, memory, simulator, settings)
122 |
123 | # Start training
124 | agent.train(verbose=True)
125 |
--------------------------------------------------------------------------------
/examples/run_tiger.py:
--------------------------------------------------------------------------------
1 | '''
2 | File to initialize training.
3 | Contains settings, network definition for Chainer.
4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training.
5 | '''
6 |
7 | import numpy as np
8 |
9 | import chainer
10 | import chainer.functions as F
11 | import chainer.links as L
12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils
13 | from chainer import Link, Chain, ChainList
14 | from memories import ReplayMemoryHDF5
15 |
16 | from learners import Learner
17 | from agents import DQNAgent
18 |
19 | from simulators.pomdp import POMDPSimulator
20 | from simulators.pomdp import TigerPOMDP
21 |
22 | print('Setting training parameters...')
23 | # Set training settings
24 | settings = {
25 | # agent settings
26 | 'batch_size' : 32,
27 | 'print_every' : 5000,
28 | 'save_dir' : 'results/nets_tiger_observation',
29 | 'iterations' : 500000,
30 | 'eval_iterations' : 5000,
31 | 'eval_every' : 5000,
32 | 'save_every' : 5000,
33 | 'initial_exploration' : 10000,
34 | 'epsilon_decay' : 0.0001, # subtract from epsilon every step
35 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions
36 | 'epsilon' : 1.0, # Initial exploration rate
37 | 'model_dims': (1,1),
38 | 'learn_freq' : 1,
39 |
40 | # simulator settings
41 | 'viz' : False,
42 |
43 | # replay memory settings
44 | 'memory_size' : 100000, # size of replay memory
45 | 'n_frames' : 5, # number of frames
46 |
47 | # learner settings
48 | 'learning_rate' : 0.001,
49 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used
50 | 'discount' : 0.95, # discount rate for RL
51 | 'clip_err' : False, # value to clip loss gradients to
52 | 'clip_reward' : False, # value to clip reward values to
53 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations
54 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper)
55 | 'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA" and "SGD"
56 | 'gpu' : False,
57 | 'reward_rescale': False,
58 |
59 | # general
60 | 'seed_general' : 1723,
61 | 'seed_simulator' : 5632,
62 | 'seed_agent' : 9826,
63 | 'seed_memory' : 7563
64 |
65 | }
66 |
67 | print(settings)
68 |
69 | np.random.seed(settings["seed_general"])
70 |
71 | print('Setting up simulator...')
72 | pomdp = TigerPOMDP( seed=settings['seed_simulator'] )
73 | simulator = POMDPSimulator(pomdp, robs=True)
74 |
75 | settings['model_dims'] = simulator.model_dims
76 |
77 | print('Initializing replay memory...')
78 | memory = ReplayMemoryHDF5(settings)
79 |
80 | print('Setting up networks...')
81 |
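# A rough reading of the network below: the Bilinear first layer mixes the observation
# history and the action history (each of length n_frames = 5) into a 200-unit hidden
# representation, which is then passed through fully connected ReLU layers down to one
# Q-value per action.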
82 | class Linear(Chain):
83 |
84 | def __init__(self):
85 | super(Linear, self).__init__(
86 | l1=F.Bilinear(settings["n_frames"], settings["n_frames"], 200),
87 | l2=F.Linear(200, 100, wscale=np.sqrt(2)),
88 | l3=F.Linear(100, 100, wscale=np.sqrt(2)),
89 | l4=F.Linear(100, 50, wscale=np.sqrt(2)),
90 | l5=F.Linear(50, simulator.n_actions, wscale = np.sqrt(2))
91 | )
92 |
93 | def __call__(self, s, action_history):
94 | h1 = F.relu(self.l1(s,action_history))
95 | h2 = F.relu(self.l2(h1))
96 | h3 = F.relu(self.l3(h2))
97 | h4 = F.relu(self.l4(h3))
98 | output = self.l5(h4)
99 | return output
100 |
101 | net = Linear()
102 |
103 | print('Initializing the learner...')
104 | learner = Learner(settings)
105 | learner.load_net(net)
106 |
107 | print('Initializing the agent framework...')
108 | agent = DQNAgent(settings)
109 |
110 | print('Training...')
111 | agent.train(learner, memory, simulator)
112 |
113 | print('Loading the net...')
114 | learner = agent.load(settings['save_dir']+'/learner_final.p')
115 |
116 | ind_max = learner.val_rewards.index(max(learner.val_rewards))
117 | ind_net = settings['initial_exploration'] + ind_max * settings['eval_every']
118 | agent.load_net(learner,settings['save_dir']+'/net_%d.p' % int(ind_net))
119 |
120 | np.random.seed(settings["seed_general"])
121 |
122 | print('Evaluating DQN agent...')
123 | print('(reward, MSE loss, mean Q-value, episodes - NA, time)')
124 | reward, MSE_loss, mean_Q_value, episodes, time, paths, actions, rewards = agent.evaluate(learner, simulator, 50000)
125 | print(reward, MSE_loss, mean_Q_value, episodes, time)
126 |
--------------------------------------------------------------------------------
/logos/chimp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/chimp.png
--------------------------------------------------------------------------------
/logos/monkey_text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/monkey_text.png
--------------------------------------------------------------------------------
/roms/README.md:
--------------------------------------------------------------------------------
1 | # Put roms here
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import numpy
4 |
5 | """
6 | This script creates a symbolic link to the chimp source code in your python's site-packages directory
7 | """
8 |
9 | np_path = numpy.__file__
10 | source_path = os.path.dirname(os.path.realpath("setup.py")) + "/chimp"
11 |
12 | np_split = np_path.split("/")
13 | target_path = '/'.join(np_split[:-2]) + "/chimp"
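# Illustrative example (paths are hypothetical): if numpy lives in
# /usr/lib/python2.7/site-packages/numpy/__init__.py, then target_path becomes
# /usr/lib/python2.7/site-packages/chimp and the command below expands to
#   ln -s /path/to/Chimp/chimp /usr/lib/python2.7/site-packages/chimp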
14 |
15 | # symlink to the site packages dir
16 | cmd = "ln -s " + source_path + " " + target_path
17 |
18 | subprocess.call([cmd], shell=True)
19 |
20 |
21 |
--------------------------------------------------------------------------------