├── .gitignore ├── LICENSE ├── README.md ├── chimp ├── __init__.py ├── agents │ ├── __init__.py │ ├── agent_test.py │ └── dqn_agent.py ├── learners │ ├── __init__.py │ ├── chainer_backend.py │ ├── chainer_test.py │ └── dqn_learner.py ├── memories │ ├── __init__.py │ ├── mem_test.py │ ├── memory.py │ └── replay_memory.py ├── pre_trained_nets │ └── mountain_car.net ├── simulators │ ├── __init__.py │ ├── atari │ │ ├── __init__.py │ │ └── atari.py │ ├── gym │ │ ├── __init__.py │ │ └── gym_wrapper.py │ ├── mdp │ │ ├── __init__.py │ │ ├── cart_pole.py │ │ ├── mdp_simulator.py │ │ └── mountain_car.py │ └── pomdp │ │ ├── __init__.py │ │ ├── models │ │ ├── __init__.py │ │ ├── rock_sample.py │ │ ├── rock_test.py │ │ ├── simulator.py │ │ ├── tiger.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── belief.py │ │ │ ├── belief_momdp.py │ │ │ └── distributions.py │ │ └── sim_loop.py └── utils │ ├── __init__.py │ ├── distributions.py │ └── policies.py ├── examples ├── atari_tutorial.ipynb ├── mountain_car.ipynb ├── mountain_car_test.py ├── run_atari.py ├── run_cartpole.py ├── run_mountain_car.py └── run_tiger.py ├── logos ├── chimp.png └── monkey_text.png ├── roms └── README.md └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.pyc 10 | 11 | # Packages # 12 | ############ 13 | *.7z 14 | *.dmg 15 | *.gz 16 | *.iso 17 | *.jar 18 | *.rar 19 | *.tar 20 | *.zip 21 | 22 | # Logs and databases # 23 | ###################### 24 | *.log 25 | *.sql 26 | *.sqlite 27 | *.hdf5 28 | 29 | # OS generated files # 30 | ###################### 31 | *.DS_Store 32 | *.DS_Store? 33 | *._* 34 | *.Spotlight-V100 35 | *.Trashes 36 | *ehthumbs.db 37 | *Thumbs.db 38 | 39 | # Data files # 40 | ############## 41 | *.csv 42 | *.jld 43 | *.mat 44 | *.p 45 | 46 | # Images # 47 | ########## 48 | *.jpg 49 | *.jpeg 50 | *.bitmap 51 | 52 | # Documents # 53 | ############# 54 | *.eps 55 | *.pdf 56 | 57 | # Misc # 58 | ######## 59 | *.swp 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | Chimp is a general-purpose framework for deep reinforcement learning developed at the [Stanford Intelligent Systems Laboratory](http://sisl.stanford.edu/). 6 | Chimp is based on a simple four-part architecture that allows plug-and-play capabilities for deep reinforcement 7 | learning experiments. 8 | This package was inspired by the Google DeepMind [paper](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) (V. Mnih et al.). 9 | Many of the architectural ideas were taken from DeepMind's 10 | [GORILA](http://arxiv.org/abs/1507.04296) framework and from the 11 | [paper](http://arxiv.org/abs/1508.04186) on distributed Deep Q-Learning by Ong et al. 12 | 13 | # Installation 14 | 15 | First clone Chimp: 16 | ``` 17 | git clone https://github.com/sisl/Chimp 18 | ``` 19 | Then add the source directory to your `PYTHONPATH`. 20 | 21 | ``` 22 | cd Chimp 23 | export PYTHONPATH=$(pwd):$PYTHONPATH 24 | ``` 25 | 26 | You will also need NumPy and SciPy installed, as well as a deep learning backend. Currently only [Chainer](https://github.com/pfnet/chainer) is supported (TensorFlow coming soon). 27 | 28 | Once you have the dependencies installed, you should be able to run the framework on a CPU. To use a GPU, you will need CUDA and a supported graphics card. 29 | 30 | # Getting Started 31 | 32 | If you are interested in using Chimp for your own reinforcement learning problems, check out the [mountain car tutorial](https://github.com/sisl/Chimp/blob/master/examples/mountain_car.ipynb) to get an idea of how to write your own simulator class. If you would like to use Chimp with the Arcade Learning Environment, check out the [Atari tutorial](https://github.com/sisl/Chimp/blob/master/examples/atari_tutorial.ipynb) to get started. 33 | 34 | # Architecture 35 | 36 | Chimp consists of four main modules: Agent, Learner, Simulator, and Memory. This decomposition yields a powerful and flexible framework for reinforcement learning experiments, in which one can quickly switch between simulators, replay memory implementations, and deep learning backends. 37 | 38 | Chimp is also flexible in how it handles inputs to the deep neural network. 39 | The user can specify the history lengths of observations, actions, and even rewards to use as inputs to the model, and Chimp will handle the rest. 40 | 41 | The input size is specified as a tuple ```(s_size, a_size, r_size)```. For the DeepMind Atari experiments, this setting would be (4,0,0): four image frames per input and no action or reward history. 42 | 43 | # Components 44 | 45 | * Memory (implements experience replay) 46 | * Currently, we support in-memory numpy arrays and HDF5-backed storage 47 | 48 | * Learner ("brain" of the algorithm that does forward and backward passes in a neural net) 49 | * We support DQN with arbitrary observation/action history lengths as input 50 | * Planning to add LSTM and an actor-critic framework 51 | 52 | * Simulator (environment for the agent to interact with) 53 | * Single-player Arcade Learning Environment 54 | * MDPs 55 | 56 | * Agent (general framework that handles all interactions between a learner, a memory, and a simulator) 57 | 58 | # Dependencies 59 | 60 | Chimp relies on existing deep learning back-ends. Currently only [Chainer](http://chainer.org/) is supported.
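A minimal end-to-end script wires the four components together roughly as follows. This sketch mirrors `chimp/agents/agent_test.py` and `chimp/learners/chainer_test.py` in this repository; it assumes a `settings` dictionary and a Chainer `Chain` Q-network `net` defined as in those files, so treat it as an outline rather than a tested standalone example.

```python
from chimp.memories.replay_memory import ReplayMemoryHDF5
from chimp.learners.chainer_backend import ChainerBackend
from chimp.learners.dqn_learner import DQNLearner
from chimp.simulators.mdp.mountain_car import MountainCar
from chimp.simulators.mdp.mdp_simulator import MDPSimulator
from chimp.agents.dqn_agent import DQNAgent

simulator = MDPSimulator(MountainCar())   # Simulator: environment the agent interacts with
backend = ChainerBackend(settings)        # Chainer backend that runs the network
backend.set_net(net)                      # net: a chainer Chain mapping (obs, action) histories to Q-values
learner = DQNLearner(settings, backend)   # Learner: forward/backward passes and target network
memory = ReplayMemoryHDF5(settings)       # Memory: HDF5-backed experience replay
agent = DQNAgent(learner, memory, simulator, settings)  # Agent: ties everything together
agent.train(verbose=True)
```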
61 | 62 | Required Python packages: 63 | * [Chainer](https://github.com/pfnet/chainer) 64 | * NumPy 65 | * SciPy 66 | 67 | Recommended libraries (some functionality will be absent without them): 68 | * Pygame 69 | * CUDA 70 | * Arcade Learning Environment 71 | 72 | # Authors 73 | 74 | The original authors of this software are: Yegor Tkachenko, Max Egorov, Hao Yi Ong. 75 | 76 | # License 77 | 78 | The software is distributed under the Apache License 2.0 79 | -------------------------------------------------------------------------------- /chimp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/chimp/__init__.py -------------------------------------------------------------------------------- /chimp/agents/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Agent ''' 2 | 3 | from dqn_agent import DQNAgent -------------------------------------------------------------------------------- /chimp/agents/agent_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_learner import ChainerLearner 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.learners.dqn_learner import DQNPolicy 12 | 13 | from chimp.agents.dqn_agent import DQNAgent 14 | 15 | from chimp.simulators.mdp.mountain_car import MountainCar 16 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 17 | 18 | from chimp.memories.replay_memory import ReplayMemoryHDF5 19 | 20 | from chimp.utils.policies import * 21 | 22 | import numpy as np 23 | 24 | import chainer 25 | import chainer.functions as F 26 | import chainer.links as L 27 | from chainer import Chain 28 | 29 | settings = { 30 | 31 | # agent settings 32 | 'batch_size' : 32, 33 | 'print_every' : 1000, 34 | 'save_dir' : 'results', 35 | 'iterations' : 3000, 36 | 'eval_iterations' : 200, 37 | 'eval_every' : 1000, 38 | 'save_every' : 1000, 39 | 'initial_exploration' : 10000, 40 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step 41 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 42 | 'epsilon' : 1.0, # Initial exploratoin rate 43 | 'learn_freq' : 1, 44 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 45 | 'model_dims' : (1,2), 46 | 47 | # simulator settings 48 | 'viz' : False, 49 | 50 | # replay memory settings 51 | 'memory_size' : 20000, # size of replay memory 52 | 'n_frames' : 1, # number of frames 53 | 54 | # learner settings 55 | 'learning_rate' : 0.0001, 56 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 57 | 'discount' : 0.95, # discount rate for RL 58 | 'clip_err' : False, # value to clip loss gradients to 59 | 'clip_reward' : False, # value to clip reward values to 60 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 61 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 62 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 63 | 'gpu' : False, 64 | 'reward_rescale': False, 65 | 66 | # general 67 | 
'seed_general' : 1723, 68 | 'seed_simulator' : 5632, 69 | 'seed_agent' : 9826, 70 | 'seed_memory' : 7563 71 | 72 | } 73 | 74 | 75 | mdp = MountainCar() 76 | simulator = MDPSimulator(mdp) 77 | 78 | 79 | class TestNet(Chain): 80 | 81 | def __init__(self): 82 | super(TestNet, self).__init__( 83 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 84 | l2=F.Linear(20, 10, bias=0.0), 85 | bn1=L.BatchNormalization(10), 86 | l3=F.Linear(10, 10), 87 | l4=F.Linear(10, 10), 88 | bn2=L.BatchNormalization(10), 89 | lout=F.Linear(10, simulator.n_actions) 90 | ) 91 | self.train = True 92 | # initialize avg_var to prevent divide by zero 93 | self.bn1.avg_var.fill(0.1), 94 | self.bn2.avg_var.fill(0.1), 95 | 96 | def __call__(self, ohist, ahist): 97 | h = F.relu(self.l1(ohist)) 98 | h = F.relu(self.l2(h)) 99 | h = self.bn1(h, test=not self.train) 100 | h = F.relu(self.l3(h)) 101 | h = F.relu(self.l4(h)) 102 | h = self.bn2(h, test=not self.train) 103 | output = self.lout(h) 104 | return output 105 | 106 | 107 | 108 | net = TestNet() 109 | custom_learner = ChainerLearner(settings) 110 | custom_learner.set_net(net) 111 | learner = DQNLearner(settings, custom_learner) 112 | 113 | memory = ReplayMemoryHDF5(settings) 114 | 115 | agent = DQNAgent(learner, memory, simulator, settings) 116 | 117 | agent.train(verbose=True) 118 | -------------------------------------------------------------------------------- /chimp/agents/dqn_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from copy import deepcopy 4 | import pickle 5 | from timeit import default_timer as timer 6 | 7 | from chimp.utils.policies import RandomPolicy 8 | from chimp.utils.policies import DQNPolicy 9 | 10 | class DQNAgent(object): 11 | 12 | def __init__(self, learner, memory, simulator, settings, dqn_policy=None, rollout_policy=None): 13 | 14 | """ 15 | The learning agent is responsible for communicating and moving 16 | data between the three modules: Learner, Simulator, Memory 17 | Inputs: 18 | - learner: containes the neural network and the optimizer to train it 19 | - memory: expereince replay memory that can be minibatch sampled 20 | - simulator: simulates the environemnt 21 | - settings: hyper parameters for training 22 | - rollout_policy: rollout policy, random by default 23 | """ 24 | 25 | self.learner = learner 26 | self.memory = memory 27 | self.simulator = simulator # for populating the experience replay 28 | self.evaluator = deepcopy(simulator) # for evaluation 29 | 30 | self.dqn_policy = dqn_policy 31 | if dqn_policy is None: 32 | self.dqn_policy = DQNPolicy(learner) 33 | 34 | self.rollout_policy = rollout_policy 35 | if rollout_policy is None: 36 | self.rollout_policy = RandomPolicy(simulator.n_actions) 37 | 38 | self.set_params(settings) 39 | 40 | self.n_epochs = self.iterations / float(memory.memory_size) 41 | self.iteration = [] 42 | self.loss = [] 43 | self.q_ave = [] 44 | self.eval_iteration = [] 45 | self.r_eval = [] 46 | self.r_per_episode_eval = [] 47 | 48 | def policy(self, obs, epsilon): 49 | """ 50 | e-greedy policy with customazible rollout 51 | """ 52 | if self.random_state.rand() < epsilon: 53 | return self.rollout_policy.action(obs) 54 | else: 55 | return self.dqn_policy.action(obs) 56 | 57 | 58 | def save(self,obj,name): 59 | ''' function to save a file as pickle ''' 60 | # TODO: don't you need to close the I/O stream? 
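        # One option (a sketch, not wired in) is a context manager, so the file
        # handle is closed deterministically rather than by garbage collection:
        #   with open(name, "wb") as f:
        #       pickle.dump(obj, f)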
61 | pickle.dump(obj, open(name, "wb")) 62 | 63 | def load(self,name): 64 | ''' function to load a pickle file ''' 65 | return pickle.load(open(name, "rb")) 66 | 67 | 68 | def train(self, verbose=True): 69 | """ 70 | Trains the network 71 | """ 72 | learner = self.learner 73 | memory = self.memory 74 | simulator = self.simulator 75 | 76 | if self.viz: 77 | simulator.init_viz_display() 78 | 79 | # run initial exploration and populate the experience replay 80 | self.populate_memory(self.initial_exploration) 81 | 82 | # add initial observation to observatin history 83 | iobs = simulator.get_screenshot().copy() 84 | self.initial_obs(iobs) 85 | 86 | iteration = 0 # keeps track of all training iterations, ignores evaluation 87 | run_time = 0.0 88 | start_time = timer() # mark the global beginning of training 89 | last_print = timer() 90 | 91 | while iteration < self.iterations: # for the set number of iterations 92 | 93 | # perform a single simulator step 94 | self.step() 95 | # minibatch update for DQN 96 | if iteration % self.learn_freq == 0: 97 | loss, qvals = self.batch_update() 98 | self.iteration.append(iteration) 99 | self.loss.append(loss) 100 | self.q_ave.append(np.mean(qvals)) 101 | 102 | if iteration % self.print_every == 0 and verbose: 103 | print "Iteration: %d, Loss: %.3f, Average Q-Values: %.2f, Time since print: %.2f, Total runtime: %.2f, epsilon: %.2f" % (iteration, loss, np.mean(qvals), timer() - last_print, timer() - start_time, self.epsilon) 104 | last_print = timer() 105 | 106 | if iteration % self.save_every == 0: 107 | # saving the net, the training history, and the learner itself 108 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration))) 109 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T) 110 | 111 | if iteration % self.eval_every == 0: # evaluation 112 | sim_r, sim_r_per_episode, sim_time = self.simulate(self.eval_iterations, self.eval_epsilon) 113 | self.eval_iteration.append(iteration) 114 | self.r_eval.append(sim_r) 115 | self.r_per_episode_eval.append(sim_r_per_episode) 116 | 117 | if verbose: 118 | print "Evaluation, total reward: %.2f, Reward per episode: %.2f" % (sim_r, sim_r_per_episode) 119 | 120 | np.savetxt('%s/evaluation_history.csv' % self.save_dir, np.asarray([self.eval_iteration, self.r_eval, self.r_per_episode_eval]).T) 121 | 122 | if iteration % self.target_net_update == 0: 123 | learner.copy_net_to_target_net() 124 | 125 | self.epsilon -= self.epsilon_decay 126 | self.epsilon = 0.1 if self.epsilon < 0.1 else self.epsilon 127 | 128 | iteration += 1 129 | 130 | memory.close() 131 | 132 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration))) 133 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T) 134 | 135 | run_time = timer() - start_time 136 | print('Overall training + evaluation time: '+ str(run_time)) 137 | 138 | 139 | 140 | def step(self): 141 | """ 142 | Performs a single step with the DQN and updates the replay memory 143 | """ 144 | loss = 0.0 145 | 146 | simulator = self.simulator 147 | 148 | obs = simulator.get_screenshot().copy() 149 | a = self.policy((self.ohist, self.ahist), self.epsilon) 150 | simulator.act(a) 151 | r = simulator.reward() 152 | 153 | term = False 154 | obsp = None 155 | if simulator.episode_over(): 156 | term = True 157 | obsp = obs.copy() 158 | simulator.reset_episode() 159 | iobs = simulator.get_screenshot().copy() 160 | self.empty_history() 161 | self.initial_obs(iobs) 162 | else: 
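            # non-terminal step: take the new observation and roll it into the history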
163 | obsp = simulator.get_screenshot().copy() 164 | self.update_history(obsp, a) 165 | 166 | if self.viz: # move the image to the screen / shut down the game if display is closed 167 | simulator.refresh_viz_display() 168 | 169 | self.memory.store_tuple(obs, a, r, obsp, term) 170 | 171 | 172 | def batch_update(self): 173 | """ 174 | Performs a mini-batch update on the DQN 175 | """ 176 | ohist, ahist, rhist, ophist, term = self.memory.minibatch() 177 | # take the last as our action and reward 178 | a = ahist[:,-1] 179 | r = rhist[:,-1] 180 | t = term[:,-1] 181 | oahist = None 182 | # TODO: this indexing is a hack to deal with single sample history 183 | # Using the first history entry of the minibatch (there is only one) - could do this with reshape as well 184 | if self.ahist_size == 0 or self.ohist_size == 1: 185 | oahist = (ohist[:,0], None) 186 | oaphist = (ophist[:,0], None) 187 | else: 188 | oahist = (ohist, ahist[:self.ahist_size]) 189 | oaphist = (ophist, ahist[1:self.ahist_size]) 190 | loss, qvals = self.learner.update(oahist, a, r, oaphist, t) 191 | return loss, qvals 192 | 193 | 194 | ################################################################# 195 | ################### Some Utility Functions ###################### 196 | ################################################################# 197 | 198 | def simulate(self, nsteps, epsilon, viz=False): 199 | """ 200 | Simulates the DQN policy 201 | """ 202 | simulator = self.evaluator # use a different simulator to prevent breaks 203 | simulator.reset_episode() 204 | # add initial observation to observation history 205 | iobs = simulator.get_screenshot().copy() 206 | self.initial_eval_obs(iobs) 207 | 208 | if self.viz: 209 | simulator.init_viz_display() 210 | 211 | rtot = 0.0 212 | r_per_episode = 0.0 213 | episode_count = 0 214 | start_sim = timer() 215 | for i in xrange(nsteps): 216 | # generate reward and step the simulator 217 | ohist, ahist = self.eval_ohist, self.eval_ahist 218 | a = self.policy((ohist, ahist), epsilon) 219 | 220 | simulator.act(a) 221 | r = simulator.reward() 222 | rtot += r 223 | if simulator.episode_over(): 224 | simulator.reset_episode() 225 | iobs = simulator.get_screenshot().copy() 226 | self.empty_eval_history() 227 | self.initial_eval_obs(iobs) 228 | episode_count += 1 229 | r_per_episode = rtot 230 | else: 231 | obsp = simulator.get_screenshot().copy() 232 | self.update_eval_history(obsp, a) 233 | 234 | if self.viz: # move the image to the screen / shut down the game if display is closed 235 | simulator.refresh_viz_display() 236 | 237 | if episode_count > 0: 238 | r_per_episode /= episode_count 239 | else: 240 | r_per_episode = rtot 241 | runtime = timer() - start_sim 242 | return rtot, r_per_episode, runtime 243 | 244 | 245 | def populate_memory(self, nsamples): 246 | # TODO: do we need to copy obs and obsp? 
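        # (Copying is the safe default: if a simulator's get_screenshot() returns a
        # view of an internal buffer, an uncopied reference stored in the replay
        # memory could be mutated on the next step. Whether every simulator actually
        # requires the copy is untested.)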
247 | memory = self.memory 248 | simulator = self.simulator 249 | 250 | simulator.reset_episode() 251 | for i in xrange(nsamples): 252 | # generate o, a, r, o' tuples 253 | obs = simulator.get_screenshot().copy() 254 | a = self.rollout_policy.action(obs) 255 | simulator.act(a) 256 | r = simulator.reward() 257 | obsp = simulator.get_screenshot().copy() 258 | term = False 259 | if simulator.episode_over(): 260 | term = True 261 | simulator.reset_episode() # reset 262 | # store the tuples 263 | memory.store_tuple(obs, a, r, obsp, term) 264 | simulator.reset_episode() 265 | 266 | 267 | def plot_loss(self): 268 | try: 269 | from matplotlib import pyplot 270 | except ImportError: 271 | "Can not plot loss, matplotlib required" 272 | pyplot.plot(self.loss[1:]) 273 | pyplot.xlabel("Iteration") 274 | pyplot.ylabel("Loss") 275 | pyplot.show() 276 | 277 | def plot_per_sim_reward(self): 278 | try: 279 | from matplotlib import pyplot 280 | except ImportError: 281 | "Can not plot reward, matplotlib required" 282 | pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_eval) 283 | pyplot.xlabel("Iteration") 284 | pyplot.ylabel("Reward") 285 | pyplot.title("Total Reward Per Evaluation") 286 | pyplot.show() 287 | 288 | def plot_per_episode_reward(self): 289 | try: 290 | from matplotlib import pyplot 291 | except ImportError: 292 | "Can not plot loss, matplotlib required" 293 | pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_per_episode_eval) 294 | pyplot.xlabel("Reward") 295 | pyplot.ylabel("Loss") 296 | pyplot.title("Average Reward Per Episode") 297 | pyplot.show() 298 | 299 | 300 | 301 | def set_params(self, settings): 302 | # set up the setting parameters 303 | self.random_state = np.random.RandomState(settings.get('seed_agent', None)) # change to a new random seed 304 | 305 | self.batch_size = settings.get('batch_size', 32) 306 | self.n_frames = settings.get('n_frames', 1) 307 | self.iterations = settings.get('iterations', 1000000) 308 | 309 | self.epsilon = settings.get('epsilon', 1.0) # exploration 310 | self.epsilon_decay = settings.get('epsilon_decay', 0.00001) # decay in 311 | self.eval_epsilon = settings.get('eval_epsilon', 0.0) # exploration during evaluation 312 | self.initial_exploration = settings.get('initial_exploration', 10000) # of iterations during initial exploration 313 | 314 | self.viz = settings.get('viz', False) # whether to visualize the state/observation, False when not supported by simulator 315 | 316 | self.eval_iterations = settings.get('eval_iterations', 500) 317 | self.eval_every = settings.get('eval_every', 5000) 318 | self.print_every = settings.get('print_every', 5000) 319 | self.save_every = settings.get('save_every', 5000) 320 | self.save_dir = settings.get('save_dir', '.') 321 | # create the directory if it doesnt exist 322 | if not os.path.isdir(self.save_dir): 323 | os.makedirs(self.save_dir) 324 | 325 | self.learn_freq = settings.get('learn_freq', 1) # how frequently to do back prop on a minibatch 326 | self.target_net_update = settings.get('target_net_update', 5000) 327 | 328 | self.ohist_size, self.ahist_size, self.rhist_size = settings.get('history_sizes', (1,0,0)) 329 | self.ahist_size = 1 if self.ahist_size == 0 else self.ahist_size 330 | self.ohist_size = 1 if self.ohist_size == 0 else self.ohist_size 331 | 332 | self.ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 333 | self.ahist = np.zeros(self.ahist_size, dtype=np.int32) 334 | self.rev_ohist = np.zeros((self.ohist_size,) + 
self.simulator.model_dims, dtype=np.float32) 335 | self.rev_ahist = np.zeros(self.ahist_size, dtype=np.int32) 336 | 337 | self.eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 338 | self.eval_ahist = np.zeros(self.ahist_size, dtype=np.int32) 339 | self.rev_eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 340 | self.rev_eval_ahist = np.zeros(self.ahist_size, dtype=np.int32) 341 | 342 | ################################################################# 343 | ################# History utility functions ##################### 344 | ################################################################# 345 | """ 346 | These are messy, and could be optimized 347 | """ 348 | 349 | def update_history(self, obs, a): 350 | # roll the histories forward and replace the first entry 351 | # keep a reversed history so we can easily roll though it 352 | self.rev_ohist = np.roll(self.rev_ohist, 1, axis=0) 353 | self.rev_ahist = np.roll(self.rev_ahist, 1, axis=0) 354 | self.rev_ahist[0] = a 355 | self.rev_ohist[0] = obs 356 | 357 | # reverse to get history in [s0, s1, s2,...,sn] format 358 | self.ohist = np.flipud(self.rev_ohist) 359 | self.ahist = np.flipud(self.rev_ahist) 360 | 361 | 362 | def update_eval_history(self, obs, a): 363 | # roll the histories forward and replace the first entry 364 | self.rev_eval_ohist = np.roll(self.rev_eval_ohist, 1, axis=0) 365 | self.rev_eval_ahist = np.roll(self.rev_eval_ahist, 1, axis=0) 366 | self.rev_eval_ahist[0] = a 367 | self.rev_eval_ohist[0] = obs 368 | 369 | self.eval_ohist = np.flipud(self.rev_eval_ohist) 370 | self.eval_ahist = np.flipud(self.rev_eval_ahist) 371 | 372 | def initial_obs(self, obs): 373 | self.rev_ohist[0] = obs 374 | self.ohist[-1] = obs 375 | 376 | def initial_eval_obs(self, obs): 377 | self.rev_eval_ohist[0] = obs 378 | self.eval_ohist[-1] = obs 379 | 380 | 381 | def empty_history(self): 382 | self.ohist.fill(self.memory._emptyfloat) 383 | self.ahist.fill(self.memory._emptyint) 384 | self.rev_ohist.fill(self.memory._emptyfloat) 385 | self.rev_ahist.fill(self.memory._emptyint) 386 | 387 | def empty_eval_history(self): 388 | self.eval_ohist.fill(self.memory._emptyfloat) 389 | self.eval_ahist.fill(self.memory._emptyint) 390 | self.rev_eval_ohist.fill(self.memory._emptyfloat) 391 | self.rev_eval_ahist.fill(self.memory._emptyint) 392 | 393 | -------------------------------------------------------------------------------- /chimp/learners/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Learner ''' 2 | -------------------------------------------------------------------------------- /chimp/learners/chainer_backend.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (Double) Deep Q-Learning Algorithm Implementation 3 | Supports double deep Q-learning with on either GPU and CPU 4 | 5 | ''' 6 | 7 | import numpy as np 8 | import chainer 9 | import chainer.functions as F 10 | from chainer import optimizers 11 | from chainer import cuda 12 | from copy import deepcopy 13 | 14 | import pickle # used to save the nets 15 | 16 | class ChainerBackend(object): 17 | 18 | def __init__(self, settings, net = None): 19 | 20 | self.set_params(settings) 21 | 22 | self.source_net = None 23 | self.target_net = None 24 | if net is not None: 25 | self.set_net(net) 26 | 27 | 28 | def update(self, obs, a, r, obsp, term): 29 | """ 30 | Performs a single mini-batch update 31 | """ 32 | 33 | 
self.source_net.zerograds() # reset gradient storage to zero 34 | 35 | # compute loss and qval output layer 36 | loss, qvals = self.forward_loss(obs, a, r, obsp, term) 37 | 38 | qvals.backward() # propagate the loss gradient through the net 39 | self.optimizer.update() # carry out parameter updates based on the distributed gradients 40 | if self.gpu: 41 | return loss, qvals.data.get() 42 | else: 43 | return loss, qvals.data 44 | 45 | 46 | def forward_loss(self, obs, a, r, obsp, term): 47 | """ 48 | Computes the loss and gradients 49 | """ 50 | if self.gpu: 51 | return self.forward_loss_gpu(obs, a, r, obsp, term) 52 | else: 53 | return self.forward_loss_cpu(obs, a, r, obsp, term) 54 | 55 | 56 | def forward_loss_gpu(self, obs, a, r, obsp, term): 57 | # unpack 58 | ohist, ahist = obs 59 | ophist, aphist = obsp 60 | 61 | # move to GPU 62 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist) 63 | ophist, aphist = self.to_gpu(ophist), self.to_gpu(aphist) 64 | 65 | # transfer inputs into Chainer format 66 | ohist, ophist = chainer.Variable(ohist), chainer.Variable(ophist, volatile = True) 67 | ahist, aphist = chainer.Variable(ahist), chainer.Variable(aphist, volatile = True) 68 | 69 | # get target Q 70 | target_q_all = self.target_net(ophist, aphist) # forward prop 71 | target_q_max = np.max(target_q_all.data.get(), axis=1) # max Q for each entry in mini-batch 72 | 73 | # compute the target values for each entry in mini-batch 74 | target_q_vals = r + self.discount * target_q_max * np.invert(term) 75 | 76 | # compute the source q-vals 77 | source_q_all = self.source_net(ohist, ahist) # forward prop 78 | source_q_vals = source_q_all.data.get()[np.arange(source_q_all.data.shape[0]), a] 79 | 80 | # compute the loss grads 81 | qdiff = source_q_vals - target_q_vals 82 | 83 | # distribute the loss gradient into the shape of the net's output 84 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32) 85 | dQ[np.arange(dQ.shape[0]), a] = qdiff 86 | 87 | # set as the output grad layer 88 | source_q_all.grad = self.to_gpu(dQ) 89 | 90 | # compute loss 91 | loss = np.mean(dQ**2) 92 | 93 | return loss, source_q_all 94 | 95 | 96 | def forward_loss_cpu(self, obs, a, r, obsp, term): 97 | # unpack 98 | ohist, ahist = obs 99 | ophist, aphist = obsp 100 | 101 | # transfer inputs into Chainer format 102 | ohist, ophist = self.chainer_var(ohist), self.chainer_var(ophist, volatile = True) 103 | ahist, aphist = self.chainer_var(ahist), self.chainer_var(aphist, volatile = True) 104 | 105 | # get target Q 106 | target_q_all = self.target_net(ophist, aphist) 107 | target_q_max = np.max(target_q_all.data, axis=1) 108 | 109 | # compute the target values for each entry in mini-batch 110 | target_q_vals = r + self.discount * target_q_max * np.invert(term) 111 | 112 | # compute the source q-vals 113 | source_q_all = self.source_net(ohist, ahist) # forward prop 114 | source_q_vals = source_q_all.data[np.arange(source_q_all.data.shape[0]),a] 115 | 116 | # compute the loss 117 | qdiff = source_q_vals - target_q_vals 118 | 119 | # distribute the loss gradient into the shape of the net's output 120 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32) 121 | dQ[np.arange(dQ.shape[0]), a] = qdiff 122 | 123 | # set as the output grad layer 124 | source_q_all.grad = dQ 125 | 126 | # compute loss 127 | loss = np.mean(dQ**2) 128 | 129 | return loss, source_q_all 130 | 131 | 132 | def forward(self, obs): 133 | """ 134 | Returns the Q-values for the network input obs 135 | """ 136 | # turn train off for bn, dropout, etc 137 | 
self.source_net.train = False 138 | if self.gpu: 139 | return self.forward_gpu(obs) 140 | else: 141 | return self.forward_cpu(obs) 142 | 143 | 144 | def forward_cpu(self, obs): 145 | """ 146 | Performs forward pass on CPU, returns Q values 147 | """ 148 | # unpack 149 | ohist, ahist = obs 150 | # transfer inputs into Chainer format 151 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True) 152 | # evaluate 153 | qvals = self.source_net(ohist, ahist) 154 | return qvals.data 155 | 156 | def forward_gpu(self, obs): 157 | """ 158 | Performs forward pass on CPU, returns Q values 159 | """ 160 | # unpack 161 | ohist, ahist = obs 162 | # move to gpu 163 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist) 164 | # transfer inputs into Chainer format 165 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True) 166 | # evaluate 167 | qvals = self.source_net(ohist, ahist) 168 | return qvals.data.get() 169 | 170 | ################################################################# 171 | #################### Utility Functions ########################## 172 | ################################################################# 173 | 174 | def to_gpu(self, var): 175 | if var is None: 176 | return None 177 | return cuda.to_gpu(var) 178 | 179 | def chainer_var(self, var, volatile=False): 180 | if var is None: 181 | return None 182 | return chainer.Variable(var, volatile=volatile) 183 | 184 | def set_net(self, net): 185 | self.source_net = deepcopy(net) 186 | self.target_net = deepcopy(net) 187 | if self.gpu: 188 | cuda.get_device(0).use() 189 | self.source_net.to_gpu() 190 | self.target_net.to_gpu() 191 | self.optimizer.setup(self.source_net) 192 | self.target_net.train = False 193 | 194 | 195 | def params(self): 196 | ''' collect net parameters (coefs and grads) ''' 197 | self.source_net.params() 198 | 199 | 200 | def set_params(self, params): 201 | 202 | self.gpu = params.get('gpu',False) 203 | self.learning_rate = params.get('learning_rate',0.00025) 204 | self.decay_rate = params.get('decay_rate',0.95) 205 | self.discount = params.get('discount',0.95) 206 | self.clip_err = params.get('clip_err',False) 207 | self.target_net_update = params.get('target_net_update',10000) 208 | self.double_DQN = params.get('double_DQN',False) 209 | 210 | # setting up various possible gradient update algorithms 211 | opt = params.get('optim_name', 'ADAM') 212 | if opt == 'RMSprop': 213 | self.optimizer = optimizers.RMSprop(lr=self.learning_rate, alpha=self.decay_rate) 214 | 215 | elif opt == 'ADADELTA': 216 | print("Supplied learning rate not used with ADADELTA gradient update method") 217 | self.optimizer = optimizers.AdaDelta() 218 | 219 | elif opt == 'ADAM': 220 | self.optimizer = optimizers.Adam(alpha=self.learning_rate) 221 | 222 | elif opt == 'SGD': 223 | self.optimizer = optimizers.SGD(lr=self.learning_rate) 224 | 225 | else: 226 | print('The requested optimizer is not supported!!!') 227 | exit() 228 | 229 | if self.clip_err is not False: 230 | self.optimizer.add_hook(chainer.optimizer.GradientClipping(self.clip_err)) 231 | 232 | self.optim_name = params['optim_name'] 233 | -------------------------------------------------------------------------------- /chimp/learners/chainer_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_backend import ChainerBackend 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.learners.dqn_learner import DQNPolicy 12 | 13 | import numpy as np 14 | 15 | import chainer 16 | import chainer.functions as F 17 | import chainer.links as L 18 | from chainer import Chain 19 | 20 | settings = { 21 | 22 | # agent settings 23 | 'batch_size' : 32, 24 | 'print_every' : 500, 25 | 'save_dir' : 'results/nets_rocksample_belief_rmsprop', 26 | 'iterations' : 100000, 27 | 'eval_iterations' : 100, 28 | 'eval_every' : 1000, 29 | 'save_every' : 500, 30 | 'initial_exploration' : 500, 31 | 'epsilon_decay' : 0.00001, # subtract from epsilon every step 32 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 33 | 'epsilon' : 1.0, # Initial exploratoin rate 34 | 'learn_freq' : 1, 35 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 36 | 'model_dims' : (1,2), 37 | 38 | # simulator settings 39 | 'viz' : False, 40 | 41 | # replay memory settings 42 | 'memory_size' : 1000, # size of replay memory 43 | 'n_frames' : 1, # number of frames 44 | 45 | # learner settings 46 | 'learning_rate' : 0.00025, 47 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 48 | 'discount' : 0.95, # discount rate for RL 49 | 'clip_err' : False, # value to clip loss gradients to 50 | 'clip_reward' : 1, # value to clip reward values to 51 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 52 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 53 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 54 | 'gpu' : False, 55 | 'reward_rescale': False, 56 | 57 | # general 58 | 'seed_general' : 1723, 59 | 'seed_simulator' : 5632, 60 | 'seed_agent' : 9826, 61 | 'seed_memory' : 7563 62 | 63 | } 64 | 65 | n_actions = 2 66 | o_dims = settings['model_dims'] 67 | n_samples = settings['batch_size'] 68 | 69 | class TestNet(Chain): 70 | 71 | def __init__(self): 72 | super(TestNet, self).__init__( 73 | #l1=F.Bilinear(settings["history_sizes"][0], settings["history_sizes"][1], 20), 74 | l1=F.Linear(o_dims[1], 20, bias=0.0), 75 | l2=F.Linear(20, 10, bias=0.0), 76 | bn1=L.BatchNormalization(10), 77 | lout=F.Linear(10, n_actions) 78 | ) 79 | self.train = True 80 | # initialize avg_var to prevent divide by zero 81 | self.bn1.avg_var.fill(0.1), 82 | 83 | def __call__(self, ohist, ahist): 84 | h = F.relu(self.l1(ohist)) 85 | h = F.relu(self.l2(h)) 86 | h = self.bn1(h, test=not self.train) 87 | output = self.lout(h) 88 | return output 89 | 90 | def make_batch(n_samples, o_dims, n_actions): 91 | obs = np.zeros((n_samples,)+o_dims, dtype=np.float32) 92 | obsp = np.zeros((n_samples,)+o_dims, dtype=np.float32) 93 | a = np.zeros(n_samples, dtype=np.int32) 94 | r = np.zeros(n_samples, dtype=np.float32) 95 | term = np.zeros(n_samples, dtype=np.bool) 96 | for i in xrange(n_samples): 97 | obs[i] = np.random.uniform(0.0, 1.0, o_dims) 98 | a[i] = np.random.randint(n_actions) 99 | obsp[i] = (obs[i] + 0.25) if a[i] == 1 else (obs[i] - 0.25) 100 | obsp[i] = np.clip(obsp[i], 0.0, 1.0) 101 | r[i] = np.sum(obs[i]) 102 | return obs, a, r, obsp, term 103 | 104 | 105 | net = TestNet() 106 | custom_learner = 
ChainerBackend(settings) 107 | custom_learner.set_net(net) 108 | 109 | learner = DQNLearner(settings, custom_learner) 110 | 111 | policy = DQNPolicy(learner) 112 | 113 | obst, a, r, obsp, term = make_batch(10, o_dims, n_actions) 114 | 115 | for i in xrange(10): 116 | ohist = (obst[i], None) 117 | a = policy.action(ohist) 118 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None)) 119 | 120 | print "TRAINING" 121 | for i in xrange(3000): 122 | obs, a, r, obsp, term = make_batch(n_samples, o_dims, n_actions) 123 | ohist = (obs, None) 124 | ophist = (obsp, None) 125 | #loss, q_all = custom_learner.forward_loss(ohist, a, r, ophist, term) 126 | loss, q_all = learner.update(ohist, a, r, ophist, term) 127 | if i % 500 == 0: 128 | print loss 129 | 130 | 131 | for i in xrange(10): 132 | ohist = (obst[i], None) 133 | a = policy.action(ohist) 134 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None)) 135 | 136 | 137 | -------------------------------------------------------------------------------- /chimp/learners/dqn_learner.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (Double) Deep Q-Learning Algorithm Implementation 3 | Supports double deep Q-learning with on either GPU and CPU 4 | 5 | ''' 6 | 7 | import numpy as np 8 | import pickle # used to save the nets 9 | from copy import deepcopy 10 | 11 | class DQNLearner(object): 12 | 13 | def __init__(self, settings, backend): 14 | 15 | """ 16 | Functions that must be defined by the custom learner: 17 | - forward_loss(obs, a, r, obsp, term) # computes scores and loss 18 | - forward(obs) # computes scores 19 | - update(obs, a, r, obsp) # update the params 20 | - get_net() # returns the network object 21 | - set_net(net) # sets the source and target nets and moves to gpu (if needed) 22 | Fields owned by the learner: 23 | - source_net: generates source Q-vals 24 | - target_net: generates target Q-vals 25 | """ 26 | 27 | self.backend = backend 28 | 29 | self.clip_reward = settings.get('clip_reward', False) 30 | self.reward_rescale = settings.get('reward_rescale', False) 31 | self.r_max = 1 # keep the default value at 1 32 | 33 | 34 | def update(self, obs, a, r, obsp, term): 35 | r = self.pre_process_reward(r) 36 | return self.backend.update(obs, a, r, obsp, term) 37 | 38 | def forward_loss(self, obs, a, r, obsp, term): 39 | return self.backend.forward_loss(obs, a, r, obsp, term) 40 | 41 | def forward(self, obs): 42 | return self.backend.forward(obs) 43 | 44 | def copy_net_to_target_net(self): 45 | ''' update target net with the current net ''' 46 | self.backend.target_net = deepcopy(self.backend.source_net) 47 | 48 | def save(self,obj,name): 49 | pickle.dump(obj, open(name, "wb")) 50 | 51 | def load(self,name): 52 | return pickle.load(open(name, "rb")) 53 | 54 | def save_net(self,name): 55 | ''' save a net to a path ''' 56 | self.save(self.backend.source_net,name) 57 | 58 | def load_net(self,net): 59 | ''' load in a net from path or a variable''' 60 | if isinstance(net, str): # if it is a string, load the net from the path 61 | net = self.load(net) 62 | self.backend.set_net(net) 63 | 64 | 65 | def save_training_history(self, path='.'): 66 | ''' save training history ''' 67 | train_hist = np.array([range(len(self.train_rewards)),self.train_losses,self.train_rewards, self.train_qval_avgs, self.train_episodes, self.train_times]).T 68 | eval_hist = np.array([range(len(self.val_rewards)),self.val_losses,self.val_rewards, self.val_qval_avgs, self.val_episodes, 
self.val_times]).T 69 | # TODO: why is this here and not in agent? 70 | np.savetxt(path + '/training_hist.csv', train_hist, delimiter=',') 71 | np.savetxt(path + '/evaluation_hist.csv', eval_hist, delimiter=',') 72 | 73 | def params(self): 74 | """ 75 | Returns an iterator over netwok parameters 76 | Note: different back-ends will return different param containers 77 | """ 78 | # TODO: return a dictionary here? 79 | self.backend.params() 80 | 81 | 82 | def pre_process_reward(self, r): 83 | """ 84 | Clips and re-scales the rewards 85 | """ 86 | if self.clip_reward: 87 | r = np.clip(r,-self.clip_reward,self.clip_reward) 88 | if self.reward_rescale: 89 | self.r_max = max(np.amax(np.absolute(r)),self.r_max) 90 | r = r / self.r_max 91 | return r 92 | 93 | -------------------------------------------------------------------------------- /chimp/memories/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Experience Replay Memory ''' 2 | 3 | from replay_memory import ReplayMemoryHDF5 4 | from memory import ReplayMemory -------------------------------------------------------------------------------- /chimp/memories/mem_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from replay_memory import ReplayMemoryHDF5 3 | 4 | 5 | settings = { 6 | 'save_dir' : 'results/test', 7 | 'seed_memory' : 1, 8 | 'history_sizes' : (5, 2, 0), 9 | 'memory_size' : 1000, 10 | 'model_dims' : (1,20), 11 | 'batch_size' : 32 12 | } 13 | 14 | mem = ReplayMemoryHDF5(settings) 15 | 16 | o_dims = settings['model_dims'] 17 | 18 | for i in xrange(1000): 19 | obs = np.random.random(o_dims) + i # random obs 20 | a = np.random.randint(10) + i# 10 actions 21 | r = np.random.rand() + i 22 | obsp = np.random.random(o_dims) + i 23 | term = bool(np.random.binomial(1,0.1)) # 10% chance reach terminal state 24 | mem.store_tuple(obs, a, r, obsp, term) 25 | 26 | o,a,r,op,terms=mem.minibatch() 27 | #mem.close() 28 | -------------------------------------------------------------------------------- /chimp/memories/memory.py: -------------------------------------------------------------------------------- 1 | ''' 2 | An alternative replay memory that does not utilize HDF5 - less efficient 3 | ''' 4 | 5 | import numpy as np 6 | 7 | class ReplayMemory(object): 8 | 9 | def __init__(self, settings): 10 | 11 | self.random_state = np.random.RandomState(settings['seed_memory']) 12 | self.memory_size = settings['memory_size'] 13 | self.model_dims = settings['model_dims'] 14 | self.n_frames = settings['n_frames'] 15 | self.data = [np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32), 16 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32), 17 | np.zeros(self.memory_size, dtype=np.int32), 18 | np.zeros(self.memory_size, dtype=np.float32), 19 | np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32), 20 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32), 21 | np.zeros(self.memory_size, dtype=np.bool)] 22 | self.counter = 0 23 | 24 | # function to sample a mini-batch 25 | def minibatch(self, batch_size): 26 | # sampling a mini-batch of the given size with replacement 27 | ind = self.random_state.randint(0,min(self.counter,self.memory_size),batch_size) 28 | return self.data[0][ind], self.data[1][ind], self.data[2][ind], self.data[3][ind], self.data[4][ind], self.data[5][ind], self.data[6][ind] 29 | 30 | # function to 
store the observed experience and keep the count within the replay memory 31 | def store_tuple(self, s0, ahist0, a, r, s1, ahist1, episode_end_flag = False): 32 | 33 | # keep the most recent observations within the limit of the memory 34 | ind = self.counter % self.memory_size 35 | 36 | self.data[0][ind] = s0 37 | self.data[1][ind] = ahist0 38 | self.data[2][ind] = a 39 | self.data[3][ind] = r 40 | 41 | if not episode_end_flag: 42 | self.data[4][ind] = s1 43 | self.data[5][ind] = ahist1 44 | 45 | self.data[6][ind] = episode_end_flag 46 | 47 | self.counter += 1 48 | -------------------------------------------------------------------------------- /chimp/memories/replay_memory.py: -------------------------------------------------------------------------------- 1 | ''' Implements class for reading/writing experiences to the replay dataset. 2 | 3 | We assume 4 | (1) Actions and rewards for the full history fit comfortably in memory, 5 | (2) The belief state representation for the full history does not, 6 | (3) A single sample of belief states fits comfortably in memory. 7 | 8 | For instance, if the replay dataset stores the last 1 million experiences, 9 | then the history of actions is 1 byte x 1 M = 1 MB. The same holds for the 10 | history of rewards. However, a modest belief state representation might be 11 | a dense vector with a maximum of 1,000 Float64 elements (typical state spaces 12 | are on the order of millions). In this case the full history of 1 million 13 | states would be (1,000 elem x 8 bytes x 1 M = 8 GB). 14 | 15 | N.B.! 16 | Memory is organized as (a, r, s', end_of_game_flag). We refer to s' 17 | simply as "state". To sample (s, a, r, s', end_of_game_flag) 18 | we take s' from the current location in memory, and (a, r, s', end_of_game_flag) 19 | from the location one step forward. 20 | ''' 21 | 22 | import numpy as np 23 | import h5py 24 | import os 25 | 26 | class ReplayMemoryHDF5(object): 27 | ''' Wrapper around a replay dataset residing on disk as HDF5. 
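    Typical usage (see chimp/memories/mem_test.py): construct with a settings
    dictionary providing 'save_dir', 'seed_memory', 'history_sizes', 'memory_size',
    'model_dims', and 'batch_size', then call store_tuple(obs, a, r, obsp, term)
    once per step and minibatch() to sample history-augmented training batches.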
''' 28 | 29 | def __init__(self, settings, filename='memory.hdf5', overwrite=True, empty=-1): 30 | 31 | if not os.path.exists(settings['save_dir']): 32 | os.makedirs(settings['save_dir']) 33 | 34 | filename = settings['save_dir'] + '/' + filename 35 | self.random_state = np.random.RandomState(settings['seed_memory']) 36 | self.ohist_size, self.ahist_size, self.rhist_size = settings['history_sizes'] 37 | 38 | self.ahist_size = 1 if self.ahist_size is 0 else self.ahist_size 39 | self.rhist_size = 1 if self.rhist_size is 0 else self.rhist_size 40 | 41 | self.max_size = max(settings['history_sizes']) 42 | self.batch_size = settings['batch_size'] 43 | 44 | if overwrite: 45 | self.fp = h5py.File(filename, 'w') 46 | else: 47 | self.fp = h5py.File(filename, 'a') 48 | 49 | if all(x in self.fp for x in ('observations', 'actions', 'rewards', 'next_observations', 'terminals')): 50 | self.observations = self.fp['observations'] 51 | self.memory_size = self.observations.shape[0] 52 | 53 | self.actions = np.empty(self.memory_size, dtype=np.int32) 54 | self.fp['actions'].read_direct(self.actions) 55 | 56 | self.rewards = np.empty(self.memory_size, dtype=np.float32) 57 | self.fp['rewards'].read_direct(self.rewards) 58 | 59 | self.next_observations = self.fp['next_observations'] 60 | 61 | self.terminals = np.empty(self.memory_size, dtype=bool) 62 | self.fp['terminals'].read_direct(self.terminals) 63 | 64 | if self.memory_size != settings['memory_size']: 65 | print("Warning: dataset loaded from %s is of size %d, " 66 | "not %d as indicated in |settings|. Using existing size." 67 | % (filename, self.memory_size, settings['memory_size'])) 68 | 69 | else: 70 | self.memory_size = settings['memory_size'] 71 | obs_shape = settings['model_dims'] 72 | 73 | self.observations = self.fp.create_dataset('observations', (self.memory_size,) + obs_shape, dtype=np.float32) 74 | self.next_observations = self.fp.create_dataset('next_observations', (self.memory_size,) + obs_shape, dtype=np.float32) 75 | 76 | self.fp.create_dataset('actions', (self.memory_size,), dtype='int32') 77 | self.fp.create_dataset('rewards', (self.memory_size,), dtype='float32') 78 | self.fp.create_dataset('terminals', (self.memory_size,), dtype=bool) 79 | 80 | self.actions = np.empty(self.memory_size, dtype=np.int32) 81 | self.rewards = np.empty(self.memory_size, dtype=np.float32) 82 | self.terminals = np.empty(self.memory_size, dtype=np.bool) 83 | 84 | self.observations.attrs['head'] = 0 85 | self.observations.attrs['valid'] = 0 86 | 87 | # index of current "write" location 88 | self.head = self.observations.attrs['head'] 89 | 90 | # greatest index of any valid experience; i.e., [0, self.valid) 91 | self.valid = self.observations.attrs['valid'] 92 | 93 | # initialize histories 94 | self.ohist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32) 95 | self.ophist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32) 96 | self.ahist = np.zeros((self.batch_size, self.ahist_size), dtype=np.int32) 97 | self.rhist = np.zeros((self.batch_size, self.rhist_size), dtype=np.float32) 98 | self.thist = np.zeros((self.batch_size, self.ohist_size), dtype=np.bool) 99 | 100 | self._emptyint = np.int32(empty) 101 | self._emptyfloat = np.float32(empty) 102 | 103 | def minibatch(self): 104 | ''' Uniformly sample (o,a,r,o') experiences from the replay dataset. 
105 | 106 | Args: 107 | batch_size: size of mini-batch 108 | 109 | Returns: 110 | Five numpy arrays that correspond to o, a, r, o', and the terminal 111 | state indicator. 112 | ''' 113 | batch_size = self.batch_size 114 | if batch_size >= self.valid: 115 | raise ValueError("Can't draw sample of size %d from replay dataset of size %d" 116 | % (batch_size, self.valid)) 117 | 118 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size 119 | max_hist = self.max_size 120 | 121 | indices = self.get_indices(batch_size) 122 | 123 | self.clear_history() 124 | 125 | # TODO: can we get rid of this loop by sorting indices and then reshaping? 126 | for i in xrange(batch_size): 127 | # all histories end on the same index 128 | endi = indices[i] 129 | starti = endi - max_hist 130 | # starting indices if there are no terminal states 131 | starto, starta, startr = endi-ohist_size, endi-ahist_size, endi-rhist_size 132 | 133 | # look backwards and find the first terminal state 134 | termarr = np.where(self.terminals[starti:endi-1]==True)[0] 135 | termidx = starti 136 | if termarr.size != 0: 137 | termidx = endi - (endi-starti - termarr[-1]) + 1 138 | 139 | # if a history starts before the terminal state, move its start index forward 140 | starto = termidx if starto < termidx else starto 141 | starta = termidx if starta < termidx else starta 142 | startr = termidx if startr < termidx else startr 143 | 144 | ohl, ahl, rhl = (endi - starto), (endi - starta), (endi - startr) 145 | 146 | # load from memory 147 | self.ohist[i, ohist_size-ohl:] = self.observations[xrange(starto, endi)] 148 | self.ophist[i, ohist_size-ohl:] = self.next_observations[xrange(starto, endi)] 149 | self.ahist[i, ahist_size-ahl:] = self.actions[xrange(starta, endi)] 150 | self.rhist[i, rhist_size-rhl:] = self.rewards[xrange(startr, endi)] 151 | self.thist[i, ohist_size-ohl:] = self.terminals[xrange(starto, endi)] 152 | 153 | return self.ohist, self.ahist, self.rhist, self.ophist, self.thist 154 | 155 | 156 | def get_indices(self, batch_size): 157 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size 158 | max_hist = self.max_size 159 | 160 | # we want to sample from valid history sets 161 | start_shift = self.random_state.randint(max_hist) 162 | 163 | # indices corresponding to the ranges from which to sample 164 | indices = self.random_state.choice(xrange(1,self.valid/max_hist), size=batch_size, replace=False) 165 | # shift all the indices by the random offset 166 | indices *= max_hist 167 | indices += start_shift 168 | 169 | return indices 170 | 171 | 172 | def store_tuple(self, obs, action, reward, obsp, terminal): 173 | ''' Stores an experience tuple into the replay dataset, i.e., a 174 | tuple (obs, action, reward, obsp, terminal) where |obsp| is the observation 175 | made when the agent takes |action| and receives |reward|, 176 | while |obs| is the observation made prior to taking |action|. 177 | The observation |obs| is assumed to be at index (self.head).
178 | 179 | Args: 180 | obs: observation made at time t of shape provided by user (obs_shape) 181 | action: index of action chosen 182 | reward: float value of reward recieved after taking action a 183 | or None if the input action ended the game 184 | terminal: indicates if obsp is terminal 185 | 186 | ''' 187 | self.actions[self.head] = action 188 | self.rewards[self.head] = reward 189 | self.terminals[self.head] = terminal 190 | self.observations[self.head] = obs 191 | self.next_observations[self.head] = obsp 192 | 193 | # update head and valid pointers 194 | self.head = (self.head + 1) % self.memory_size 195 | self.valid = min(self.memory_size, self.valid + 1) 196 | 197 | def clear_history(self): 198 | self.ohist.fill(self._emptyfloat) 199 | self.ophist.fill(self._emptyfloat) 200 | self.ahist.fill(self._emptyint) 201 | self.rhist.fill(0.0) 202 | self.thist.fill(False) 203 | 204 | def close(self): 205 | ''' Stores the memory dataset into the file when program ends. ''' 206 | self.fp['actions'][:] = self.actions 207 | self.fp['rewards'][:] = self.rewards 208 | self.fp['terminals'][:] = self.terminals 209 | self.observations.attrs['head'] = self.head 210 | self.observations.attrs['valid'] = self.valid 211 | self.fp.close() 212 | 213 | def __del__(self): 214 | try: 215 | self.close() 216 | except: 217 | pass # already closed 218 | -------------------------------------------------------------------------------- /chimp/pre_trained_nets/mountain_car.net: -------------------------------------------------------------------------------- 1 | ccopy_reg 2 | _reconstructor 3 | p0 4 | (c__main__ 5 | TestNet 6 | p1 7 | c__builtin__ 8 | object 9 | p2 10 | Ntp3 11 | Rp4 12 | (dp5 13 | S'_persistent' 14 | p6 15 | (lp7 16 | sS'name' 17 | p8 18 | NsS'_children' 19 | p9 20 | (lp10 21 | S'bn2' 22 | p11 23 | aS'bn1' 24 | p12 25 | aS'lout' 26 | p13 27 | aS'l4' 28 | p14 29 | aS'l2' 30 | p15 31 | aS'l3' 32 | p16 33 | aS'l1' 34 | p17 35 | asg11 36 | g0 37 | (cchainer.links.normalization.batch_normalization 38 | BatchNormalization 39 | p18 40 | g2 41 | Ntp19 42 | Rp20 43 | (dp21 44 | g6 45 | (lp22 46 | S'avg_mean' 47 | p23 48 | aS'avg_var' 49 | p24 50 | aS'N' 51 | p25 52 | asg23 53 | cnumpy.core.multiarray 54 | _reconstruct 55 | p26 56 | (cnumpy 57 | ndarray 58 | p27 59 | (I0 60 | tp28 61 | S'b' 62 | p29 63 | tp30 64 | Rp31 65 | (I1 66 | (I10 67 | tp32 68 | cnumpy 69 | dtype 70 | p33 71 | (S'f4' 72 | p34 73 | I0 74 | I1 75 | tp35 76 | Rp36 77 | (I3 78 | S'<' 79 | p37 80 | NNNI-1 81 | I-1 82 | I0 83 | tp38 84 | bI00 85 | S'\xc7\xfac;\x08\x15B<\x00\x00\x00\x00\x8b-u;k\x18\xc9:\x07\x8bE<\x18\xa6\xff:\xc4\x83!=\x0b<\x89<\xe3\x08\x958' 86 | p39 87 | tp40 88 | bsg8 89 | g11 90 | sS'decay' 91 | p41 92 | F0.9 93 | sS'eps' 94 | p42 95 | F1e-05 96 | sS'_cpu' 97 | p43 98 | I01 99 | sS'beta' 100 | p44 101 | cchainer.variable 102 | Variable 103 | p45 104 | (g26 105 | (g27 106 | (I0 107 | tp46 108 | g29 109 | tp47 110 | Rp48 111 | (I1 112 | (I10 113 | tp49 114 | g36 115 | I00 116 | S'\n{\x1e\xbeZ\xd7\x87\xbe\x9er\x00\xbf\x0fp\xe5\xbe\xd2\xe1\xd2\xbe`\x1b\x05\xbfOte>`s\x10\xbfY\x8f\x06\xbf\x8f8\x87\xbe' 117 | p50 118 | tp51 119 | bcchainer.flag 120 | Flag 121 | p52 122 | (Ntp53 123 | Rp54 124 | g44 125 | tp55 126 | Rp56 127 | sS'_params' 128 | p57 129 | (lp58 130 | S'gamma' 131 | p59 132 | ag44 133 | asg25 134 | I0 135 | sg24 136 | g26 137 | (g27 138 | (I0 139 | tp60 140 | g29 141 | tp61 142 | Rp62 143 | (I1 144 | (I10 145 | tp63 146 | g36 147 | I00 148 | 
S'\x97\xa4\xb9=\xf74\xbd=vR\xb8=&\x95\xb9=\x88\x9a\xb8=\xeb\x1b\xc3=\\\xcd\xb8=\xb8\xa0\xf0=\r\x94\xc7=NS\xb8=' 149 | p64 150 | tp65 151 | bsg59 152 | g45 153 | (g26 154 | (g27 155 | (I0 156 | tp66 157 | g29 158 | tp67 159 | Rp68 160 | (I1 161 | (I10 162 | tp69 163 | g36 164 | I00 165 | S'N\x80\x91?k!\xa2?\xfe1\xae?\x1d\xa7\xb9?\r\xf7\xb8? O\xae?V\x8f\xc4?C}\x94?\x84\xbe\xa8?\xb5n\x9f?' 166 | p70 167 | tp71 168 | bg54 169 | g59 170 | tp72 171 | Rp73 172 | sbsg12 173 | g0 174 | (g18 175 | g2 176 | Ntp74 177 | Rp75 178 | (dp76 179 | g6 180 | (lp77 181 | g23 182 | ag24 183 | ag25 184 | asg23 185 | g26 186 | (g27 187 | (I0 188 | tp78 189 | g29 190 | tp79 191 | Rp80 192 | (I1 193 | (I10 194 | tp81 195 | g36 196 | I00 197 | S'\x05\xb2\xd6;\x00\x00\x00\x00B\xecL;\x8b+\xc2<\x00\x00\x00\x00\xab\xbc\xf8;OJ\x1e8\x18\xec\xfa<\x7f\x1d\x87\x08>\x87\x17'>\xa4\xa9\xb6=s\x83o=\xda\xff\x01>\xb7\xb9\x92=\x91G\x1d\xbe" 223 | p88 224 | tp89 225 | bg54 226 | g44 227 | tp90 228 | Rp91 229 | sg57 230 | (lp92 231 | g59 232 | ag44 233 | asg25 234 | I0 235 | sg24 236 | g26 237 | (g27 238 | (I0 239 | tp93 240 | g29 241 | tp94 242 | Rp95 243 | (I1 244 | (I10 245 | tp96 246 | g36 247 | I00 248 | S'\x7f\x89\xb8=vR\xb8=p\\\xb8=\xe0\xa3\xbc=vR\xb8=\xb0\xc7\xb8=\xb3R\xb8=M\xc5\xbd=H\xfc\xb9=:\xa6\xb9=' 249 | p97 250 | tp98 251 | bsg59 252 | g45 253 | (g26 254 | (g27 255 | (I0 256 | tp99 257 | g29 258 | tp100 259 | Rp101 260 | (I1 261 | (I10 262 | tp102 263 | g36 264 | I00 265 | S'\xf6T\xa1?c\xd6\x7f?0\xb5\xdc?\xe0\xa8\x8f?\x00\x00\x80?#"\xba?2\xd1\xae?*\xfa\x8d?_\xa0\x80?\t\x08\xa5?' 266 | p103 267 | tp104 268 | bg54 269 | g59 270 | tp105 271 | Rp106 272 | sbsg13 273 | g0 274 | (cchainer.links.connection.linear 275 | Linear 276 | p107 277 | g2 278 | Ntp108 279 | Rp109 280 | (dp110 281 | g6 282 | (lp111 283 | sg8 284 | g13 285 | sS'W' 286 | p112 287 | g45 288 | (g26 289 | (g27 290 | (I0 291 | tp113 292 | g29 293 | tp114 294 | Rp115 295 | (I1 296 | (I3 297 | I10 298 | tp116 299 | g36 300 | I00 301 | S'%\x0b!\xbf\x83\xc2V>V\xd6\xe3\xbe^\xb0v\xbeQv?>\x90\xf1\x19\xbfk\xcbe?\xf2\xdf\x8b\xbe\xb1h\xf7\xbe\xb0\xce\x97>\xd4\xc1Q\xbe\xf2\xa8O\xbfY\xf5\xee\xbe\xad\x08(\xbf\x98?,\xbf\xa8\x862\xbfMj%?\x19YN\xbe\x9aF\x06\xbf\x96\xb0\x08\xbf3-\xc7\xbe\xab\xe2<\xbd\xb2\x85\xa7\xbe-i\x85\xbe,\xb2\x18\xbf\x94\xd5"\xbf\xc3\xc38?\xa4\xce(\xbeW28\xbf\xd6\xea\xd2\xbe' 302 | p117 303 | tp118 304 | bg54 305 | g112 306 | tp119 307 | Rp120 308 | sg29 309 | g45 310 | (g26 311 | (g27 312 | (I0 313 | tp121 314 | g29 315 | tp122 316 | Rp123 317 | (I1 318 | (I3 319 | tp124 320 | g36 321 | I00 322 | S"Y'6=\x85\xeb\x12?H\x1c\x93>" 323 | p125 324 | tp126 325 | bg54 326 | g29 327 | tp127 328 | Rp128 329 | sg57 330 | (lp129 331 | g112 332 | ag29 333 | asg43 334 | I01 335 | sbsg43 336 | I01 337 | sg14 338 | g0 339 | (g107 340 | g2 341 | Ntp130 342 | Rp131 343 | (dp132 344 | g6 345 | (lp133 346 | sg8 347 | g14 348 | sg112 349 | g45 350 | (g26 351 | (g27 352 | (I0 353 | tp134 354 | g29 355 | tp135 356 | Rp136 357 | (I1 358 | (I10 359 | I10 360 | tp137 361 | g36 362 | I00 363 | S'\x0fq\x99>z\xb6\x08>]?\x8b\xbe\x93<\xfd\xbep\x98p\xbd\xb0\xf4\xe4\xbd\xec\n\xa6\xbe\xe2\x1f\x9c<*\x18\xb4;\x03\xa3e\xbe\xb1\x02$>\n&\xaa\xbd`\xfb{>\x8b\x02\xb4>\x10v\xb9\xbel\x15\xfb\xbd.\x88D\xbd`Vy\xbe\xee$ ?0\xc8\x8c=\xb6G\x17>\xd4\xcb\xe6\xbe\xb6\xa2\n\xbf\xed\x94\xa1\xbd\xd9\x19\x1a?\xd7\xf6\x9c\xbe\x96\xca\xd4\xbdE\x13\xda\xbd\xb8\x8d8\xbfl\x02\xab=t@\xc9\xbdT\xa4\xf6\xbcP\xd5\x83\xbd>\xf8Q\xbe\xeb~\xae\xbdk 
\x8f=4\x81\xcf\xber\xa0,>\xdb2\xa3>\x0bk\x08=\xdb4\x97\xbd/\x15\xab<\xcb=\xa4\xbc\x15(\x1e=\x1a\x12\xd1\xben\x90\x0c\xbeF\xd7\xf8\xbd\x15?\xca=\xbbj|\xbe\xda\xc5\xb0\xbe\xcaLv\xbe\xb37\xd9=\x07a\x0e\xbfS\x0b\x82>\xf3\xe9\x07?e\x7f2\xbfC\xd2\x08?\x0e\xb0\xac\xbe\xf0c\x97\xbf,b\x1a?4\xd8\xa5\xbe*Z\xa9=\xa6\xc5\x85>\x91\xc94\xbf\xed\xda\x14\xbe\xe8u9?7&;\xbe\xaf9e=\xcca\xc7=\xc9\x08\x1f\xbf\xc1\x9f\x94\xbe\\\x8b\x89\xbe\x9be\xd2\xbd\xb4\xb9;?[\x80\xab>X\xf8$\xbf\xc7\x85+\xbdtY\xff\xbdz\x1fA\xbf\x02\xe8&?\xb6\xbd\xdc\xbe\xf1\x91\xd0\xbcz\xea\x03\xbe\xd5;#>\xe0\xe1\x97\xbd\x8e\xc82\xbf\xdf\xe0.?\xdeZ\x84>\xa8\xb7]\xbf\x1a\x82\x1e?\xa2\xff\x8f\xbesM\xcf\xba\nv\xb7=\xbe\xc4\x88\xbeh\x93a\xbe$i\xe0>\xea\xf50\xbf\xe8u\x8e\xbe\xb7\xf9\x01\xbc\x17\x8d\xf3\xbd' 364 | p138 365 | tp139 366 | bg54 367 | g112 368 | tp140 369 | Rp141 370 | sg29 371 | g45 372 | (g26 373 | (g27 374 | (I0 375 | tp142 376 | g29 377 | tp143 378 | Rp144 379 | (I1 380 | (I10 381 | tp145 382 | g36 383 | I00 384 | S'\xfbK\xaa9.\xa1\xbc\xbbb\xa4\xfa=\xc6\xf1\xd0\xb62Q\xf2:\xfb-\xb2=\x80\x00I=\xc6F=\xbe\xc8\x88\x15>\xa6\xb1u\xbb' 385 | p146 386 | tp147 387 | bg54 388 | g29 389 | tp148 390 | Rp149 391 | sg57 392 | (lp150 393 | g112 394 | ag29 395 | asg43 396 | I01 397 | sbsS'train' 398 | p151 399 | I00 400 | sg15 401 | g0 402 | (g107 403 | g2 404 | Ntp152 405 | Rp153 406 | (dp154 407 | g6 408 | (lp155 409 | sg8 410 | g15 411 | sg112 412 | g45 413 | (g26 414 | (g27 415 | (I0 416 | tp156 417 | g29 418 | tp157 419 | Rp158 420 | (I1 421 | (I10 422 | I20 423 | tp159 424 | g36 425 | I00 426 | S'pqy>\x08\xde\x8f\xbd\x90\x8a\xb5>\xcd\xb8F?X\xdfF\xbe\xe0\xde\xf3=\x06\t\x1a?p\xff\xef=eu|>q\xb9\xac>\xe5k\x89\xbe\x96\xf2q\xbc)\xdd\xc0\xbe\x02\x84w\xbe\xd2\x04\xad>)\xab\x85\xbd\x89-\x14>\xae\x94\x08\xbes\x80\x9e=\x9c\x7f\x05>%N=>C;\x03?\xf55\x16\xbfB\xc8\x83\xbe\xcb\xa5\xed\xbd\x9f\x13`>\xca$c\xbcY\xe9+>\x8b\xff\xc5\xbdyh\x93\xbe\xf3\xd9S\xbeF@\x15>\xdc7]\xbe\x12\xb3\xce\xbe*yv\xbd\xc1X;\xbe\x10\xec\xa6>\xcf\xe8I\xbe\x00\xf5}\xbd\xe9\x15\x01>\xd2\xa8\x02\xbe\xb2\xc0\xae\xbe\xe2\xe7\xd7<\xf5\x91\xdd\xbe\x875\x19\xbc}\xdb\xdf<:_\xc0\xbd\xc55\x97>\x12b >\n\xfcS\xbd\xa1\xe1\x0c>\x16\x04\xec>\xe2c\x14>\x7f\xbfw>\xa6\xe1k>\x1al\x03>}W\xb0\xbesw\xd7\xbed5\x8c\xbe\x06\x99\x07>\xa4@=>%\x81\x11?\x89\x98\x05\xbe\x04e%\xbf\xc4@\x0b<\xa0|\x8b\xbe^5L\xbf\xd1\x1e\x05>X\x96+>\rZd\xbe\xf5\xb1\x8e>\xcaRP\xbe\xc3\xb3\xce>\x9d\xd8\xdd>+\x13\xae>p\xf0\x06>1\xf6w\xbe\x0f\xd3\xf1>Z\xb9\x9a\xbd\\\x1c4>^\x84*\xbe\x12\x90\x00>\xe0o\x91>\xb0#U\xbc8D\xad\xbe\xf0\xa4\xd1\xbd\xd3\xae\x81\xbe\xc6\xf5\xd0\xbeL\xd2\x12\xbd\xd4<\x95>\xdf `=\x8aS\xe2\xbdP\t\xc5=U1\x9e\xbe\xbc\xb7\xaf\xbe\x0eo\x8f\xbe\x92\xc0\x9c\xbe\x1c\xe1\x88\xbe\xff\x17\x96>rM{<[%~\xbeYpi=\xc4Y\x00\xbf\xb5?x\xbd^\xa8\xb0>s\x13\x01\xbf\xb2\xc4\x0c\xbe\x85\x01\xcc=\xa68\xcf>\xf2\x8d*\xbd\x13\xd6x>A\xde~\xbc\xa1K\x99>\xff}?=O\xdb<>\x0c:%?\xd4\xbb\xb8;\xc1\xae\x1f\xbc\rD\xfb\xbd\x95\xc9\xfc 
L\xb8\xbd\x9c\x84\xdf=mc\x9f\xbe\xe9\x08\x16=\xad\xad!?^s\x95\xbe\xdd\xd0r=\xe4+\xd1>\xeep6>\x9a[\x98\xbd[\xed\xa8>\x1aP\x06>\xea\xa1\x8b\xbcC\xa7\x83>\x95Ya>)r7\xbf68\xe3\xbd\xe4\xea4>a\xbb\x82>?\x08\xb2\xbd33\x03?=\xc5\x08?\xedq\x85>\xabjY>F\r|=|z\x89\xbe$\xdf\xd8\xbdT\xf7\x8f\xbe\xa0\x82\xc6>\xeaS\x1b>\xe2\t\xd3;\xee\xef\n>\x84-\xf0>\x95\x9a\x0c\xbe\xf9\x07\x90=\xea\xac\x00\xbfe\xf8%\xbdw\xf8\x11\xbd\x95\x10\xdb\xbcl\x08;\xbe\xe2\xd9e>9\xb5{>\x8ekF>\x9b\xbcN>\x03S\xe9=&\xc3>\xbd\x1a\xbd\xe0=\x99\x90\x8a\xbc*\xbdQ\xbe?,\xff=\xde0\xae\xbe{\xa7\xec>\xb5|\xeb>\x1c\x84W\xbc\x06\xd6\x0b?i%`?\x0ej\xa0\xbe\xf5\x0c9\xbe?+\xe2\xbd\x82\xf9\x84\xbd\xa8%5\xbc\x80\x1f)\xbcZ\x1a\xde\xbc<\x88\xcc\xbc\xc5\xbf\xe7\xbe+\xa0\xfb\xbd$\x85\x9d\xbc4j\x91>\x9c\xb2d\xbd' 427 | p160 428 | tp161 429 | bg54 430 | g112 431 | tp162 432 | Rp163 433 | sg29 434 | g45 435 | (g26 436 | (g27 437 | (I0 438 | tp164 439 | g29 440 | tp165 441 | Rp166 442 | (I1 443 | (I10 444 | tp167 445 | g36 446 | I00 447 | S'C\x8dn=X\x8b&\xbaoM\xc7\xbd/U\n>\x00\x00\x00\x00\xee(\xd4=\xbf\xcd\xc9=\xa7g\x04>\xe8w\x8e=6\x01G\xbd' 448 | p168 449 | tp169 450 | bg54 451 | g29 452 | tp170 453 | Rp171 454 | sg57 455 | (lp172 456 | g112 457 | ag29 458 | asg43 459 | I01 460 | sbsg16 461 | g0 462 | (g107 463 | g2 464 | Ntp173 465 | Rp174 466 | (dp175 467 | g6 468 | (lp176 469 | sg8 470 | g16 471 | sg112 472 | g45 473 | (g26 474 | (g27 475 | (I0 476 | tp177 477 | g29 478 | tp178 479 | Rp179 480 | (I1 481 | (I10 482 | I10 483 | tp180 484 | g36 485 | I00 486 | S'\xd1\xf7\xe0\xbd\xf5\xedz\xbd\'\x07\x82\xbfCx\xa1\xbe\n\xc8g=r\xba\x97>Q\xd4/\xbe\x15\x1e\xea\xbd\xea5\xf2>Tm\x9e\xbd\xc7 \xbf>}\xd7\x0e?\x82\xcb\x93\xbe\xbd[\x1a\xbf\x04f0>?\xa7c\xbeh\xf8\x8a\xbd_\x05\x88\xbe\x0c\x03\x8e\xbf\xe45\xc5\xbduL\xe2\xbd~L\xb1\xbe\n\xcb\x90?\xc0x\xaf\xbeI\xd3==Td\x08\xbe]n7\xbd\xa4\x82\xa3>,\x9a\x8b>\xbeb\x82?\xc0\xf0\xee=.[\x84\xbez!\x1e>\x82^\x83\xbd\xaa\xd9/\xbe;\xe4F=\xca\xa1\xae>\x93\xc5w\xbf3\xfa/\xbd\xbf\xc6\xb7=^-I\xbe\x0c\xb84>\xa8~t\xbf\x075\xb6>\xe2\x8e;>\x1c\x05\xdb>\xac\xfb*?\x18C\x1b?\xc4\xad;\xbe\xd9\xbc~\xbe\x1e\xce\xf0>\x12\xf4\xf7\xbe\x93\x92E?j\x80\n\xbed\xb3U>\xe4\xfd\xf7\xbe%\xde\x8e\xbf\xfbF\xe3>\xcf\x96\x19>\x1e\xea\x14>\xd8\xc7\x0c\xbf\x84=\x91?\\5\x80\xbf\x9e\xe7q\xbd%v\xb1>\xddV\x11>G\xe7y>\x0c\xdb\xd4>\'\xed\xc7>\xc2\x1aA\xbe\xd0\xa4\xb2\xbc\x1f\xef\x0e\xbe\x97^R?Y\xf0T=\xb1\x00\x87\xbe\xd0\x85c=\xb2."\xbf\xa0\xc6\xe6>/\xf19>xv\x02?\x92\xff2\xbe\xca\x01\x12\xbf(\xeb\xce\xbd\xf1\x10\x1f>TF\r?\xe4!>?\xe3\xea3\xbd\xef\x96N\xbe\xc7V\xc4\xbe\xfea|\xbe\x89\xd0\x04\xbf\xe2W\x94>P[\xb1\xbf\x9f\xa5[?F\x14\xc9=\xe3\xd2j?M\x19\x1d\xbex\x91\x92\xbe\xcf\xe9\n>\xf9\xd3\x8b\xbf' 487 | p181 488 | tp182 489 | bg54 490 | g112 491 | tp183 492 | Rp184 493 | sg29 494 | g45 495 | (g26 496 | (g27 497 | (I0 498 | tp185 499 | g29 500 | tp186 501 | Rp187 502 | (I1 503 | (I10 504 | tp188 505 | g36 506 | I00 507 | S'E\ro\xbd6P\xc9;\xde\x82\x03\xbeR\xa0"<\xb9\x89\xaa=#\x91\x14\xbd\x83\xbc\xd4=L\xf3)\xbd\xe7i\x97=O\xff\n>' 508 | p189 509 | tp190 510 | bg54 511 | g29 512 | tp191 513 | Rp192 514 | sg57 515 | (lp193 516 | g112 517 | ag29 518 | asg43 519 | I01 520 | sbsg17 521 | g0 522 | (g107 523 | g2 524 | Ntp194 525 | Rp195 526 | (dp196 527 | g6 528 | (lp197 529 | sg8 530 | g17 531 | sg112 532 | g45 533 | (g26 534 | (g27 535 | (I0 536 | tp198 537 | g29 538 | tp199 539 | Rp200 540 | (I1 541 | (I20 542 | I2 543 | tp201 544 | g36 545 | I00 546 | 
S"\xa7'\xb5\xbf\x964\xbf\xbe\xb2p\xee>C\xd6d\xbf\xe5\xc0\xa0?\x87\xaa\x9e@\x03\xf0\xcc\xbdxr\x82\xc0D\x0e\xb7?Y,>\xc0Yz\x8d?O\xa97@\x0ej\xcd=\xb6\xda\xf0\xbf\xb8\xab\xab?\xd3m\x86=\x0c\xeb\xc1\xbe\xf9\x7f\xaf\xbf\x0bW\x9d?\xbe\x96\xa3?*c@?\xd7R\xbc\xbf\xbe\x82@\xbf\xfd\x80\xed\xbf}(->\xf22\xd6\xbfW \r\xbfw\x81+\xbf\xed\x97J>\xde\xdb\xc3>V\x14S\xbe<\x97 \xc0\x00T\xec>2[\xd4?\xb6\x1e\xba\xbe^\xea\xda\xbe\xc11\x81\xbf\x9aM\xf0?\t\x1f\xa9>\xb2A\x9c\xbe" 547 | p202 548 | tp203 549 | bg54 550 | g112 551 | tp204 552 | Rp205 553 | sg29 554 | g45 555 | (g26 556 | (g27 557 | (I0 558 | tp206 559 | g29 560 | tp207 561 | Rp208 562 | (I1 563 | (I20 564 | tp209 565 | g36 566 | I00 567 | S'y\x10E>\x90\x16d>\xeb\xda\x9d>\xe4\xea\x9b\xbd6\xd6\x89>sk\x0c=\xdc\xdeZ\xbd\xb9\xfe9\xbe.r\x88>\x1a\xa6`\xbd&A\xa7>_\xc9\xc3>c\x0cg>\xf0\xcfQ>\x106\x83>\xd6\xe2\xf1=\xe7g\xa4>x\xb0i>4\x1a\x1a=\xbfo\xc7>' 568 | p210 569 | tp211 570 | bg54 571 | g29 572 | tp212 573 | Rp213 574 | sg57 575 | (lp214 576 | g112 577 | ag29 578 | asg43 579 | I01 580 | sbsg57 581 | (lp215 582 | sb. -------------------------------------------------------------------------------- /chimp/simulators/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | * Arcade Learning Environment for Atari game simulation 5 | * Tiger Problem 6 | 7 | Required functions: 8 | __init__, get_screenshot, act, reward, game_over, reset_game 9 | 10 | Require attributes: 11 | n_actions, 12 | 13 | ''' 14 | -------------------------------------------------------------------------------- /chimp/simulators/atari/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | ''' 5 | 6 | from atari import AtariSimulator -------------------------------------------------------------------------------- /chimp/simulators/atari/atari.py: -------------------------------------------------------------------------------- 1 | from ale_python_interface import ALEInterface 2 | import pygame 3 | 4 | import numpy as np 5 | import scipy.misc as spm 6 | 7 | 8 | class AtariSimulator(object): 9 | 10 | def __init__(self, settings): 11 | 12 | '''Initiate Arcade Learning Environment (ALE) using Python interface 13 | https://github.com/bbitmaster/ale_python_interface/wiki 14 | 15 | - Set number of frames to be skipped, random seed, ROM and title for display. 16 | - Retrieve a set of legal actions and their number. 17 | - Retrieve dimensions of the original screen (width/height), and set the dimensions 18 | of the cropped screen, together with the padding used to crop the screen rectangle. 19 | - Set dimensions of the pygame display that will show visualization of the simulation. 20 | (May be cropped --- showing what the learner sees, or not --- showing full Atari screen) 21 | - Allocate memory for generated grayscale screenshots. 
Accepts dims in (height/width) format 22 | ''' 23 | 24 | self.ale = ALEInterface() 25 | self.ale.setInt("frame_skip",settings["frame_skip"]) 26 | self.ale.setInt("random_seed",settings["seed_simulator"]) 27 | self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"]) 28 | 29 | self.title = "ALE Simulator: " + str(settings["rom"]) 30 | self.actions = self.ale.getLegalActionSet() 31 | self.n_actions = self.actions.size 32 | 33 | self.screen_dims = self.ale.getScreenDims() 34 | self.model_dims = settings['model_dims'] 35 | self.pad = settings['pad'] 36 | 37 | print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1])) 38 | print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1])) 39 | 40 | self.viz_cropped = settings['viz_cropped'] 41 | if self.viz_cropped: 42 | self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2)) 43 | else: 44 | self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2)) 45 | 46 | # preallocate an array to accept ALE screen data (height/width) ! 47 | self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8) 48 | 49 | 50 | def get_screenshot(self): 51 | '''returns a cropped snapshot of the simulator 52 | - store grayscale values in a preallocated array 53 | - cut out a square from the rectangle, using provided padding value 54 | - downsample to the desired size and transpose from (height/width) to (width/height) 55 | ''' 56 | 57 | self.ale.getScreenGrayscale(self.screen_data) 58 | self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:] 59 | self.frame = spm.imresize(self.tmp,self.model_dims[::-1],interp='nearest').T #, interp='nearest' 60 | 61 | return self.frame 62 | 63 | 64 | def act(self,action_index): 65 | '''function to transition the simulator from s to s' using provided action 66 | the action that is provided is in form of an index 67 | simulator deals with translating the index into an actual action''' 68 | 69 | self.last_reward = self.ale.act(self.actions[action_index]) 70 | 71 | 72 | def reward(self): 73 | '''return reward - has to be called after the "act" function''' 74 | 75 | return self.last_reward 76 | 77 | 78 | def episode_over(self): 79 | '''return a boolean indicator on whether the game is still running''' 80 | 81 | return self.ale.game_over() 82 | 83 | 84 | def reset_episode(self): 85 | '''reset the game that ended''' 86 | 87 | self.ale.reset_game() 88 | 89 | 90 | def init_viz_display(self): 91 | '''initialize display that will show visualization''' 92 | 93 | pygame.init() 94 | self.screen = pygame.display.set_mode(self.display_dims) 95 | if self.title: 96 | pygame.display.set_caption(self.title) 97 | 98 | 99 | def refresh_viz_display(self): 100 | '''if display is shut down, shut the game down 101 | else move the current simulator's frame (cropped or not cropped) into the pygame display, 102 | after expanding it 2x along x and y dimensions''' 103 | 104 | for event in pygame.event.get(): 105 | if event.type == pygame.QUIT: 106 | exit 107 | 108 | if self.viz_cropped: 109 | self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed 110 | else: 111 | self.surface = pygame.surfarray.make_surface(self.screen_data.T) 112 | 113 | self.screen.blit(pygame.transform.scale2x(self.surface),(0,0)) 114 | pygame.display.flip() 115 | 116 | -------------------------------------------------------------------------------- 
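All of the simulators in this package expose the same small interface (get_screenshot / act / reward / episode_over / reset_episode plus an n_actions attribute), so a simulator can be exercised on its own before a learner is attached. The snippet below is an illustrative sketch only: random_rollout is a hypothetical helper (not part of the package), and it assumes sim is any object exposing the interface above, for example an AtariSimulator built from a user-supplied settings dictionary.

import numpy as np

def random_rollout(sim, n_steps, seed=0):
    """Drive a Chimp-style simulator with uniformly random actions (sketch)."""
    rng = np.random.RandomState(seed)
    sim.reset_episode()
    total_reward = 0.0
    for _ in range(n_steps):
        frame = sim.get_screenshot()         # current (possibly cropped) observation; would normally be fed to the agent
        action = rng.randint(sim.n_actions)  # random action index
        sim.act(action)                      # the simulator maps the index to an actual action
        total_reward += sim.reward()         # reward() is only valid after act()
        if sim.episode_over():
            sim.reset_episode()
    return total_reward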
/chimp/simulators/gym/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import OpenAI Gym Wrapper 3 | """ 4 | 5 | -------------------------------------------------------------------------------- /chimp/simulators/gym/gym_wrapper.py: -------------------------------------------------------------------------------- 1 | class GymWrapper(): 2 | 3 | def __init__(self, env): 4 | 5 | self.env = env 6 | self.last_reward = 0.0 7 | self.current_state = None 8 | self.terminal_flag = False 9 | self.n_actions = env.action_space.n 10 | self.model_dims = env.observation_space.shape 11 | 12 | def act(self, action): 13 | """ 14 | Transitions to the next state and computes the reward 15 | """ 16 | state, reward, done, info = self.env.step(action) 17 | self.last_reward = reward 18 | self.current_state = state 19 | self.terminal_flag = done 20 | def reward(self): 21 | return self.last_reward 22 | 23 | def get_screenshot(self): 24 | return self.current_state 25 | 26 | def episode_over(self): 27 | """ 28 | Checks if the car reached the top of the mountain 29 | """ 30 | return self.terminal_flag 31 | 32 | def reset_episode(self): 33 | self.current_state = self.env.reset() 34 | 35 | def simulate(self, nsteps): 36 | """ 37 | Runs a simulation using the provided DQN policy for nsteps 38 | """ 39 | 40 | self.reset_episode() 41 | 42 | rtot = 0.0 43 | # run the simulation 44 | for i in xrange(nsteps): 45 | self.env.render() 46 | state = self.get_screenshot() 47 | a = self.env.action_space.sample() 48 | self.act(a) 49 | r = self.reward() 50 | rtot += r 51 | if self.episode_over(): 52 | break 53 | return rtot 54 | 55 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import MDP models and simulator 3 | """ 4 | 5 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/cart_pole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the simulator class for pole cart MDP 5 | ################################################################# 6 | 7 | class CartPole(): 8 | 9 | def __init__(self): 10 | self.actions = np.array([-1,1]) 11 | self.n_actions = 2 12 | 13 | self.state_shape = (1,4) # x, xdot, theta, thetadot 14 | 15 | self.gravity = 9.8 16 | self.mass_cart = 1.0 17 | self.mass_pole = 0.3 18 | self.total_mass = self.mass_cart + self.mass_pole 19 | self.length = 0.7 20 | self.polemass_length = self.mass_pole * self.length 21 | self.force_mag = 10.0 22 | self.tau = 0.02 23 | 24 | self.term_deg = 0.2094384 25 | 26 | 27 | def transition(self, s, a): 28 | if self.isterminal(s): 29 | return s.copy() 30 | x, xdot, theta, thetadot = s[0], s[1], s[2], s[3] 31 | 32 | sint = np.sin(theta) 33 | cost = np.cos(theta) 34 | 35 | force = self.actions[a] * self.force_mag 36 | 37 | temp = (force + self.polemass_length * thetadot**2 * sint) / self.total_mass 38 | thetaacc = (self.gravity * sint - cost * temp) / (self.length * (4.0/3.0 - self.mass_pole * cost**2 / 39 | self.total_mass)) 40 | xacc = temp - self.polemass_length * thetaacc * cost / self.total_mass 41 | 42 | sp = np.zeros(4, dtype=np.float32) 43 | sp[0] = x + self.tau * xdot 44 | sp[1] = xdot + self.tau * xacc 45 | sp[2] = theta + self.tau * thetadot 46 | sp[3] = thetadot + 
self.tau * thetaacc 47 | 48 | return sp 49 | 50 | def reward(self, s, a): 51 | r = 0.0 52 | if self.isterminal(s): 53 | r = -1.0 54 | return r 55 | 56 | 57 | def isterminal(self, s): 58 | if (s[0] < -2.4 or s[0] > 2.4 or s[2] < -self.term_deg or s[2] > self.term_deg): 59 | return True 60 | return False 61 | 62 | 63 | def initial_state(self): 64 | s = np.zeros(4, dtype=np.float32) 65 | s[0] = 2.2 * np.random.rand() - 1.1 66 | s[1], s[2], s[3] = 0.0, 0.0, 0.0 67 | return s 68 | 69 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/mdp_simulator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the simulator class for MDPs 5 | ################################################################# 6 | 7 | class MDPSimulator(): 8 | 9 | def __init__(self, model): 10 | """ 11 | Implements the multi-agent simulator: 12 | This serves as a wrapper for MDP problem types 13 | """ 14 | 15 | self.model = model # problem instance 16 | 17 | # initalize 18 | self.current_state = model.initial_state() 19 | self.last_action = 0 20 | self.last_reward = 0.0 21 | 22 | self.model_dims = model.state_shape 23 | 24 | self.n_actions = model.n_actions 25 | 26 | def act(self, action): 27 | """ 28 | Transitions the model forward by moving 29 | """ 30 | mdp = self.model 31 | 32 | self.last_reward = mdp.reward(self.current_state, action) 33 | self.current_state = mdp.transition(self.current_state, action) 34 | if self.episode_over(): 35 | self.last_reward += mdp.reward(self.current_state, action) 36 | 37 | def reward(self): 38 | return self.last_reward 39 | 40 | def get_screenshot(self): 41 | return self.current_state 42 | 43 | def episode_over(self): 44 | return self.model.isterminal(self.current_state) 45 | 46 | def reset_episode(self): 47 | self.current_state = self.model.initial_state() 48 | self.last_reward = 0.0 49 | 50 | def n_actions(self): 51 | return self.model.n_actions 52 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/mountain_car.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the mountain car MDP 5 | ################################################################# 6 | 7 | class MountainCar(): 8 | 9 | def __init__(self, 10 | term_r = 10.0, 11 | nonterm_r = -1.0, 12 | height_reward = True, 13 | discrete = False, 14 | discount = 0.95): 15 | 16 | self.actions = np.array([-1.0, 0.0, 1.0]) 17 | self.n_actions = 3 18 | 19 | self.state_shape = (1,2) # x and v 20 | 21 | self.term_r = term_r 22 | self.nonterm_r = nonterm_r 23 | 24 | self.vmin, self.vmax = (-0.07, 0.07) 25 | self.xmin, self.xmax = (-1.2, 0.6) 26 | 27 | self.height_reward = height_reward 28 | 29 | self.discrete = discrete 30 | self.xgrid = 10 31 | self.vgrid = 10 32 | self.discrete_x = np.linspace(self.xmin, self.xmax, self.xgrid) 33 | self.discrete_v = np.linspace(self.vmin, self.vmax, self.vgrid) 34 | 35 | 36 | def transition(self, s, a): 37 | """ 38 | Returns a next state, given a state and an action 39 | """ 40 | sp = np.zeros(2, dtype=np.float32) 41 | #sp = np.zeros(2, dtype=np.float32) 42 | sp[1] = s[1] + 0.001 * self.actions[a] - 0.0025 * np.cos(3 * s[0]) 43 | sp[1] = self.vclip(sp[1]) 44 | sp[0] = self.xclip(s[0] + sp[1]) 45 | 46 | return sp 47 
| 48 | 49 | def reward(self, s, a): 50 | """ 51 | Rewarded for reaching the goal state, penalized for all other states 52 | """ 53 | r = s[0] if (self.height_reward and s[0] > 0.0) else 0 54 | if s[0] >= self.xmax: 55 | r += self.term_r 56 | else: 57 | r += self.nonterm_r 58 | return r 59 | 60 | 61 | def isterminal(self, s): 62 | if s[0] >= self.xmax: 63 | return True 64 | return False 65 | 66 | def initial_state(self): 67 | xi = np.random.uniform(self.xmin, self.xmax*0.9) 68 | vi = 0.0 69 | return np.array([xi, vi], dtype=np.float32) 70 | 71 | 72 | 73 | ################################################################# 74 | ########################## UTILITIES ############################ 75 | ################################################################# 76 | 77 | def clip(self, val, lo, hi): 78 | return min(hi, max(val, lo)) 79 | 80 | def vclip(self, val): 81 | return self.clip(val, self.vmin, self.vmax) 82 | 83 | def xclip(self, val): 84 | return self.clip(val, self.xmin, self.xmax) 85 | 86 | def find_nearest(self, vals, target): 87 | idx = (np.abs(vals - target)).argmin() 88 | return vals[idx] 89 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | ''' 5 | 6 | from models.simulator import POMDPSimulator 7 | from models.simulator_momdp import MOMDPSimulator 8 | from models.tiger import TigerPOMDP 9 | from models.rock_sample import RockSamplePOMDP 10 | 11 | from models.tools.belief import DiscreteBelief 12 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/__init__.py: -------------------------------------------------------------------------------- 1 | # dummy file 2 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/rock_sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | from tools.belief_momdp import MOMDPBelief 4 | import math 5 | import itertools 6 | 7 | ################################################################# 8 | # Implements the Rock Sample POMDP problem 9 | ################################################################# 10 | 11 | class RockSamplePOMDP(): 12 | 13 | # constructor 14 | def __init__(self, 15 | xs=7, # size of the grid in the x dim 16 | ys=7, # size of the grid in the y dim 17 | 18 | rocks={(2,4):False, (3,4):True, (5,5):False, # (2,0):False, (0,1):True, (3,1):False, (6,3):True, 19 | (1,6):True}, 20 | 21 | seed=1, # random seed 22 | rbad=-10.0, rgood=10.0, rexit=10.0, rbump=-100.0, # reward values 23 | d0=20, # quality of rover observation 24 | h_conf=0.5, # confidence level before moving in heuristic policy 25 | discount=0.99): 26 | 27 | self.random_state = np.random.RandomState(seed) # used for sampling 28 | self.discount = discount 29 | 30 | self.xs = xs - 1 # x-size of the grid 31 | self.ys = ys - 1 # y-size of the grid 32 | 33 | self.rocks = rocks # dictionary mapping rock positions (x,y) to their types (good or bad) 34 | self.rock_pos = [k for k in sorted(rocks.keys())] 35 | self.rock_types = [rocks[k] for k in sorted(rocks.keys())] 36 | self.rock_map = {(k):i for (i, k) in enumerate(sorted(rocks.keys()))} 37 | k = len(rocks) 38 | self.k = k # number of rocks 39 | 40 | self.rbad = rbad 41 | self.rgood = rgood 42 | self.rbump = rbump 43 | self.rexit = rexit 44 | 
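        # the partially observed component enumerated below covers every joint
        # assignment of rock types, so k rocks give 2**k rock_states
        # (the default 4-rock layout yields a 16-dimensional belief)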
45 | # states: state is represented by the rover position and the rock types 46 | self.rover_states = [(j,i) for i in range(xs) for j in range(ys)] # fully observable vars 47 | rs = itertools.product(*(xrange(2) for i in xrange(k))) 48 | self.rock_states = [[bool(j) for j in i] for i in rs] 49 | self.n_rock_states = len(self.rock_states) 50 | self.n_rover_states = len(self.rover_states) 51 | 52 | # actions: total of 5+k 53 | self.ractions = [0, # move left 54 | 1, # move right 55 | 2, # move up 56 | 3, # move down 57 | 4] # sample 58 | for i in range(k): 59 | self.ractions.append(5+i) # sample rock i 60 | 61 | # observations 62 | self.robs = [0, # none 63 | 1, # good 64 | 2] # bad 65 | 66 | # pre-allocate state variables 67 | self.rover_state = np.zeros(2) # rover (x,y) position 68 | self.rock_state = np.zeros(k, dtype=np.bool) # (good, bad) type for each rock 69 | 70 | self.d0 = d0 71 | self.h_conf = h_conf 72 | 73 | self.action_vectors = [[-1, 0], [1, 0], [0, 1], [0, -1]] 74 | 75 | # belief and observation dimensions 76 | self.xdims = 2 77 | self.odims = 1 78 | 79 | ################################################################# 80 | # Setters 81 | ################################################################# 82 | def set_discount(self, d): 83 | self.discount = d 84 | 85 | def set_rewards(self, rs, rg, rb, re, rm): 86 | self.rsample = rs 87 | self.rgood = rg 88 | self.rbad = rb 89 | self.rexit = re 90 | 91 | ################################################################# 92 | # S, A, O Spaces 93 | ################################################################# 94 | def fully_obs_states(self): 95 | return self.rover_states 96 | 97 | def partially_obs_states(self): 98 | return self.rock_states 99 | 100 | def actions(self): 101 | return self.ractions 102 | 103 | def observations(self): 104 | return self.robs 105 | 106 | ################################################################# 107 | # Reward Function 108 | ################################################################# 109 | def reward(self, x, y, a): 110 | # Rewarded: 111 | # sampling good or bad rocks 112 | # exiting the map 113 | # trying to move off the grid 114 | rocks = self.rocks 115 | xpos, ypos = x 116 | 117 | # if in terminal state, no reward 118 | if self.isterminal(x, y): 119 | return 0.0 120 | # if exit get exit reward 121 | if a == 1 and xpos == self.xs: 122 | return self.rexit 123 | # if trying to move off the grid 124 | if (a == 0 and xpos == 0) or (a == 2 and ypos == self.ys) or (a == 3 and ypos == 0): 125 | return self.rbump 126 | # if trying to sample 127 | if a == 4: 128 | # if in a space with a rock 129 | if x in rocks: 130 | # if rock is good 131 | if rocks[x]: 132 | return self.rgood 133 | # if rock is bad 134 | else: 135 | return self.rbad 136 | return 0.0 137 | 138 | ################################################################# 139 | # Distribution Functions 140 | ################################################################# 141 | # rover moves determinisitcally: distribution is just the position of rover 142 | def fully_obs_transition(self, x, y, a, dist): 143 | xpos = x[0] 144 | ypos = x[1] 145 | # going left 146 | if a == 0 and xpos > 0: 147 | xpos -= 1 148 | # going right 149 | elif a == 1 and xpos < (self.xs+1): 150 | xpos += 1 151 | # going up 152 | elif a == 2 and ypos < self.ys: 153 | ypos += 1 154 | # going down 155 | elif a == 3 and ypos > 0: 156 | ypos -= 1 157 | dist[0] = xpos 158 | dist[1] = ypos 159 | return dist 160 | 161 | # the positions of rocks don't change, good rocks 
turn bad after sampling 162 | def partially_obs_transition(self, x, y, a, dist): 163 | # fill the distribution with our y var 164 | for i in xrange(len(y)): 165 | dist[i] = y[i] 166 | # if a rock is sampled it becomes bad 167 | if a == 4: 168 | rocks = self.rocks 169 | # if we are on a rock state change type to bad 170 | if x in rocks: 171 | ri = self.rock_map[x] 172 | self.rock_types[ri] = False 173 | rocks[x] = False 174 | dist[ri] = False 175 | return dist 176 | 177 | # sample the transtion distribution 178 | def sample_fully_obs_state(self, d): 179 | # deterministic transition 180 | return (d[0], d[1]) 181 | 182 | def sample_partially_obs_state(self, d): 183 | # rock states do not change 184 | return d 185 | 186 | # returns the observation dsitribution of o from the (x,y,a) 187 | def observation(self, x, y, a, dist): 188 | prob = 0.0 189 | # if the action checks a rock 190 | if self.is_check_action(a): 191 | xpos = x[0] 192 | ypos = x[1] 193 | 194 | ri = self.act2rock(a) # rock index 195 | rock_pos = self.rock_pos[ri] # rock position 196 | rock_type = y[ri] # rock type 197 | 198 | r = math.sqrt((xpos - rock_pos[0])**2 + (ypos - rock_pos[1])**2) 199 | eta = math.exp(-r/self.d0) 200 | p_correct = 0.5 + 0.5 * eta # probability of correct measure 201 | 202 | dist.fill(0.0) 203 | # if rock is good 204 | if rock_type == True: 205 | dist[1] = p_correct 206 | dist[2] = 1.0 - p_correct 207 | # rock is bad 208 | else: 209 | dist[1] = 1 - p_correct 210 | dist[2] = p_correct 211 | else: 212 | dist.fill(0.0) 213 | dist[0] = 1.0 214 | return dist 215 | 216 | 217 | # sample the observation distirbution 218 | def sample_observation(self, d): 219 | oidx = self.categorical(d) 220 | return self.robs[oidx] 221 | 222 | def fully_obs_transition_pdf(self, d, x): 223 | if d[0] == x[0] and d[1] == x[1]: 224 | return 1.0 225 | else: 226 | return 0.0 227 | 228 | # only single rock configuration, always return 1 229 | def partially_obs_transition_pdf(self, d, y): 230 | if y == d: 231 | return 1.0 232 | else: 233 | return 0.0 234 | 235 | # pdf for observation prob 236 | def observation_pdf(self, d, dval): 237 | assert dval < 3, "Attempting to retrive pdf value larger than observation size" 238 | return d[dval] 239 | 240 | # numpy categorical sampling hack 241 | def categorical(self, d): 242 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0] 243 | 244 | 245 | ################################################################# 246 | # Create functions 247 | ################################################################# 248 | def create_fully_obs_transition_distribution(self): 249 | td = np.array([0,0]) # position of rover 250 | return td 251 | 252 | def create_partially_obs_transition_distribution(self): 253 | return deepcopy(self.rock_types) 254 | 255 | def create_observation_distribution(self): 256 | od = np.zeros(3) + 1.0/3 # none, good, bad 257 | return od 258 | 259 | def create_belief(self): 260 | return MOMDPBelief(self.n_rock_states) 261 | 262 | def initial_belief(self): 263 | return MOMDPBelief(self.n_rock_states) 264 | 265 | def initial_fully_obs_state(self): 266 | # returns a (0, y) tuple 267 | return (0, self.random_state.randint(self.xs+1)) 268 | 269 | def initial_partially_obs_state(self): 270 | for (i, k) in enumerate(sorted(self.rocks.keys())): 271 | t = bool(self.random_state.randint(2)) 272 | self.rock_types[i] = t 273 | self.rocks[k] = t 274 | return deepcopy(self.rock_types) 275 | 276 | 277 | ################################################################# 278 | # Misc Functions 
279 | ################################################################# 280 | def isterminal(self, x, y): 281 | xpos, ypos = x 282 | if xpos > self.xs: 283 | return True 284 | return False 285 | 286 | def index2action(self, ai): 287 | return ai 288 | 289 | def is_check_action(self, a): 290 | return True if a > 4 else False 291 | 292 | def act2rock(self, a): 293 | return a - 5 294 | 295 | def n_xstates(self): 296 | return len(self.rover_states) 297 | 298 | def n_ystates(self): 299 | return len(self.rock_states) 300 | 301 | def n_actions(self): 302 | return len(self.ractions) 303 | 304 | def n_obsevations(self): 305 | return 2 306 | 307 | 308 | ################################################################# 309 | # Policies 310 | ################################################################# 311 | 312 | def heuristic_policy(self, sc): 313 | # takes in a screen shot, [x, b] array 314 | x = (sc[0], sc[1]) # x and y pos 315 | b = np.array(sc[2:]) # belief 316 | return self.heuristic(x, b) 317 | 318 | def heuristic(self, x, b): 319 | # if we are not confident, keep checking randomly 320 | if b.max() < self.h_conf: 321 | return self.random_state.randint(5, 5+self.k) 322 | else: 323 | ri = b.argmax() # index of highest confidence rock state 324 | y = self.rock_states[ri] # rock state 325 | # find closest good rock 326 | c = float('inf') 327 | ci = -1 328 | for (i, t) in enumerate(y): 329 | # if rock is good 330 | if t: 331 | # if on the rock sample 332 | if x == self.rock_pos[i]: 333 | return 4 334 | xrover = x[0] 335 | yrover = x[1] 336 | xrock, yrock = self.rock_pos[i] 337 | dist = math.sqrt((xrock-xrover)**2 + (yrock-yrover)**2) 338 | if dist < c: 339 | c = dist 340 | ci = i 341 | if ci > -1: 342 | return self.move_to(x, self.rock_pos[ci]) 343 | # if no good rocks left move right 344 | return 1 345 | 346 | # action to move rover from origin o to target t 347 | def move_to(self, o, t): 348 | # vector components 349 | v = [t[0] - o[0], t[1] - o[1]] 350 | sa = float('inf') 351 | ai = 1 352 | # move in the direction that minimizes angle between action and target 353 | for (i, a) in enumerate(self.action_vectors): 354 | ang = angle(v, a) 355 | if ang < sa: 356 | sa = ang 357 | ai = i 358 | return ai 359 | 360 | def dotproduct(v1, v2): 361 | return sum((a*b) for a, b in zip(v1, v2)) 362 | 363 | def length(v): 364 | return math.sqrt(dotproduct(v, v)) 365 | 366 | def angle(v1, v2): 367 | return math.acos(dotproduct(v1, v2) / (length(v1) * length(v2))) 368 | 369 | 370 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/rock_test.py: -------------------------------------------------------------------------------- 1 | from rock_sample import RockSamplePOMDP 2 | 3 | pomdp = RockSamplePOMDP() 4 | 5 | x = pomdp.initial_fully_obs_state() 6 | y = pomdp.initial_partially_obs_state() 7 | 8 | tdx = pomdp.create_fully_obs_transition_distribution() 9 | tdy = pomdp.create_partially_obs_transition_distribution() 10 | od = pomdp.create_observation_distribution() 11 | 12 | for a in range(pomdp.n_actions()): 13 | print "Action ", x, y, a 14 | tdx = pomdp.fully_obs_transition(x, y, a, tdx) 15 | tdy = pomdp.partially_obs_transition(x, y, a, tdy) 16 | od = pomdp.observation(x, y, a, od) 17 | x = pomdp.sample_fully_obs_state(tdx) 18 | y = pomdp.sample_partially_obs_state(tdy) 19 | o = pomdp.sample_observation(od) 20 | print "Observation ", x, y, o 21 | 22 | b = pomdp.initial_belief() 23 | 24 | x = (1,1) 25 | a = 6 26 | 27 | od = pomdp.observation(x, y, a, 
od) 28 | o = pomdp.sample_observation(od) 29 | 30 | b.update(pomdp, x, a, o) 31 | 32 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/simulator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # This file implements a pomdp simulator using the interface 5 | # defined in the README 6 | ################################################################# 7 | 8 | class POMDPSimulator(): 9 | 10 | # constructor 11 | def __init__(self, pomdp, robs=False): 12 | self.pomdp = pomdp 13 | self.current_state = pomdp.initial_state() 14 | self.current_action = None 15 | self.current_observation = np.array([-1]) 16 | self.current_belief = pomdp.initial_belief() 17 | self.current_reward = 0.0 18 | 19 | self.robs = robs # returns observation or belief 20 | 21 | self.tdist = pomdp.create_transition_distribution() 22 | self.odist = pomdp.create_observation_distribution() 23 | 24 | self.n_actions = self.pomdp.n_actions() 25 | self.n_states = self.pomdp.n_states() 26 | 27 | if not robs: 28 | self.model_dims = self.pomdp.belief_shape 29 | else: 30 | self.model_dims = self.pomdp.observation_shape 31 | 32 | #@profile 33 | # progress single step in simulation 34 | def act(self, ai): 35 | pomdp = self.pomdp 36 | s = self.current_state 37 | b = self.current_belief 38 | tdist = self.tdist 39 | odist = self.odist 40 | 41 | a = pomdp.index2action(ai) 42 | 43 | r = pomdp.reward(s, a) 44 | 45 | tdist = pomdp.transition(s, a, tdist) 46 | s = pomdp.sample_state(tdist) 47 | 48 | odist = pomdp.observation(s, a, odist) 49 | o = pomdp.sample_observation(odist) 50 | 51 | b.update(pomdp, a, o) 52 | 53 | self.current_reward = r 54 | self.current_state = s 55 | self.current_observation = o 56 | 57 | # returns the current simulator belief 58 | def get_screenshot(self): 59 | if self.robs: 60 | return np.array([self.current_observation]) 61 | else: 62 | return self.current_belief.new_belief() 63 | 64 | # returns the current reward 65 | def reward(self): 66 | return self.current_reward 67 | 68 | # check if reached terminal states 69 | def episode_over(self): 70 | return self.pomdp.isterminal(self.current_state) 71 | 72 | def reset_episode(self): 73 | pomdp = self.pomdp 74 | self.current_state = pomdp.initial_state() 75 | self.current_belief = pomdp.initial_belief() 76 | 77 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tiger.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | from tools.belief import DiscreteBelief 4 | 5 | ################################################################# 6 | # Implements the Tiger POMDP problem 7 | ################################################################# 8 | 9 | class TigerPOMDP(): 10 | 11 | # constructor 12 | def __init__(self, 13 | seed=999, # random seed 14 | rlisten=-1.0, rtiger=-100.0, rescape=10.0, # reward values 15 | pcorrect=0.85, # correct observation prob 16 | discount=0.95): # discount 17 | 18 | self.random_state = np.random.RandomState(seed) 19 | self.rlisten = rlisten 20 | self.rtiger = rtiger 21 | self.rescape = rescape 22 | self.pcorrect = pcorrect 23 | self.discount = discount 24 | 25 | # transition arrs 26 | self.tstates = [0, 1] # left, right 27 | 28 | # actions 29 | self.tactions = [0, 1, 2] # open left, open right, listen 
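        # convention used throughout this model: state 0 means the tiger is behind
        # the left door and state 1 behind the right door; actions 0/1 open the
        # corresponding door, action 2 listens for one time step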
30 | 31 | # observations arrs 32 | self.tobs = [0, 1] # observed on the left, observed on the right 33 | 34 | # belief and observation shape 35 | self.belief_shape = (2,1) 36 | self.observation_shape = (1,1) 37 | 38 | ################################################################# 39 | # Setters 40 | ################################################################# 41 | def set_discount(self, d): 42 | self.discount = d 43 | 44 | def set_rewards(self, rl, rt, re): 45 | self.rlisten = rl 46 | self.rtiger = rt 47 | self.rescape = re 48 | 49 | def set_listen_prob(self, pc): 50 | self.pcorrect = pc 51 | 52 | ################################################################# 53 | # S, A, O Spaces 54 | ################################################################# 55 | def states(self): 56 | return self.tstates 57 | 58 | def actions(self): 59 | return self.tactions 60 | 61 | def observations(self): 62 | return self.tobs 63 | 64 | ################################################################# 65 | # Reward Function 66 | ################################################################# 67 | def reward(self, s, a): 68 | r = 0.0 69 | rt = self.rtiger 70 | re = self.rescape 71 | if a == 2: 72 | r += self.rlisten 73 | elif a == 1: 74 | r = (r + rt) if s == 1 else (r + re) 75 | else: 76 | r = (r + rt) if s == 0 else (r + re) 77 | return r 78 | 79 | ################################################################# 80 | # Distribution Functions 81 | ################################################################# 82 | # returns the transtion distriubtion of s' from the (s,a) pair 83 | def transition(self, s, a, dist): 84 | if a == 0 or a == 1: 85 | dist[0] = 0.5 86 | dist[1] = 0.5 87 | elif s == 0: 88 | dist[0] = 1.0 89 | dist[1] = 0.0 90 | else: 91 | dist[0] = 0.0 92 | dist[1] = 1.0 93 | return dist 94 | 95 | # sample the transtion distribution 96 | def sample_state(self, d): 97 | sidx = self.categorical(d) 98 | return self.tstates[sidx] 99 | 100 | # returns the observation dsitribution of o from the (s,a) pair 101 | def observation(self, s, a, dist): 102 | p = self.pcorrect 103 | if a == 2: 104 | if s == 0: 105 | dist[0] = p 106 | dist[1] = 1.0 - p 107 | else: 108 | dist[0] = 1.0 - p 109 | dist[1] = p 110 | else: 111 | dist[0] = 0.5 112 | dist[1] = 0.5 113 | return dist 114 | 115 | # sample the observation distirbution 116 | def sample_observation(self, d): 117 | oidx = self.categorical(d) 118 | return self.tobs[oidx] 119 | 120 | # pdf should be in a distributions module 121 | def transition_pdf(self, d, dval): 122 | assert dval < 2, "Attempting to retrive pdf value larger than state size" 123 | return d[dval] 124 | 125 | def observation_pdf(self, d, dval): 126 | assert dval < 2, "Attempting to retrive pdf value larger than state size" 127 | return d[dval] 128 | 129 | # numpy categorical sampling hack 130 | def categorical(self, d): 131 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0] 132 | 133 | ################################################################# 134 | # Create functions 135 | ################################################################# 136 | def create_transition_distribution(self): 137 | td = np.array([0.5, 0.5]) 138 | return td 139 | 140 | def create_observation_distribution(self): 141 | od = np.array([0.5, 0.5]) 142 | return od 143 | 144 | def create_belief(self): 145 | return DiscreteBelief(self.n_states()) 146 | 147 | def initial_belief(self): 148 | return DiscreteBelief(self.n_states()) 149 | 150 | def initial_state(self): 151 | return 
self.random_state.randint(2) 152 | 153 | ################################################################# 154 | # Misc Functions 155 | ################################################################# 156 | 157 | def isterminal(self, s): 158 | # no terminal state in model 159 | return False 160 | 161 | def index2action(self, ai): 162 | return ai 163 | 164 | def n_states(self): 165 | return 2 166 | 167 | def n_actions(self): 168 | return 3 169 | 170 | def n_obsevations(self): 171 | return 2 172 | 173 | ################################################################# 174 | # Policies 175 | ################################################################# 176 | 177 | def optimal_policy(self): 178 | def pol(b): 179 | if b[0] < 0.04: 180 | return 0 181 | elif b[0] > 0.96: 182 | return 1 183 | else: 184 | return 2 185 | return pol 186 | 187 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # init file 2 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/belief.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | 4 | ################################################################# 5 | # Implements Belief and Belief Updater 6 | ################################################################# 7 | 8 | class DiscreteBelief(): 9 | 10 | def __init__(self, n): 11 | self.bold = np.zeros(n) + 1.0/n 12 | self.bnew = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return self.bnew[idx] 17 | 18 | def __setitem__(self, idx, val): 19 | self.bold[idx] = val 20 | self.bnew[idx] = val 21 | 22 | def update(self, pomdp, a, o): 23 | 24 | # swap pointers 25 | (bnew, bold) = (self.bold, self.bnew) 26 | 27 | sspace = pomdp.states() 28 | 29 | td = pomdp.create_transition_distribution() 30 | od = pomdp.create_observation_distribution() 31 | 32 | # old belief is now new, new is fresh 33 | bnew.fill(0.0) 34 | 35 | for (i, sp) in enumerate(sspace): 36 | # get the distributions 37 | od = pomdp.observation(sp, a, od) 38 | # get the prob of o from the current distribution 39 | probo = pomdp.observation_pdf(od, o) 40 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero 41 | if probo == 0.0: 42 | continue 43 | b_sum = 0.0 # belef for state sp 44 | for (j, s) in enumerate(sspace): 45 | td = pomdp.transition(s, a, td) 46 | pp = pomdp.transition_pdf(td, sp) 47 | b_sum += pp * bold[j] 48 | bnew[i] = probo * b_sum 49 | norm = sum(bnew) 50 | for i in range(self.length()): 51 | bnew[i] /= norm 52 | (self.bnew, self.bold) = (bnew, bold) 53 | return self 54 | 55 | def length(self): 56 | return self.n 57 | 58 | def empty(self): 59 | self.bold.fill(0.0) 60 | self.bnew.fill(0.0) 61 | 62 | def empty_old(self): 63 | self.bold.fill(0.0) 64 | 65 | def empty_new(self): 66 | self.bnew.fill(0.0) 67 | 68 | def old_belief(self): 69 | return self.bold 70 | 71 | def new_belief(self): 72 | return self.bnew 73 | 74 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/belief_momdp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | 4 | ################################################################# 5 | # Implements 
Belief and Belief Updater 6 | ################################################################# 7 | 8 | class MOMDPBelief(): 9 | 10 | def __init__(self, n): 11 | self.bold = np.zeros(n) + 1.0/n 12 | self.bnew = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return self.bnew[idx] 17 | 18 | def __setitem__(self, idx, val): 19 | self.bold[idx] = val 20 | self.bnew[idx] = val 21 | 22 | def update(self, pomdp, x, a, o): 23 | 24 | # swap pointers 25 | (bnew, bold) = (self.bold, self.bnew) 26 | 27 | yspace = pomdp.partially_obs_states() 28 | 29 | tdp = pomdp.create_partially_obs_transition_distribution() 30 | od = pomdp.create_observation_distribution() 31 | 32 | # old belief is now new, new is fresh 33 | bnew.fill(0.0) 34 | 35 | # iterate 36 | for (i, yp) in enumerate(yspace): 37 | # get the distributions 38 | od = pomdp.observation(x, yp, a, od) 39 | # get the prob of o from the current distribution 40 | probo = pomdp.observation_pdf(od, o) 41 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero 42 | if probo == 0.0: 43 | continue 44 | b_sum = 0.0 # belief for state yp 45 | for (j, y) in enumerate(yspace): 46 | tdp = pomdp.partially_obs_transition(x, y, a, tdp) 47 | pp = pomdp.partially_obs_transition_pdf(tdp, yp) 48 | b_sum += pp * bold[j] 49 | bnew[i] = probo * b_sum 50 | norm = sum(bnew) 51 | for i in xrange(self.length()): 52 | bnew[i] /= norm 53 | (self.bnew, self.bold) = (bnew, bold) 54 | return self 55 | 56 | def length(self): 57 | return self.n 58 | 59 | def empty(self): 60 | self.bold.fill(0.0) 61 | self.bnew.fill(0.0) 62 | 63 | def empty_old(self): 64 | self.bold.fill(0.0) 65 | 66 | def empty_new(self): 67 | self.bnew.fill(0.0) 68 | 69 | def old_belief(self): 70 | return self.bold 71 | 72 | def new_belief(self): 73 | return self.bnew 74 | 75 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/distributions.py: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | # Implements distributions for POMDP models 3 | ################################################################# 4 | 5 | import numpy as np 6 | from copy import deepcopy 7 | 8 | class Categorical(): 9 | 10 | def __init__(self, n): 11 | self.indices = np.zeros(n, dtype=np.int64) 12 | self.weights = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return (self.indices[idx], self.weights[idx]) 17 | 18 | def __setitem__(self, idx, val): 19 | self.indices[idx], self.weights[idx] = val # set the (index, weight) pair
20 | 21 | def sample(self): 22 | idx = self.quantile(np.random.rand()) 23 | return self.indices[idx] 24 | 25 | 26 | def quantile(self, p): 27 | k = self.n 28 | pv = self.weights 29 | i = 0 30 | v = pv[0] 31 | while v < p and i < k - 1: 32 | i += 1 33 | v += pv[i] 34 | return i 35 | 36 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/sim_loop.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from models.tiger import TigerPOMDP 4 | from models.simulator import POMDPSimulator 5 | 6 | ##################################################################### 7 | # This is a sample simulation loop for the DRL framework using POMDPs 8 | ##################################################################### 9 | 10 | # initialize pomdp 11 | pomdp = TigerPOMDP(seed=1) 12 | 13 | # initialize and pass the pomdp into simulator 14 | sim = POMDPSimulator(pomdp) # state and initial belief automatically initialized 15 | 16 | sim.n_states # number of states - input layer size 17 | 18 | opt = pomdp.optimal_policy() 19 | 20 | steps = 50000 21 | 22 | rtot = 0.0 23 | 24 | for i in xrange(steps): 25 | # get the initial state 26 | s = sim.get_screenshot() 27 | # pick random action 28 | #ai = np.random.randint(sim.n_actions) 29 | # pick optimal action 30 | ai = opt(s) 31 | 32 | # progress simulation 33 | sim.act(ai) 34 | 35 | # get reward and next states 36 | r = sim.reward() # real valued reward 37 | sp = sim.get_screenshot() # pomdp state, this is a belief 38 | 39 | print "Step: ", i 40 | #print "Action ", ai, " Reward: ", r, " Screen Shot: ", sp 41 | #print "Current State: ", sim.current_state, " Current Belief: ", sim.current_belief.bnew, "\n" 42 | 43 | rtot += r 44 | 45 | # check if reached terminal state 46 | if sim.episode_over(): 47 | sim.reset_episode() 48 | 49 | print "Total reward: ", rtot 50 | -------------------------------------------------------------------------------- /chimp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dummy File 3 | """ 4 | -------------------------------------------------------------------------------- /chimp/utils/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements helper functions 5 | ################################################################# 6 | 7 | def categorical(p, rng): 8 | """ 9 | Draws multinomial samples from distribution p 10 | """ 11 | return np.argmax(rng.multinomial(1,p)) 12 | 13 | def softmax(z): 14 | """ 15 | Computes softmax values for each Q-value in z 16 | """ 17 | # TODO: extend to multi-dimensional input?
18 | ex = np.exp(z - np.max(z)) 19 | return ex / np.sum(ex) 20 | -------------------------------------------------------------------------------- /chimp/utils/policies.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from chimp.utils.distributions import * 4 | 5 | ################################################################# 6 | # Implements DQN controllers 7 | ################################################################# 8 | 9 | 10 | class DQNPolicy(): 11 | """ 12 | Class that handles policies generated by the DQN 13 | """ 14 | 15 | def __init__(self, learner): 16 | self.learner = learner 17 | 18 | def action(self, obs): 19 | """ 20 | Returns the action with the highest Q value given observation obs 21 | """ 22 | q_vals = self.learner.forward(obs) 23 | return np.argmax(q_vals) 24 | 25 | 26 | class StochasticDQNPolicy(): 27 | """ 28 | Generates actions stochastically according to Q-vals 29 | Network output is turned into probs using softmax 30 | """ 31 | 32 | def __init__(self, learner, seed=None): 33 | self.learner = learner 34 | self.rng = np.random.RandomState(seed) 35 | 36 | def action(self, obs): 37 | """ 38 | Returns the action according to probs generated by taking softmax over Qs 39 | """ 40 | q_vals = self.learner.forward(obs) 41 | q_probs = softmax(q_vals) 42 | return categorical(q_probs[0], self.rng) 43 | 44 | 45 | class EpsGreedyPolicy(): 46 | """ 47 | Epsilon greedy policy 48 | """ 49 | 50 | def __init__(self, policy, n_actions, eps, seed=None): 51 | self.policy = policy 52 | self.n_actions = n_actions 53 | self.eps = eps 54 | self.rng = np.random.RandomState(seed) 55 | 56 | def action(self, obs): 57 | if self.rng.rand() < self.eps: 58 | return self.rng.randint(self.n_actions) 59 | else: 60 | return self.policy.action(obs) 61 | 62 | 63 | class RandomPolicy(): 64 | 65 | # constructor 66 | def __init__(self, n_actions, rng = np.random.RandomState()): 67 | self.rng = rng 68 | self.n_actions = n_actions 69 | 70 | def action(self, obs): 71 | return self.rng.randint(self.n_actions) 72 | 73 | 74 | class OneStepLookAhead(): 75 | 76 | # constructor 77 | def __init__(self, simulator, n_rollouts=100): 78 | self.simulator = simulator 79 | 80 | def action(self, obs): 81 | # run each action n_rollouts times, take the highest average 82 | pass 83 | 84 | 85 | class SingleAction(): 86 | """ 87 | Dummy single action policy 88 | """ 89 | 90 | def __init__(self, a): 91 | self.a = a 92 | 93 | def action(self, obs): 94 | return self.a 95 | -------------------------------------------------------------------------------- /examples/atari_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# be sure to have run ' python setup.py ' from chimp directory" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Training DeepMind's Atari DQN with Chimp" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Load Chimp modules" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "from chimp.memories import ReplayMemoryHDF5\n", 37 | "\n", 38 | "from chimp.learners.dqn_learner import DQNLearner\n", 39 | "from 
chimp.learners.chainer_backend import ChainerBackend\n", 40 | "\n", 41 | "from chimp.simulators.atari import AtariSimulator\n", 42 | "\n", 43 | "from chimp.agents import DQNAgent" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Load Python packages" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "%matplotlib inline \n", 62 | "import matplotlib.pyplot as plt\n", 63 | "\n", 64 | "import numpy as np\n", 65 | "import random\n", 66 | "import chainer\n", 67 | "import chainer.functions as F\n", 68 | "import chainer.links as L\n", 69 | "from chainer import Chain\n", 70 | "import os\n", 71 | "\n", 72 | "import pandas as ps" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Set training parameters" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "settings = {\n", 91 | "\n", 92 | " # agent settings\n", 93 | " 'batch_size' : 32,\n", 94 | " 'print_every' : 5000,\n", 95 | " 'save_dir' : './results_atari',\n", 96 | " 'iterations' : 5000000,\n", 97 | " 'eval_iterations' : 5000,\n", 98 | " 'eval_every' : 50000,\n", 99 | " 'save_every' : 50000,\n", 100 | " 'initial_exploration' : 50000,\n", 101 | " 'epsilon_decay' : 0.000005, # subtract from epsilon every step\n", 102 | " 'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions\n", 103 | " 'epsilon' : 1.0, # Initial exploratoin rate\n", 104 | " 'learn_freq' : 4,\n", 105 | " 'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)\n", 106 | " 'model_dims' : (84,84),\n", 107 | " \n", 108 | " # Atari settings\n", 109 | " 'rom' : \"Breakout.bin\",\n", 110 | " 'rom_dir' : './roms',\n", 111 | " 'pad' : 15, # padding parameter - for image cropping - only along the length of the image, to obtain a square\n", 112 | " 'action_history' : True,\n", 113 | "\n", 114 | " # simulator settings\n", 115 | " 'viz' : True,\n", 116 | " 'viz_cropped' : False,\n", 117 | "\n", 118 | " # replay memory settings\n", 119 | " 'memory_size' : 1000000, # size of replay memory\n", 120 | " 'frame_skip' : 4, # number of frames to skip\n", 121 | "\n", 122 | " # learner settings\n", 123 | " 'learning_rate' : 0.00025, \n", 124 | " 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used\n", 125 | " 'discount' : 0.99, # discount rate for RL\n", 126 | " 'clip_err' : False, # value to clip loss gradients to\n", 127 | " 'clip_reward' : 1, # value to clip reward values to\n", 128 | " 'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations\n", 129 | " 'optim_name' : 'RMSprop', # currently supports \"RMSprop\", \"ADADELTA\", \"ADAM\" and \"SGD\"'\n", 130 | " 'gpu' : True,\n", 131 | " 'reward_rescale': False,\n", 132 | "\n", 133 | " # general\n", 134 | " 'seed_general' : 1723,\n", 135 | " 'seed_simulator' : 5632,\n", 136 | " 'seed_agent' : 9826,\n", 137 | " 'seed_memory' : 7563\n", 138 | "\n", 139 | " }" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "You may want to set a smaller number of iterations (like 100000) - for illustration purposes. We set the GPU option to True, turn it off if your machine does not support it. Be sure to have the requested rom in the indicated directory." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# set random seed\n", 158 | "np.random.seed(settings[\"seed_general\"])\n", 159 | "random.seed(settings[\"seed_general\"])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Now we initialize the simulator first, as we need to use some information it provides - e.g., number of actions." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Original screen width/height: 160/210\n", 181 | "Cropped screen width/height: 84/84\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "simulator = AtariSimulator(settings)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "Here we define the convolutional network, in a format required by Chainer - the deep learning library we use." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 7, 199 | "metadata": { 200 | "collapsed": false 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "#Define the network\n", 205 | "class Convolution(Chain):\n", 206 | "\n", 207 | " def __init__(self):\n", 208 | " super(Convolution, self).__init__(\n", 209 | " l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),\n", 210 | " l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),\n", 211 | " l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),\n", 212 | " l4=F.Linear(3136, 512, wscale = np.sqrt(2)),\n", 213 | " l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)),\n", 214 | " )\n", 215 | "\n", 216 | " def __call__(self, ohist, ahist):\n", 217 | " if len(ohist.data.shape) < 4:\n", 218 | " ohist = F.reshape(ohist,(1,4,84,84))\n", 219 | " h1 = F.relu(self.l1(ohist/255.0))\n", 220 | " h2 = F.relu(self.l2(h1))\n", 221 | " h3 = F.relu(self.l3(h2))\n", 222 | " h4 = F.relu(self.l4(h3))\n", 223 | " output = self.l5(h4)\n", 224 | " return output\n", 225 | "\n", 226 | "net = Convolution()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "We then initialize the learner + chainer backend, replay memory, and agent modules." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 8, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "backend = ChainerBackend(settings)\n", 245 | "backend.set_net(net)\n", 246 | "learner = DQNLearner(settings, backend)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 9, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "memory = ReplayMemoryHDF5(settings)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 10, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "agent = DQNAgent(learner, memory, simulator, settings)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "Now let the agent train." 
276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "agent.train()" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "# Visualizing results" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "First, let's visualize the training and evaluation results." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 11, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n", 312 | "train_stats.columns = ['Iteration','MSE Loss','Average Q-Value']" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 12, 318 | "metadata": { 319 | "collapsed": true 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n", 324 | "eval_stats.columns = ['Iteration','Total Reward','Reward per Episode']" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 13, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEPCAYAAACHuClZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXmcHGWd/z/fyTGTyTAJ5CIhCZNwJJAAAUSOoGk8EFbQ\nCCJEFMKiyyrnqiisyrGrrC7+WGEFFEUCIuoKIoqC4UiLIGdCIDeEMBNCJoeZkExmckwy398f33pS\n1dVV1dV19DH9fb9e/Zqu6u6qp5+pfj71vZ6HmBmKoiiKEoe6cjdAURRFqX5UTBRFUZTYqJgoiqIo\nsVExURRFUWKjYqIoiqLERsVEURRFiU2qYkJEY4noaSJaQkSLiOhya//1RLSGiBZYj9PSbIeiKIqS\nLpRmnQkR7Q9gf2ZeSERNAOYD+CSAcwF0MvMtqZ1cURRFKRn90zw4M68DsM56vo2IlgE4wHqZ0jy3\noiiKUjpKFjMhohYA0wC8aO26jIgWEtHPiGhIqdqhKIqiJE9JxMRycT0I4Epm3gbgDgATmXkaxHJR\nd5eiKEoVk2rMBACIqD+ARwE8xsy3erx+IIA/MvORHq/pxGGKoigRYOaShhJKYZn8HMBSp5BYgXnD\nWQAW+32YmfXBjOuvv77sbaiUh/aF9oX2RfCjHKQagCei6QDOB7CIiF4FwAD+HcBniWgagF4ArQAu\nSbMdfYHW1tZyN6Fi0L6w0b6w0b4oL2lncz0HoJ/HS4+neV5FURSltGgFfJUwe/bscjehYtC+sNG+\nsNG+KC+pB+DjQERcye1TFEWpRIgI3AcD8EoCZLPZcjehYtC+sNG+sNG+KC8qJoqiKEps1M2lKIrS\nx1A3l6IoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+\nsNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGai\nKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoP\nttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEps\nNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhU\nCeoPttG+sNG+sNG+KC8qJoqiKAmwdi1w883lbkX5SDVmQkRjAdwHYBSAXgA/ZebbiGhfAL8BcCCA\nVgCfYeYtHp/XmImiKFVBNgt8/evASy+VuyV9M2ayG8BXmHkKgBMBXEpEkwFcA+BJZp4E4GkA16bc\nDkVRlFTp6QF27ix3K8pHqmLCzOuYeaH1fBuAZQDGAvgkgHutt90LYGaa7egLqD/YRvvCRvvCptx9\nsWuXiklJIKIWANMAvABgFDOvB0RwAIwsVTsURVHSoKcH2LGj3K0oH/1LcRIiagLwIIArmXkbEbkD\nIb6BkdmzZ6OlpQUAMHToUEybNg2ZTAaAfSdSC9uZTKai2qPblbNtqJT2lGvb7CvX+V99NYutWwGg\n9OfPZrOYM2cOAOwdL0tN6kWLRNQfwKMAHmPmW619ywBkmHk9Ee0PYB4zH+bxWQ3AK4pSFfzyl8Dl\nlwMdHeVuSd8MwAPAzwEsNUJi8QcAs63nFwJ4pATtqGrcd6G1jPaFjfaFTbF9sXw58JOfJHd+jZmk\nCBFNB3A+gA8R0atEtICITgPwfQAfJaIVAD4M4HtptkNRFMXN/PnAHXckd7xaz+bSubkURalJfvxj\n4MorgW3bgAED4h/vRz8SN9fu3UC/fvGPF4e+6uZSFEWpODo7xTX15pvJHK+nR/7WqnVSUExI+BwR\nXWdtjyei96ffNMWJ+sZttC9stC9sTF/Mmwf8138Vfn9np/xdtCiZ8+/aJX9VTPy5A1K9Psva7gRw\ne2otUhSlaFpbgd/9rtytqAzmzwcWLiz8vs5OoLk5OTFRy6QwxzPzpQB2AAAzbwYwMNVWKXk4c+lr\nHe0LG9MXf/0rcPfd5W1LuTF90d5uWwlBbNsGnH
BC8pZJrRYuhhGTHiLqB6uwkIhGQCZtVBSlQti0\nyXbb1Drt7eGsg85O4MQTgcWLkzmvWiaFuQ3AwwBGEtF3ATwL4KZUW6Xkob5xG+0LG9MXHR1yp13L\nmL4oRkyOPlren0Tf1XrMpOB0Ksz8SyKaD6kHIQAzmXlZ6i1TFCU0apnYtLcDI0YUfl9nJzB0KDB5\nMrBkCXD88fHOW+uWia+YENF+js0NAH7lfI2ZK2DSgNpB4wQ22hc2pi82bVLLxBkzaW4u/P7OTmCf\nfYCpUyVuEldM1DLxZz4kTkIAxgPYbD0fCmA1gAmpt05RlFB0dKhlAgDd3cDWreEG9G3bREyOOCKZ\nIHytWya+MRNmnsDMEwE8CeBMZh7OzMMAnAFgbqkaqAgaJ7DRvrAxfbFpE9DVBfTWcGpMNpvFunXy\nPEw2l7FMjjgimSB8rVsmYQLwJzDzn80GMz8G4KT0mqQo0fjgB2vX1bNpk/zt6ipvO8pNezswalT4\nALxaJskRRkzWEtG3iKjFenwTwNq0G6bkonECG7++ePllGUxqCdMXHR1AU1Ntu7oymQza24EDDyw8\noO/ZI/UgjY3AmDEyn9b69fHOv2sX0L+/ikkQswCMgKQHPwxZFXFW4CcUpcQwy+CwcWO5W1J6du6U\ngWz06NoWEwB7xaSQm2vbNmDwYIBIHklYJz09Yulo0aIPzNzBzFcC+CCADzDzlZrJVXo0TmDj1Rdm\n8Kg1Mclms9i0CdhvPxnIatXNB0hftLcDLS2FrQMTfDdMnRo/brJrl1iHapn4QERHENGrABYDWEJE\n84loavpNU5TwmLvBDRvK245y0NEBDBumbi4Aod1cJl5iSNIyUTHx5ycAvsLMBzLzgQC+CuCudJul\nuNGYiY1XXxgxSdIyufji+H70tMlkMti0ScRkn31qW0ycMZNdu8T16UcaYrJrl4pJIQYz8zyzwcxZ\nAINTa5GiRCANMfnLX4DXXkvueGmhbi6b9nbggAMkEG6yq7zo7BRLzjB1KrB0abzUarVMCrOKiL7t\nyOb6FoBVaTdMyUVjJjZefZGGmHR2AitXJne8NMhms+rmsjAxk9Gjgfr64EHdbZkMGSJ/46RWa8yk\nMP8Myeb6nfUYbu1TFADA3LnAXWV2fCYtJsxyl//WW8kcL03UzSXs3g28957MyzVwYHBGl1tMAEkT\n7u6Ofn61TArAzJuZ+QpmPgbAcQCus9Y0UUpIJcdM/v534LnnSnc+v5jJgAHJiUl3t7g8Kt0yMTGT\nqG6uww+XAH5f4LDDMhg+XNZfL2SZuLO5AGDQIGD79ujnV8ukAET0ABE1E9FgAIsALCWiq9NvmlIt\nrF1b/jviHTuAsWOTy+Yyg3I1WCZR3VzbtwPLlgFvvJFe20qJcXEBxbu5ALVM4hLGzXU4M28FMBPA\nY5AJHj+faquUPCo5ZtLeXlox8YuZjBsnlklQFk9YOjuBkSOBVasqe74rU2cSxc1lZguoBsEMw9y5\nWey/vzwP4+ZyBuCB5CwTLVr0ZwARDYCIyR+YuQfWqouKAlSOZTJ0qAwiSbSls1PucpubK3+Klqhu\nrrXWpEh9RUw2bYpnmQwapJZJHMLWmbRC0oGfIaIDAWxNs1FKPpUcMym1mPjFTBoaJPiaRNzEDDYH\nHVTZg20mk4ns5mpvl6lEKj0uFJbm5kxsN5fGTKITJgB/GzMfwMz/xEIbgFNK0DYlQc46C3un506S\n3bslTlEJlkktigkQPZtr7Vop1qv07xcWZ8ykkJvLLwCvlkl0fMWEiD5n/f2K+wHgipK1UAEQP2by\nt78Ba9Yk0xYnGzZIFlUlxEzSEJODDy79nfvddwNtbeHeO29e9Lm52tuBD3yg74jJokXZslsmKibe\nmCr3fXweSpXADGzeDGzZkvyx29tlwO3sTCbwHRUjJiNHJpPRZe5cS22Z9PYC114LPPtsuPeblOiG\nhmhurmOOkZUJy21ZJkESMZOoYsIsVnotu7l8l+1l5p9Yf28sXXNqF3MxDhjg/XqcmElnp6zfkIaY\nrF0rs7S++aYMbIMGxT9mlL4wYtLQUN2Wyfz50v6wVuThh2cwbJg8j+LmOuAAYOJEyVo76qji21tJ\ndHVlQru5vLK54qQG9/TI9VpIxPoyYepMJhLRH4loIxFtIKJHiGhiKRpXSzz5JHDOOekc2xSlpSUm\nY8YkW339pz8Bny8y+Xz79uTdXE1NpbdMHn9cMsjCiolxcQHR3FyjR1dHXKgQvb0yKadJDS61ZaJi\nEi6b6wEA/wdgNIAxAH4L4FdpNqoWWb48eACJEzOJKiZ/+ANwzz3B7zEDUpJismEDsGSJ/+uljJkM\nGyYDVamqxB97DPjsZ8OLybx52b2WyeDBIiZh3Y3t7XIjUI64UNJ0dAD19VnU18t2lAr4OJbJrl1i\nDamYBNPIzL9g5t3W434ADWk3rNZobU1vwIoqJi+/LIISRBqWSWenuF2KicGkJSZEpRtsOzpkgaZZ\ns8KLyZYt2Csm/fvLYBZmQNyxQwbUYcP6hmXS3m73AxBtbq6kLBMtWvTnMSK6xpox+EAi+jqAPxPR\nfkS0X9oNrBVaW8Vl4UecmElUMdmyJdhCAERMkrZMOjtlQPRbS6QUdSbOO9dSDbZPPAF88IMiXmHF\nZP/9M3vdXEB4V9e6deISIpLvl7ZYMqfbh5IIktm7HWQhONd/d1KtlsnOnUAlTJDhG4B38Bnr7yWu\n/edBKuE1fpIAbW2SVWPucJKko0OOGUVM3nrLHqi9MK6SpMUEEOvE+MALkXQ2l/POtVRi8thjwOmn\nA6NGyY2FGaCCMAWLBpPRNWpU8OecNRkHH5z+93vzTeDYY0XEBqewGpLz+wDBg7pz/Xcn1Roz2bgR\n+Nzn0kn9L4YwRYsTAh4qJAnR2ioXo5+rK27MZPz4aGLS2wusWOH/nrTcXID/ABcmZhI3TdmZ7VMK\nN1dvrwTfTz9dZr3df/9w07gsXJjNEZOw/wdjUQJybbS3pzsIbtsmjwcfLPzeKDcD7e1AT09273aQ\nm8vLxQXEK1o0wt/QUHox2b49mSzKuAQVLX7d8fwc12s3pdmoWmPbNrmIJ04MdnVFZfNmYMKEaGIy\nbJi/q2v3bmnvyJHJi8n48WKZhMWISWMjUFcXb5Ej04ZSWiavvSZZXBOt27OxY8PdaW7dikhuLmNR\nAnITM3as3NAYVq0Cvv/90M0vSHe3iOTPfx78vrfflmu12PihO2YSZCH4iUmcosVyWiYVLyYQN5bh\nWtdrp4U5OBHdTUTrieh1x77riWgNES2wHqGO1Zdpa5N1q4cP9xeTuDGTqGJy0kmynKkXGzbID7h/\n/+TF5Kij/AfwoJgJkEzcxDnglMIyMS4uQ1gxGTgw4+nmKoTTMgHyXV133imV+EmxfTswfbpkLb75\npv/7VqwQ4Sn23FLNn9m7XcjNlZZl0r+/WJl79kQ7ThSqQUzI57nXth/3APiYx/5bmPkY6/F4yGP1\nWVpbp
fBv2LB0LJOODjl+sWLy3nsiJn6WiXNAam5OTky2bRMxiWKZAMmLyZgx0hdxrZ0gHn8cOM1x\nWxVWTMy8XIawou6OMTitr927gfvvB955J7lZDbq7ZVbnz30OmDPH/31vvgkcdxxw++3FDcitrWLN\nGqK4uZKwTIhKb51Ug5iwz3Ovbe8DMD8LwGtVxrBiVBMYyyRITOLGTNKwTJyukqQtk2nTosVMgGTE\nxHn3Wlcn/VeMuBVDdzewYAHgNLjCisk772Rju7mA3IyuuXPl5qOxEfjHP/I/+9OfBguCF93dMuBd\ndBFw773+QrFyJXDeeSJ0f/xjuGMzi0WzcWN2775Cbi539TsQLwDvTJZQMcnnKCLaSkSdAI60npvt\nI2Ke9zIiWkhEPyOiITGPVfVUomXCLP74970PWL3aO3feaZkkLSaTJok1ENbt4BSTuBldZv13Z9ZR\nmq6ud9+V7CvngFBMzCRpN9ecOcDs2XKnv3p1/mefegq47jq5Gw/L9u0iTlOniog98YT3+958Ezjk\nEOCKK4Dbbgt37HXrZCAf4hhJosZM4k6nUujcaVDxYsLM/Zi5mZn3Yeb+1nOzHSd59Q4AE5l5GoB1\nAG6Jcaw+QWtrYcskbsxk7Fi54IMKuZx0d8uPo7FRgsJeGV0mkwtIXkyam0UAvayBtGMm3d0yIPR3\nJM6nWYuxfn1+Kq+XmPT2AjfdZK/82NsLdHdnsO++9nuKcXN5WSYdHWKZnHuuiMk77+R/9u23pb9/\n85tw3w+QPjV1Hf/8z/6B+JUrRdjOPlviK4sWFT72ihXA5Mm510XUbK6kLJNSFi4aq6/chKkzSRRm\ndv7Mfwog0JidPXs2WlpaAABDhw7FtGnT9l40xt1R7dttbRm0tADz52exbBkAJHv8jg4J0jY2ZvHn\nPwMzZxb+/JYtQENDFtmsTCa4dCmweXPu++fPz+LQQ6W9++wDvP22vD9uezs75XhDh2bxyCPA1KmF\nP79jh6TJrl8PjBiRwcaN0c8/ebKc3/n6pEnAH/+YxXHHJf//37Qpg1Gjcl8fOxZYuTK3P++/P4tv\nfhOYPj2DGTOARx/NoqEB6N/fPt6GDcC++waf76ST5P+7eHEWdXXyukz2mMUNNwCnn57B0KFAXV0W\nTz2Vf728/XYG3/secP31WRxwAHDKKfbrK1cCF1+cAVHu+bu7xQ2VzQLnnZfBNdcAjzySxZAhdvue\neiqL1lZg4sQMBg4EPvaxLK69Fnj00eDvs3y5/H+cr9fXA6tXe1+P27bl/38BuX42bwai/P56eoAt\nW+R89fUZq5Aw/OfjbG/fnsF772Uxe/YcANg7XpYcZk71AaAFwCLH9v6O5/8G4IGAz3ItMHIk89q1\nzA89xDxzpvd75s2bF+nY3d3M9fXMvb3MEyYwv/lmuM8tXco8aZI8v/565m9+M/89H/848yOPyPOn\nnmKeMSNSE3Po7WWuq2PetYv58suZb7kl/z1efTFqlPQhM/PPf8584YXR2/DGG8wHHZS775lnmE84\nofhj7dlT+D233858ySW5+3btYh4wgLmnx953//2yb/Zsu52jR8/L+dyPfsT8pS8Fn6+tjfmAA/L3\njxnDPG4c8+OPy/b3v8/81a/mvqezk7mhQb7XlCnMc+farz30EDMR8+LF+ce+4Qbm666zt08/nfnh\nh3Pfs3Il84EH2tvr1jEPHcq8aVPw97nqKuabb869Ln77W+azzvJ+/7e/zXzjjfn7OzqYhwwJPpcf\nDz5on2/KFObXXot2nCjceivzZZfl7rPGztTHd+cjzHQqkSGiBwD8HcChRLSaiC4C8N9E9DoRLQQw\nwxKUmmX7dolljBqVTsxk82apQyASn3LYuMmWLZJ9AwCHH+4dhE8jAG/W5xgwQFwvYYPeSbq5vFJH\nTR8Uk920ZAkwZUrhz5ipTZwMGCCp4s7VMV95BbjsMuDhh6WNHR3iDnQS5v/gdnEZDjpIAuMf+Yhs\ne7m5jEu2rg742teAm2+W/c8/D1xyiXwPrwQAp5sLkGr4+fNz32PiJYZRo4AjjwQWLgz+PsuXS4zN\nSRQ3V1Ixk1IXLlZ8zAQAiKgfEc2LenBm/iwzj2HmemYez8z3MPMFzHwkM09j5pnM7DMDU23Q1iY/\n2rq6dGImHR3Y61MvRkzee88OaE6Z4p0enEYA3vlDnzjRO6MrTMwkTgDea7AZNkx+sO++G/44P/uZ\nDHSFBNErZgLkx01eeQU44wxZHfGhh+RaaWnJ5HymqalwNpc7LdgwaRJwwQVSXAgA48blB+BbWyWz\nDZDZjZcular2s84C7rtPjuE1ILv9+n5icvDBufsGDy4cx/CKmUTJ5ho4UNKid+8OPp8Xms1VQEyY\neQ+AXs24Sg9zpwekY5l0dNgV0sVaJkZMDjlEBhXnD2T3bkkbNYNgGmLiZZl43eUzS9vM9ONxLRO/\nwcZPVL3YuVNqNd73PuCFF4LfG0ZM9uyRO/RjjpFMqzlz8mtMgHD/B3cml+EHPwCuv97e9srmMhXq\ngAyeV1wh6/DceKMUXTY2etfjmGwugxET5/9z5cpcywQoHBTfvl2+j2mTIUo2F1H0WhPN5go3a/A2\nAIusavbbzCPthtUKbW2StQTIwNDR4T1gmmBbsSQhJgMHyo/VmdG1YYO4YUzGUxpiMmGCiK2zJuEz\nnwFuvDGb85mdO6WNddbVPHJkfDHxGmyKEZPf/14KL887LxkxWbFCXEhDhwJnnilT1c+fD2zfns35\nTBw315AhuRN6jh4tNwzOFGCnmADApZeK2+1f/kW2Bw/2FhO3m2vsWPnrtLy8LJNBg4Izo1auFAu2\nf//c30iQm8uvAt6cL4qYqGUSTkx+B+DbAJ4BMN/xUBLA1JgA9hTWSa7HnYSYAPkDqfvutqlJBgyT\nthoV50A+aJAI7Nq1sr1qlbh33PM2uWc1HjzYpM3Gb4MTv9iRF3ffDXzhC8AJJyQjJq+8IlYOINfJ\nrFmSXuuOmYRxc/lZJm7695d2OV17bjEZPBiYOTN3209MnAMeUb6ry8syaWgIHty94iVANMsEiD6l\nilom4WYNvhey0uILzHyveaTftNrA6eYC/F1dcWImSYmJcyB11pgAYhU0Nha3bKwX7h+6M25y++3y\ngx0zJpPzGbeYEMVzdcW1TN5+WyraZ84Ut9TSpf4DInPucrNO3GJy7LH2a7NnSzuPPTaT85k4lokX\nbleXW0zcDB7sPRi73VxArpjs3i3ncR+7kKVg4iVAcTETPzGJ6uYqt2Xi7ttyEGYN+DMBLATwuLU9\njYgKrL+nhMXp5gJk4E8ybmKyuYB4YnL44bkDqVcQNwlXl5eYrFolIjVnjszt5BYsr/VWgsTkhhuC\n7z793CBhM7ruuQc4/3xp06BB8rkFC/zPBXjHaJxiMn++bZkAwNFHA0ccIa5GJ2HFJIxlAkgQ3mR0\nMRcWE7+YidvNBeSKSVubCKr7/1jIzeVnmRTK5vLqb3O+JGImpS
xarBrLBMANAN4P4D0AYOaF0AWx\nEsPp5gL8LZM4MZMo2VxuMZk6FXjxRdvl4bZMgGTExD2QmwkI778fmDFDrINly7I5n/ETE6+Mrp4e\n4LvfBd54w78NfoNNmIyuPXtETC6+2N4X5Oryc3EBtpjs3i1T1B9zjP0aEfDII1LY6cS4uYIEL6yb\nC8i1TDZbs+w5K+7dhI2ZACImr7wibXWnBRsKubmclonzNxLHMoni5iq3ZVItYtLDzO4hKKZnXAHk\ngtu0KfeHnXRGl9vNtXVruM8560wA4LDDgC99SQa0Bx/0dpUkZZk4B3Lj5rrtNuDyy+U19+BSjGWy\nerUMzkHzXgUNNoVcXXPnyv/zyCPtfVHFZMwYGfiXLAEOOCA/PjJhQv4gYhIR/Aaz3btFFEaO9P8O\nTpxiYqwS9wqFTvzExGvAGztWjrVmjT2NipsgS8FM8Jh0zESzuaIRRkyWENFnAfQjokOI6H8hhYhK\nTFavlh+UyesHKidm4qwzAeRH/61vyUyu114LPPBAadxcBx0EPPqo9FEmI2LS3JzJ+YyXmHjVSAD2\n/FpRxcTt7nMzd67UXDiJKiYNDSLojz2W6+Jy4nVdBP0f1q8X15jzmgvC6eYq5OIC/GMmXpaJMwjv\nZ5kEubna2+V1Yyk5+8LPzbVnjwz0fjGGJCwTLVr053IAUwDsBPArAFsBXJVmo/oSu3b5++7dwXcg\nfcskqpvL8P73A6++CnzlK8Dxx+e+5jWIrV9fXBGYV8ykq0vqGYi8s5W8xGTyZPGnuzFi4jWBoV8b\nnLgTEdy88oqsx+Fk4kRpo5eABYkJIDcbv/+9v5h4EZTRtXp1eBcX4G2ZBFFMzATIFRMvyyTIzeUX\nLwH8rYNt26R//KwrtUyiEyabq5uZvwngwwBOYeZvMnMJw0vVze9+B3zxi96vuYPvQDoxkyTFBJAf\n43/8R/4g6CUm558P/OUv4dvrHshHjpTMpfPPt8+9Zk025zPFisnhh6fj5nIWFjohEuvkxRfzP+M1\nlYqTsWPlc85MLide14X7/9DVJZbkpz4FfOxjwCc+4X8+N04xcVa/+1GMmwuwxcQrLRgIHtyd8RIg\nXMwkKPgOaMwkDmGyuY4jokUAXocUL75GRD6XtuLmnXfkjs4Ld/AdqHzLJAgvMWlrE3dEWNwDOZEE\ntM1dbdiYyaRJEmR3B6JXrgROOSVYTIKK2oIyut54Q8TPK0Dt5+oKY5kQSfZWWNz/h3/9V+AnP5FU\n5dWrc6vcC7HvvnLXvXVreDeXW0yY/adJP/ZY4KWXvNOCgWA3V5Bl0q+fnNe9CFfQjYI5n1om0Qjj\n5robwJeZuYWZWwBcClmOt2r5xz/kx10K2tu9ffeAv5vLXZQHRIuZ7N4tP2wTuA0rJmZhLHfAtxDu\nQYxZMp+KmSer0I+9qQmoq8vk7PMSkyFD5L3uzKuVKyX2Usgy8bt7Dcromj/f34KIIyaTJ/v3idd1\n4XZzPfsscNddwIUX5iZVhIHInvDx7bfzb37ceMVMdu2SAkjn+jAGEzP0SgsGgt1cbsvE2Rd+y+cW\nur7UMolOGDHZw8x/MxssS/FGmAqtcli/Xu6GSvEPX7tWgtleWVReboMkLRMTRDfTjAweLN+50Ap5\nXV3ygzB3WmFxi8nmzXKhry9iKs8gqwAIHzMB8l1de/bIgPjBDwavb15owPFzdQWJyXHHSazJ3feF\nxOSoo2TOq2Jw/h82bpSbEy8XUljGjxcL08uSduMVMwka7EwQ3iteAgRbCkGWCeA9qBe6vtQyiU4Y\nMfkrEf2EiDJENIOI7gCQJaJjiOiYgp+uQLZulYGkrS39cxkXj9+KdWnGTJwuLkB+uM3NhdOD3WnB\nYXGLibl7L9YyCfJpNzXZi3QZworJmjWSyTRypNxFmroJrzYEDTh+GV3OKU/cNDfLjcPrr+fuLyQm\nH/848P/+n//rhWImRuDqwvzSfRg3Dnj5ZTlu0P8G8HZz+QXfDccd5y8Kfm6uHTsk3uT8/bj7wiuj\nK4ybKwns5HRkAAAgAElEQVTLpFRFi8yVIyZhVlo8yvrr9rQeDYABfCjRFpUA80N7+21YKwWmR3u7\nuLJWr5Y7WsPOneJuO+CA3PcnaZm4xQSwXV3u2WadRImXAPlismaN/MCSdHN5TUnuJyaTJuVOTums\nZRg3Ttrn7h9mGQyDBs0pU0Q4nPgF350cd1z+tCiFAvBRcFpvL7+cn11WLOPHA9ls4XgJEE1Mrr7a\nP+PPz81lrG4v15khqpurGEvaUC7LZOdOOW+cm4WkCJPNdUrAo+qEBLAHvNbW9M/V3i4ptO64SVtb\nfo0JID+Q7u58d0iUmEmQmAThrjEJi5dlcsQRxf04C/3YBw4EiDI5d5xhLROnmHitsQ5I3zc0BNdh\nTJkis/Y6CQq+G445JndiQ1OpXuhuP4hCdSZJiMm4cbL4VVgxcd/ZF7pzbmryt4T93E5eAuXuCz8x\nCervSpo1+L33JAU/iEqxSoBwbq4+h9MySfs8vb0y+AQtMuSESAYkryB8sUQVk6Qsk3fflQE0ScvE\nq9Zkxw7vH1QhMfFyPRYabACxLN56K/f6CYqXOD/nFBPj4gqqKI+C8/8Q5HoLy/jx0sdhxMQrZlLI\nMgnCz80V5phR3FxRA/BprLS4ejXw298Gv0fFpMxs3SqDbNpiYibUM24uJ0GZMV6urigxE+ckj4ZS\nismaNcC0adIOd4qmF8x2UVkQ/ftn88TEyzIZP1760bw3jGVSaLABZMC56CKZxdgQRkyOOgpYtswe\naArFS8LgdV0YsX33XRnk3BmDxTJ+vPwNIyYNDXJO5/87jpj4ubm6usQKcuLui1IG4NOwTLq6CrdF\nxaTMdHbKD7tUYuK1Yl1QAVhQ3KS3N7zV4pzk0VBqy+TAA8WFESYO1N0tP8QgPzggP54wYlJXJ1lM\nZlLHpMQEAL78ZZnF2LQjjAXQ2CjTw5jgfRJi4oX5PxgXV1zLxyxkFUZMzGqFTuskzoBXjJvLTSlT\ng9OImfQZMSGis4IepWxk0nR2ykR8pRCTMWP8lz8txjIx/uBnn5W7/TAXa7ndXGvWSILByJHh4iaF\n7hoNo0ZlcgYrPzEBbFcXs7imDjpI9psAvJuwYtLSImux339/uOC7wRk3SSL4HhQz8ZraJQqDBsl1\n7Je+68YdN4nr5ooaM4mazVVJlkl3d/AM0FUhJgDOtB4XQwoXz7cePwPwz+k3LT06O2VQ6eqKv5hT\nEGaq77Fj5bnT9A+qJg6yTNatE1//PSHKRsOIybp1wCWX5L4nydTgAw6Qu+8wcZMw8QrAO2ZSSEza\n2yU91wwkQTGTMGICyHxht90mGWOFgu8GZ9wkLcvE9M/LL8ePlxhefz2cZQLkWyZxxKS+XgZq9wqe\nXm4ur88WG4CvNMsECD5WVYgJM1/EzBcBGADgcGY+m5nPhkz6WGQ5W2WxdasMqi0t6Vonxs1VXy+D\n+rp19mtBBWBBMZN//
EPubm+6qfAFG0ZMnnsOuPfeXKFLwjLZvl1+DKauI6yYhBnId+wIFzMBbDFx\nT3Fu3Fzuu74wMRtDJiNZXzffXDheYkhaTPzqTLZuTc4yAYJTyd2404PjDHhE8r91B+G9BCpMzKTa\nLBMguD1VISYOxjGzc3al9QDGp9SekmAuqAkTSiMmQK6rq6tLfux+Lo4gy2TTJuC002SxKqd10tsL\nfP3r4sc3hBGTBQvkwncWcEZNDR40SOoFenrEKhkzRgaDsG6usGISNmYC2LUmbjFpbpaYitvlV4xl\nQiRrrMyZE15Mpk2TmMmuXenGTJYskcE26RqWMLjFJI5lAngP8JWczZVU0WJfFJOniOgvRDSbiGYD\n+BOAJ9NtVrqYC6oUlolZQMopJm1tEpj2KzTyWrrX+IM3bRKxuf562zrp7QUuuwx44gkRFBOgDysm\nAwfmptBGtUxM2m5np4iJCdwW4+YKM5BPnJgJLSaHHirTm69Yke/z94qbFCMmgCwjvO++4S2AwYPl\nJmbJkmTExG9urs7O5KySYkkyZgKEt0zC1pkEzTmnlkl0whQtXgbgx5BK+KMA3MXMl6fdsDQplWXi\nXB7VvS5E0BxHhSyTYcOkEHLqVODuu0VIFi4E/vpX4Oyzgf/8T3nv5s3B2VzM4nL5+MdzK8Wjiglg\nu7pM8B1I3s1VTMykqUlcbU8/nS8mXnGTYsWksVHiCaecEv4zJgifRvU7YLc/qXhJsSSZzQV4D/Bx\nYiZpTKeSRszEXON9QkyIqB8RzWPmh5n536zHw6VqXFqYGXEnTEi3Ct7PzVVoXYhCMZPhw2Xf9dcD\nV14pQvL44/KdbrwR+MUvJB22kJisXSt/P/zhZCwTwBYTp2WStJtr06bwMRNA4iavvOItJnEtE3Oc\nYtJvjz1WLMI0YyZAeS2Tcri5kpibq7ExvmWSVNFin7JMmHkPgF4iiji0VCalsEy2b5dBzgzmSVsm\ngFgnc+bYQgLIwH311bJee0ODfYEbnGKyYIHcJbsrxZO2TMK6ucKmBhcTMwHsSQRNWrDBS0zCtiEO\nxx4LPPOMJD2kcS6T8FEuyyRpMQnr5nITxTKpr88vugxDmtlchcQkTt8mSZiYyTbIolh3E9Ft5pF2\nw9LELSbOjJ7f/ha49db45zBWibljjWuZuGMmhvPPz/cBX3klsGpVfrwE8BcTt5srSmow4G+ZJOnm\nOvLI8DETQL7fsGH5VppzfXNnG+LMlRWGadNkga0kplLxipkQyfcKk6qcBu6YSancXIViJrt3yyPo\nWiGKFjdxWib9+0scs1hBctOnLBOL3wH4NoBnAMx3PKqS3l57Vth995UguLOi/M47ZZAthp07gY9+\nNHfmU2e8BCjeMuno8C5WcouJFw0Nkq7qNY2Gl5iMGSN9snmznLNQkDIIp5g4Yybr1wcXXwHF1ZmE\nLVoEpED18MPz9yfl5iqWffaRxIA0MrkM5bxbTbLOBEgum8tcX4UEPIqYOC0Tv4W5iqWrS8anoBhO\nVYkJM9/r9ShF49Kgq0s638wK63R1rVsnQWz30rOFWL0aePLJ3JlknfESQOIc3d3iRilkmdTXy8O5\n7kg2m0VPj3w+jNXw6U8DTz2Vv7+pSQbf3bttMSGyU2i3bcvtn2LxcnOZH7DX2uBOwg7kbW3FxUxO\nPhmYOzd/f7nEBBBXVxLB9yhztqVNudxchepMtm4N978tNj3YWCDO30xSYjJsWB+yTIjoECJ6kIiW\nEtEq8yhF49LAPVg4xeShh+RusdDiUW7M5597zt7nFhOz/OmSJXKRmSC6H6NG5QetzVxbYdcu8Fop\nkUi+/1tvSV8YUTNiErXGxLDPPnKMDRtyv38YV1cadSaAXfjmppxictJJ8SdgrFSSLFoEksvmKub6\nKsYycVolfueOgin6DWpLd3cViQlkvfc7IUv1ngLgPgD3p9moNAkSk//7P5kNtljLpLVVLmynmKxd\na9eYGMaPF8unpaWwqT16tL1KIyD+4DAurjAMGQLMm2dbJYAdhI8TfAekb1eulHY6f2BJislJJxUX\nM/FjyBBxezrrbkolJl/6EnDLLfGPE2Wdm7RJus4krJur0NxcYf+3xVomzniJIYnCxTBiUlWWCYBB\nzPwUAGLmNma+AcDH021WeviJydq1wKJF4h4q1jJpbQXOOivYMgFETJ55pvA62kC+mADh4iVhcIqJ\nIUkxWb7cDr4bvCwtN1HqTJjlx1xfX3xbifILF4uZTiUOdXWFZ0euVpKOmSSVzdUXLZNqE5OdRFQH\n4E0iuoyIPgWgBD+3dDA1JgYjJg8+CHziE/LPi2KZfPSjcoGbgclPTJ59NtyEeW4xyWaziYpJNpsr\nJsbNlYSYLFuWvxxxGMskbFru4sV2zGTnTrP6YrT2ul1dpbJMkqIaYiZpuLm6u4tfz6TUlklYMVm7\n1js5pasLGDGib4nJlQAaAVwB4FgAnwNwYZqNShM/y+Q3vwHOPTd/5tswmBmATzoJ+PvfZZ+fmGzZ\nEt0ycRYsxmHIEBnYnWJyyCGSTrxpU3wxWbs23zJJK2YS1cVlcFbBh1n/XSlMKYoWu7qiZXOVyjIJ\nW7jILPVAzjovQ18Ukw5m3sbMa6yZhM9m5hdSb1lKuC+olhYZRJcvl0pw40IplMbqxGRnTZ9uu7r8\nYiZANMsk6ZjJ4MEiIAazZsXChdFrTAC7b92WSVg3V5iB/NRTM4mJyWmnAd/9rvwPu7oKr/9eadRC\nzCSpubmKEZNSWSZtbfI7dy9419srQrHffn1LTH5ORG8R0a+J6FIiOiLswa1Cx/VE9Lpj375ENJeI\nVlgTSJa0ut59QTU2ygA9c6ZcEP37y8VbKI3VsH271GeMHm2Lyc6dch73wG/EpNwxk+ZmKZxzD5qT\nJgEvvhjfMgGiubmKdUP09sYXk3PPBb7yFeBDH5JMu2pycVUqpZibK2ydSVQ3V6liJs8/L3/ds1eb\nLK1CLreqEhNmngHgMAD/C2AogD8RUciFY3EPgI+59l0D4ElmngTgaQDXhm9ufNwxEwA44gjg85+3\nt816EGFoa5Mgbl2d1A4sWyZpt6NG5afwmjmcwlgmY8akFzPZd1/g6KPz90+eLBMQJiEmxbq5inEx\n/e1v2b13j3HFBJBp5K+8Evinf6o+F1elx0z27ImeIGFwi0lPj9xIuK0Br5hJKdxccSyTFywfj3u8\nManPhdpSSWJSMJ+EiE4G8AHrMRTAowD+FubgzPwsEbmz6T8JYIb1/F4AWYjAlASvC2ru3NwAbnNz\n+LiJswCxoUHWlv/97/PjJYD80+fPDzfNxejR9kSMhqRiJpdd5r1/0iQZnNOwTAq5uYp1MZkq+CTE\nBBAxYZZ5zpR4OMXEzB0VZ9oYt5vLWCWFjunl5nJfl14UG4CPa5lMnpxvmVSjmIRxc2UBzARwF4AM\nM3+ZmX8V45wjmXk9ADDzOgAjYxyraLzExH1RFhOEd6+YOH26ZIa54yUGL4vAi/
32kwvaXEhJxkxG\nj/YWu8mT5W853FzFZFFlMpm9sa2kxAQArroKeOyxZI5VKio9ZpLEYOceUP1cXF51JqVIDY5qmWzf\nLq7VD32ob1gmYcRkOID/AHAigMeJ6Eki+s8E21BEqDs+YS6oYtxc7nm2pk8HXn3Ve7AuBiKZbsO5\n1G9SYuJHEmIybJjMg+V2Fw0bJpXxzvnLnBPhFTtbbxpiAsSfeFHJjZnEDb4D+QNqmOp3ILqbKynL\npFDR4oIFwGGHye+8L1gmBd1czPyeNX3KOABjAZyEeGvAryeiUcy8noj2BxAYlp09ezZarNF66NCh\nmDZt2t47EOMjLWb7rbeA008Pfn9zcwadneGO99JLwL/8i70tWWAZjB4drX3O7cGDs/jTn4DLLssg\nm82ivV1qQaZMif79g7aXLcuisREYMiTe8ZYsyX+9Xz+gqSmLP/wBOOusDJYvB048MYv77gPOPDNj\nWYJZZLOFjw8ATU0ZPPNMFtu3Aw0N6fRHNWwvXLgQV111VcW0BwBmzMigtxd48sks3n0XaGyMd7yG\nhgx27LC399svg8bG/Pf/8Ic/zBkfXn89a82+LdttbVmsWmVv+51v0KAMNmwI376engwGDsx9vb5e\nzh90Pd9/fxbjxsl4s3Jl7utdXcCuXVksXw5s3+5/fgnUy/gwx1qzuyVMhk8aMHPgA8AqAH8G8O8A\nTgYwsNBnXJ9vAbDIsf19AN+wnn8DwPcCPstJc+qpzI89Fvyez36W+b77wh3v/e9n/vvfc/cdcgjz\nXXdFa5+TmTOZH3xQnj/11Dzu14955874xw3iqquY165N59hTpzK/9po8P+ss5qYm5rvvlu1slvkD\nHwh3nHnz5vGppzI//jjzww8zf/KT6bS3Gpg3b165m+BJczPz5s3M8+czH310vGM9/TTzjBn29vPP\ny+/Ojbsvli5lPvRQe/v44+Wzhfjxj5m/+MXw7fvTn5hPPz1336WXMt96a/DnPv1p5vvvZ77nHuYL\nLsh97ZFHmM84g3nBAuajjvL+/K5dzP36eb9mjZ2hx+kkHmHcXAcz8z8x803M/Cwz7yr8EYGIHgDw\ndwCHEtFqIroIwPcAfJSIVgD4sLVdMsKYusUE4L2mk7/6apmpNi7O9OCjj85g8OB832zS/M//xHfR\n+WHiJi++CLz0EnD77cCvrOhbsTGTwYPTcXNVG5UYMwHsuEkabi6v6nfAu87E6eYKO2twEjGTMEWL\nzz8PnHBC7rIQhjBurkpycQEh3FwADiaiOwGMYuapRHQkgE8w83cKfZCZP+vz0keKaWSShI2ZhBGT\nri55n3sq8S9+MXr7nDjFJO14SSkwGV3f+Y4sOXzOOcAVV0hcqNhpTJzFpbUsJpWKiZskJSbO+EOY\n6negdNOpRMnmWrNGXp84UZJ4vALwTU3VJSZhLJOfQmpBegCAmV8HcF6ajUqTsJZJmAB8W5tMI55W\n0NYpJnPnZqteTEaOBO67TwRl9mz5IZx5pmS/FSMm2Ww2tQB8teGMI1USJj04iQGvoSFcNpe7LwZW\ncDbX888DJ54oY0chy8RP2KpRTBqZ+SXXvt2e76wCvIoW3YS1TAqtmBgXp5hs3Vr9lsnIkVLT893v\n2jPmzpolrq6olkmti0mlYsSklG4uN043F3P4jMFSVMC/8IK4uAAZj9xism1b9bm5wojJP4joIFgp\nvET0aQDtwR+pTMyStEmlBhdaMTEuTjEZMyaTSMFiOWlpkbuxT33K3veRj0iGWjFTmZg6kySLFquV\nWomZhHFzecVMzIC+fbsM+GGm/S/F3FzGMgHEMilUZ+I1V2A1ismlAH4CYDIRvQvgKgBfSrVVKbFz\np0xxUiiIHTYA7y5YTJq+FjM57zzg6adz3YIDBwJnny2zNqtl0ncwMZNSurncDBhgT71SjOWb9nom\nO3cCr70mswUDwW6u/v1lVohdHmlPlbTKIhBubq5VzPwRACMATGbmk5m5NfWWpUDYC6pS3FwjR8ps\norJee/XHTOrqvAf+WbPkh6Exk+Kp9JhJkm4uc3ceNmZSV2cLSjFikvZKiytWSKzVtKe+Xr6bU3yc\nhZl+bjczVU2lEGYN+BFERMzcxcydRDSLiBaXonFJEyZeAoQPwKft5urXT+biWr9e7lyqXUz8+MAH\nZPqZYiZZVDGpbJIUk/79RRjM7AlhK+AB20KoJMtk8+bcOfa8gvDO7+jXnqpxcxHRWUT0DwCvA3iH\niD5BRAsAfAbABaVqYJIkbZmk7eYCbFdXfX31x0z86NcPuOMOEZUwmJiJikllx0yScnMBua6usHNz\nAXZGVzksEz8x2bo1f8oidxC+GsUkKBx1A4ATmHklER0D4EUAn2LmR0vSshQoRkwKWSadnfLPHDEi\nmbb5YcSkL8RMgvjkJ4t7v4pJZWMG5O7u/OUIomAG1Obm4qwdk9FVasskqGhxy5Z8D4k7CF+NYhLk\n5trNzCsBgJkXAFhRzUIChL+gwgTgjVWS9sSARkza2qo/ZpIU2WxWK+AtaiFmAuRmdPm5ubz6Ioqb\nq6FBBMg5EWkQUSwTLzHpy5bJSCL6imN7qHObmW9Jr1npEDZmYtxczP5iUQoXF2CLSV+oM0kStUwq\nm8GDZeqcUrq5vHC6ucL89gH5zZs1VMLEZoqNmXiNQ+44rVtMvNxulSYmQZbJTwHs43i4t6uOsHcn\n/fvLxRDkN21rs5fhTRMjJp2dGRUTC42Z2FR6zCRJyyRKzCSKmwsoLm6SRMzEyzIxCSlVb5kw842l\nbEgpKOaCMtaJ353Ju++GW7UtLqNHAw8/LHdLlZQGWG5UTCobZ8wkaTdX2Ap4wB7Uw07y6Dxf2LhJ\nsZbJli35i+cVskyqQUzCFC32GYoVk6Ag/Nq1pROTxYtlLRBduEkwdSZaAV/5MZM03Fx+FfBefREl\nmwsobkqVNGImZjqVoLaomJSRsDEToHAQvpSWybp14dtdKwwcKJXNW7fWtphUKuVwc3kR1c1VzJQq\nxa60WEhMentzCxLVMqlAori5/CiVmJjp7Q88MJP+yaqETCYDInF1/eMftS0mtRQzKeTm8ouZRLFM\ninFzJVVnYjwhsnqoFGoGtaVPiIlVd1J1FHNBFaqCL5WY1NcD++2HPluwGIemJqm/qWUxqVRMzKSU\nbi4v4ri5jGVyxx2AtTKyJ1FiJkGWiTv1uU+LCap0osekLJPOTslBd99dpMXo0cCOHdnSnKwKML7x\npiZxCdSymFR6zCRpNxdz+Lm5gHhurq4u4JprgJtukiQYP4pdabFQanC1ikmYlRbzYOaE1hIsLcXE\nTIIC8MYqKVVAfMyY0glXNWF+cLUsJpVKWm6unh753bktAT+iurkaG4GvfU1muFiwQObg8ztGEnUm\nhSyTbdvyj1N1YuLj0toCoI2Zq2qRrGLdXH6WSalcXIbRo2UNeEUwvnGTh1/LYlLpMZOk3VxBkzwm\nNTcXIEtMT50K/PKXIiyTJwNLlwLHH
5//Xi/LpH9/sZr37JG555wUqjPxEpONG/PPW3ViAuAOAMdA\nJnwkAFMBLAEwhIi+xMxzU2xfoiSVGlxqMbnuOrVMvDBiUl9f3nYo+TQ2yu9t4MD8wTQKxtVTrKUT\n1c116612ABwApkzxFxMvy4TItk6c7d25U0TGfc32BTdXmJjJWgBHM/P7mPlYAEcDWAXgowD+O83G\nJU21WiYHHQQsXpwt3QkrHGfMpL6+dO7GSqRSYyYDBsgjqUJb4+YKEpOk5uYCcoUEAA4/XFYD9cLL\nMnGe24lxcbmv2SDLxK/OpOoWxwJwKDPv7UZmXgpZJGtVes1Kh2JjJpUiJoo3TU217eKqdMyys0lg\n3FzFVL8DMshv3x5+ni0/pkzxFxMvywQIFhM3++wjcZHe3typVIC+ZZksIaI7iWiG9bgDwFIiqgfQ\nk3L7EmP3bvmnh+38oNTgcohJpfrGy4EzZlLrYlLJ18XgwclaJiZm4ndMvzqTTZukLXEsWOPm8sLP\nMjFLFzvxipcA4gpsbBRB6cturtkAVkLWfr8K4uKaDRGSU9JqWNJ0dsrgE/aCUsuk8lExqWwaG5MX\nkygxk02binNxedHSIgWyXmOCn2UyfLic24lXjYnBuLqcU6kAfUtMTgfwI2b+lPX4ATN3M3MvM3sk\nrFUmxfpMKykAD1Sub7wcOGMmtS4mlXxdJO3mMjETP3eV39xcGzfGn46ors7O6HLjZ5kMH56fhRXk\najfekL5smZwJ4A0i+gURnUFEkWpTyk0x8RLAPwC/e7dcIGaaE6V8qJhUNqV2c3lRXy8WRVzLBPB3\ndflZJiNGyLmdBI1DxjIJIyZ79shYVEmZjAXFhJkvAnAwgN8CmAXgLSL6WdoNS5oolomXmKxfL4tU\nhS2aSopK9o2XGo2Z2FTydZGGmAS5uYJiJkmIiV9GV5Bl4iUmfmn+QZaJe9JJY5VUUiZjqOlUmLkH\nwGMAfg1gPoCZaTYqDYoVE78A/Lvv5q9FoJSHwYNVTCqZxsbSurm8GDgwWcvES0yCYiZuMQkTMwlj\nmVSaiwsIISZEdDoRzQHwJoCzAfwMQNU5eaJaJsy5+8sVfK9k33ipMX0xfrzU4NQylXxdlNrN5Vdn\nUuzCWH74ubmSipkMGeJtmXjVmVSimISJf1wA4DcALmFmn9lmKp9iYyYDBsiUCM51BQDN5Kokjj/e\nuyJZqQzipuM6iZPNBSQjJs6MLufxio2Z+I0fzc193DJh5lnM/HsjJER0MhHdnn7TkqVYywTwDsKX\nS0wq2TdearQvbCq5L9LK5iomZmIshiTExCuji7k4N1dQzKSQm8vpJalKMQEAIjqaiG4molYA/wlg\neaqtSoGNG2VdkGLwCsKrZaIo4UgrAF9MzCRJywTId3Xt3i0eDC8LrNiYiV8Avn9/EbIeR4l4VYkJ\nER1KRNcT0TIAPwTQBoCY+RRm/t+StTAh3noLOPjg4j7jFYQv1drvbirZN15qtC9sKrkvLr4Y+MIX\nkjlWnJgJkJyYuDO6/OIlQLSYibFMnNOpAPmurqoSE4j1cSyAU5l5BjP/CMCe0jQred56q/hgrVom\nihKdlhZZByQJwri5vEjSzQXkZ3T5ubgA8YRs2SLWiyFKAB6ofjE5C0A3gGeI6MdE9CHIFPRVycqV\nxVsmXlXwGjMpP9oXNrXSFw0NYgW4pxpx4ldnAqTn5gqyTPr1A4YOBTZvtvcVqjPxmk4FyK81qSox\nsYLu50HWL3kGwL8BGGlN+nhqqRqYBJs3yz99xIjiPucOwJd6uV5FUQSzPkhHR/myuYD8ObqCLBMg\nP24Spc4EqH7LBADAzF3M/AAznwlgLIBXAXwj7omJqJWIXiOiV4nopbjHC8LES4pNU3S7uUq9XK+T\nSvaNlxrtC5ta6ouGBqlmLyZmkrSbq64OOPRQYMUK2Q6yTAC5gXXGTQrNzbVli7crz11rUpVi4oSZ\nNzPzXcz84QTO3Qsgw8xHM/P7EzieL1HiJUB+AF7jJYpSPgYNsqeTD0vSlgkATJpki0kxlsnOnZLe\n6zef1pAhMl1TfX3+6pR9wjJJESrV+aPESwB/y6Qc1IpvPAzaFza11BeDBgHvvVf83FxAemJSyDJx\niomJl/h5NoYMkfHGSyzdYrJ+PTByZLT2p0U5xYQBPEFELxPRF9M8UZS0YEAtE0WpJMw8bFGyueJO\nQe9k8mRguVVpV4xlEhQvAeT79e8fTkxaWyV+U0mUczr56czcTkQjIKKyjJmfdb9p9uzZaLF6bejQ\noZg2bdreOxDjIy20vXJlBp//fPj3m+21a7N44w0AkO2XXspi7Fh7u9jjxdl2+oNLcb5K3jb7KqU9\n5dxeuHAhrrrqqoppT5rbu3fL9uDB3q//8Ic/zBsfduwAgAz22Se59kyalMGKFbK9dCkwcKD/+7ds\nAXbskO2nn85aVon3+//61ywaG72/36BBwPz5WQwZItutrcDmzVlks/ZvYc6cOQCwd7wsOcxc9geA\n6wF8xWM/J8GYMcyrVxf/uV//mvmcc+ztM85gfvDBRJpUNPPmzSvPiSsQ7QubWuqLk09mBph7erxf\n95lTZWIAAAzeSURBVOqLPXuYv/CFZNvR2ck8aJAc+29/Y54+3f+9997L/LnPmfYxz5gRfOwJE5jf\n9778/RdeyPzzn9vbo0czv/OO/3GssbOk43hZ3FxE1EhETdbzwQBOBbDY671eK4wVQ3e3pBNGcU85\n3VxPPQXMnw/MmBGvPVExdy+K9oWTWuqLhgZxW/X38ad49UVdHfDTnybbjqYmWdNo9eriYyaF3G1D\nhhR2c+3YIYkIo0dHa39alMvNNQrAw0TEVht+ycxzvd64Zg1wyCHRT7RqlVTh1kWQTROA37gRuPBC\n4N575eJQFKX0DBqU3FxfcXEG4QvFTExqcKGYCRAsJqZocfVqYNy4/IyvclMWy4SZ32bmaSxpwUcw\n8/f83vvOO/HOtXJl9DUvTN73RRcB558PfPSj8doSB2e8oNbRvrCppb4YNCg4LbiUfTFpkgThw9SZ\nFGOZNDfnz8sF5FomlRh8B8qbzRWKuGISNZMLEMtkyRK5s/jOd+K1Q1GUeDQ0VI5lMnmyWCbFZHMF\nTaVi8LNMnEWLKiYRScIyiSomw4aJi+yBB0q/5rubWvKNF0L7wqaW+qKQm6uUfWHcXIUsk6YmEZzt\n28NbJoViJiomESm3mytq9byiKMlSyM1VSoybq5BlQmS7uuLGTFRMYlJONxdQnnm4vKgl33ghtC9s\naqkvCrm5StkX48ZJNX5HR7BlAtiurjCWyXnnycNNNYhJOYsWQxFHTHbtkqr1Aw9Mrj2KopSHSsrm\nqquTLNPFiwu7wJ1iUihmcuSR3vurQUz6tGXS1gaMHVv+eEcS1JJvvBDaFza11BeF3Fyl7otJk4BF\ni5K1TPwwYlKpNSZAFYjJrl35qx2GJU68RFGUyqKSLBNAMrrCWCZmGvowMRM/jJhUao0JUAViMm
5c\ndOskbrykkqgl33ghtC9saqkvzjkHuPpq/9dL3ReTJslCVqWyTLq7K9fFBfRxMVHLRFH6DqNGyQBe\nKZi2JBkz8cPUmaiYxEAtE6GWfOOF0L6w0b6wKUfMBChtzETFJAZRxaSrC3jpJf/sCEVRlDg0NckE\nsmFiJmvWyHO/VRYLoWKSAFHF5Mc/Bj74Qalg7wvUkm+8ENoXNtoXNuXoi0mTwlkmb70Vb4EuFZME\niCIm3d3AD34AXHddOm1SFEUBgOnTC6fpDh8ObNgQPV4CVIeYkKyjUpkQES9ezDj7bHuZzDDccgvw\n3HPAQw+l1zZFUZQw7Nol7q2jjwYWLIh2jJ4eOcaAAXKzXCg1mIjAzCWdv6PiK+CNZcLsPbXJr34l\nud7XXSed3d0N3Hwz8PjjpW+roiiKm4EDxcUVx801YIBU3VdqjQlQBW6u5mZZWW3zZu/Xv/c94Mkn\ngeOPl+ni77oLOPFE4KijStvOtFHfuI32hY32hU0l98Xw4fHEBBBXV6W6uIAqsEwA2zrZb7/c/YsX\ny0RrbW3APfcAmQywZw/w9NNlaaaiKIonw4fHi5kAUmuiYhITIyZua+OBB4BZs8T8u/hiyd56+mlg\n2rTytDNNtJ7ARvvCRvvCppL7YsQItUwqAq+Mrt5eEZNHHrH3HXJIvPXiFUVR0qAW3FwVHzMBvMXk\n+edlBtFaKUqsZH9wqdG+sNG+sKnkvhg3TqaDiUNzc2VPD1U1lslTT+Xu++UvgfPPr5zFqxRFUfy4\n/vr4Y9Uf/yjuskql4utMmBlPPw189avACy9I+m9PDzBmjEyX0lcq3BVFUZJC60x8OPFE8RUecwww\nZ45Ukx56qAqJoihKpVAVMZNBg4Df/Q741reAM84ALr9cXFy1RCX7g0uN9oWN9oWN9kV5qQoxAcTf\nOGsW8NprwMyZwHnnlbtFiqIoiqEqYiaKoihKeMoRM6kay0RRFEWpXFRMqgT1B9toX9hoX9hoX5QX\nFRNFURQlNhozURRF6WNozERRFEWpSlRMqgT1B9toX9hoX9hoX5QXFRNFURQlNhozURRF6WNozERR\nFEWpSsomJkR0GhEtJ6I3iOgb5WpHtaD+YBvtCxvtCxvti/JSFjEhojoAPwLwMQBTAMwiosnlaEu1\nsHDhwnI3oWLQvrDRvrDRvigv5bJM3g/gTWZuY+YeAL8G8MkytaUqeO+998rdhIpB+8JG+8JG+6K8\nlEtMDgDgXIh3jbVPURRFqUI0AF8ltLa2lrsJFYP2hY32hY32RXkpS2owEZ0A4AZmPs3avgYAM/P3\nXe/TvGBFUZQIlDo1uFxi0g/ACgAfBtAO4CUAs5h5WckboyiKosSmLGvAM/MeIroMwFyIq+1uFRJF\nUZTqpaIr4BVFUZTqoCID8NVe0EhEdxPReiJ63bFvXyKaS0QriOgvRDTE8dq1RPQmES0jolMd+48h\notetfvihY/9AIvq19ZnniWi847ULrfevIKILHPtbiOgF67VfEVHqVikRjSWip4loCREtIqIrargv\n6onoRSJ61eqPm2q1LxznriOiBUT0B2u7JvuCiFqJ6DXr2njJ2ld9fcHMFfWACNxKAAcCGABgIYDJ\n5W5Xkd/hZADTALzu2Pd9AF+3nn8DwPes54cDeBXicmyxvruxGF8EcJz1/M8APmY9/xKAO6zn5wL4\ntfV8XwBvARgCYKh5br32GwDnWM/vBHBJCfphfwDTrOdNkDjZ5FrsC+tcjdbffgBeADC9VvvCOt+/\nAbgfwB9q9TdinWsVgH1d+6quL0py0RTZsScAeMyxfQ2Ab5S7XRG+x4HIFZPlAEZZz/cHsNzr+wF4\nDMDx1nuWOvafB+BO6/njAI63nvcDsMH9HsdFcK71fCOAOkcfP16GPvk9gI/Uel8AaIQknRxeq30B\nYCyAJwBkYItJrfbF2wCGufZVXV9UopurrxY0jmTm9QDAzOsAjLT2u7/vu9a+AyDf3eDsh72fYeY9\nALYQ0X5+xyKiYQA2M3Ov41hjEvpeoSCiFoi19gLkR1JzfWG5dV4FsA5AlpmXokb7AsD/ALgagDNo\nW6t9wQCeIKKXiegL1r6q64uyZHMpAHJ/RHEJk09e0pzznBMTNQF4EMCVzLyN8uuHaqIvrB/n0UTU\nDOAvRJRB/nfv831BRB8HsJ6ZF1p94Eef7wuL6czcTkQjAMwlohWowuuiEi2TdwGMd2yPtfZVO+uJ\naBQAENH+ADZY+98FMM7xPvN9/fbnfIakZqeZmTvg03fMvAnAEJIJNt3HShUrcPcggF8w8yPW7prs\nCwMzb4X4tN+H2uyL6QA+QUSrAPwKwIeI6BcA1tVgX4CZ262/GyGu4PejGq+LUvgEi/Qf9oMdgB8I\nCcAfVu52RfgeLQAWOba/D8vXCe+A2kAAE5AbUHvBurAIMvicZu3/MuyA2nnwDqiZ50Ot134D2x96\nJ4B/LVE/3AfgFte+musLAMNhBzcHAXgGUrRbc33h6pcZsGMm/11rfQGJnzVZzwcDeA7AqdV4XZTs\noimyg0+DZP68CeCacrcnQvsfALAWwE4AqwFcZP2znrS+11zzT7Pef611USwDcKpj/7EAFln9cKtj\nfz2A/7P2vwCgxfHabGv/GwAucOyfAMn2eMO6UAaUoB+mA9gDuSF4FcAC63+7Xw32xRHW938VwGsA\nvmbtr7m+cPWLU0xqri+sc5rfxyJY41019oUWLSqKoiixqcSYiaIoilJlqJgoiqIosVExURRFUWKj\nYqIoiqLERsVEURRFiY2KiaIoihIbFROlJiCiTuvvgUQ0K+FjX+vafjbJ4ytKNaBiotQKpqBqAoDP\nFvNBawqKIP4950TMJxdzfEXpC6iYKLXGfwE42VqU6UprJt//Jlm4aiERfREAiGgGET1DRI8AWGLt\ne9ia2XWRmd2ViP4LwCDreL+w9nWakxHRzdb7XyOizziOPY+IfmstcPSLEveBoiSOzhqs1BrXAPgq\nM38CACzxeI+ZjyeigQCeI6K51nuPBjCFmVdb2xcx83tE1ADgZSJ6iJmvJaJLmfkYxznYOvbZAI5k\n5iOIaKT1mb9a75kGmWdpnXXOk5j572l+cUVJE7VMlFrnVAAXWOuMvAiZE+kQ67WXHEICAFcR0ULI\n/EZjHe/zYzpkVlww8wYAWQDHOY7dzjKf0ULIxKCKUrWoZaLUOgTgcmZ+Imcn0QwAXa7tD0FWrNtJ\nRPMANDiOEfZchp2O53ugv0WlylHLRKkVzEDeCWAfx/6/APiyte4KiOgQImr0+PwQyOpzO4loMmQp\nU8Mu83nXuf4G4FwrLjMCwAcgy/UqSp9D74aUWsFkc70OoNdya81h5lutJYUXEBFBFiGa6fH5xwH8\nKxEtgUwL/rzjtbsAvE5E85n58+ZczPwwEZ0AmXK+F8DVzLyBiA7zaZuiVC06Bb2iKIoSG3VzKYqi\nKLFRMVEURVFio2KiKIqixEbFRFEURYmNiomiKIoSG
xUTRVEUJTYqJoqiKEpsVEwURVGU2Px/UXYY\nagMKQHAAAAAASUVORK5CYII=\n", 337 | "text/plain": [ 338 | "" 339 | ] 340 | }, 341 | "metadata": {}, 342 | "output_type": "display_data" 343 | } 344 | ], 345 | "source": [ 346 | "plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode'])\n", 347 | "plt.xlabel(\"Iteration\")\n", 348 | "plt.ylabel(\"Avg. Reward per Episode\")\n", 349 | "plt.grid(True)\n", 350 | "#plt.savefig(settings['save_dir'] + '_' + \"evaluation_reward.svg\", bbox_inches='tight')\n", 351 | "plt.show()\n", 352 | "plt.close()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 14, 358 | "metadata": { 359 | "collapsed": false 360 | }, 361 | "outputs": [ 362 | { 363 | "data": { 364 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEPCAYAAACKplkeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8HfO9//HXR1zqmjQOSoNQd0qooi7NdneK0ouqKuJX\ndagq2iI4qtVDizrIT+q4p3Vrm5NWtYpQWW5NhCRbIiRBxDUkEknkInL5nD++szJr773W3mvvPWvN\nzF7v5+Mxj71m1qyZz/7stddnzfc78x1zd0RERJKwWtoBiIhIz6GiIiIiiVFRERGRxKioiIhIYlRU\nREQkMSoqIiKSmLoUFTO73czeN7OJJcs+bWYjzWyqmT1iZr3rEYuIiNROvY5U7gQOb7VsMPCYu28P\nPA5cVKdYRESkRqxeFz+a2ZbA39x912h+CjDQ3d83s88ABXffoS7BiIhITaTZp7Kxu78P4O7vARun\nGIuIiCQgSx31Gi9GRCTnVk9x3++b2SYlzV+zKq1oZio4IiJd4O5Wz/3V80jFoqnoAWBQ9PgU4K/t\nvdjdNblz2WWXpR5DViblQrlQLhxw5s+v9Fz91euU4nuBfwHbmdmbZnYq8GvgUDObChwczUsHZsyY\nkXYImaFcxJSLmHKRrro0f7n7dyo8dUg99i8iIvWRpY56qcKgQYPSDiEzlIuYchFTLtJVt+tUusPM\nPA9xiojUmxnMnw8bbFDuOcN7cEe9JKBQKKQdQmYoFzHlIqZcpEtFRUREEqPmLxGRDJs1CxYuhK23\nLv+8mr9ERKRqRx0Fn/tc2lFUT0UlZ9ReHFMuYspFrKflYsGCtCPoHBUVEZEaW7o07QjqR30qIiI1\nNG0abL89dPUjbIcdYOrUyq9Xn4qISAOZOzftCOpLRSVnelp7cXcoFzHlIqZcpEtFRUREEqM+FRGR\nGhozBr70JfWpiIhIghrle7GKSs6ovTimXMSUi1hWc3HPPWlHUB8qKiIidfDqq2lHUB8qKjnT1NSU\ndgiZoVzElItY1nNhBpMnV7/+1Km1i6UWVFREROosb4WiM1RUciar7cVpUC5iykVMuUiXioqIiCRG\nRSVnst5eXE/KRUy5iCkX6VJRERGRxKio5Izai2PKRUy5iDVyLubOhdGj041BRUVEpIcYPBj23Tfd\nGFRUckbtxTHlIqZcxPKci+9/H5Ys6frrV65MLpauUlEREcmI226D6dPTjqJ7VFRyppHbi1tTLmLK\nRSyrufjgg+S3mcXbFKuoiIjUwdChyW9zs82S32Z3pV5UzOwiM5tsZhPN7B4zWzPtmLIsz+3FSVMu\nYspFrJFykcVbFadaVMxsS+D7wO7uviuwOvDtNGMSEZGuS/tIZQHwCbCuma0OrAO8m25I2ZbV9uI0\nKBcx5SKW91xYXe/TmLzV09y5u39oZtcCbwKLgZHu/liaMYmIJOGtt2DRos6/Lu93iEy7+Wtr4Dxg\nS2AzYD0z+06aMWVdI7UXd0S5iCkXsVrn4h//gK99reP1DjwQdtyxpqFkUqpHKsCewDPuPhfAzP4M\n7Avc23rFQYMG0b9/fwD69OnDgAEDVr15ioe7mte85jVf6/nrry/w6KPw1FNNHHBA5fWXLg3z48eH\neWiKfhZ48UX4+tfLb3/ffQvstBOMHh2vX/r61utDgaeegiOPbGLmzAIwjEGDWPV5WXfuntoE7AZM\nAj4FGDAMOKvMei7BqFGj0g4hM5SLmHIRq3UuTjrJHdxPOaX99fr1C+uNHh1+Fj/GwH3EiPKvKa63\n1lptl7W3/vz5Yf5732u5bvTZWdfP9VSbv9z9BeD3wDjgBUJhuSXNmEREpOvSbv7C3a8Brkk7jryI\nD3lFuYgpFzHlIl1pn1IsIpI5CxbAihXJb3fmzOS3mTUqKjlT7KQT5aKUchFLIhe9e8OVV3Y/ltZO\nPjn5bWaNioqISBmvv578Nmtx9JM1Kio5o/bimHIRUy5iykW6VFRERMrI2pXtp50Gy5enHUXHVFRy\nRm3nMeUiplzEekIuyhW022+HOXPqH0tnqaiIiHRBtUcyQ4bUNo6sUVHJGbUXx5SLmHIRSyoXXWn+\ncocbbmi57L77EgkHgNGjYcSIys8//XRy++oqFRURkYR88gmce27l50eN6t72Tz0VvvnNys9Pndq9\n7SdBRSVnekJ7cVKUi5hyEVMu0qWiIiJSRlebvxqdikrOqO08plzElItYI+ciC3eNVFEREamzIUPg\nnXfSjqI2VFRyRu3FMeUiplzEkspFLZu/nngCttwS3n675zWZqaiIiNTA/PntP79iBWy+OTz+eJh/\n4IHax1QPKio508jtxa0pF7FGysX558MVV1R+vl65KHeEUbrso4+q286CBeHnMce0v+28UFERkVz5\nzW/g6qtrv580P9iXLcvvlfgqKjmjtvOYchFTLmK1yMW0abDXXuFxe8WmK2dfVXrNww8nt616UlER\nEenA00/Dc8/Bgw/C3XeHZe01f734YvXbzkIhSJKKSs40Utt5R5SLmHIRq+XYX0cdFT8uFpdynngi\nkRByafW0AxARScO994YzsE46KZntffnLcOSR4fGYMdW/Tkcqkiq1nceUi1ij5aK9D+Jqc3HSScne\nM/6pp+DPfw6Phw/v/vYeeqjtsjwUIBUVEZEyunP219Kl1a+bh0LRGSoqOaO285hyEVMuYmnmIu3r\nS7JQoFRURCRzVqyAlSvTjqI+slAIkqSikjN5aDtfuhQee6z2+8lDLuqlp+Vil13guOMqP59En0ot\nPsy7ep
1Kc3P39jt3bvdenyQVFUncvffCoYemHYXk2ZQp4da5STr0UJg1q/r1O9OU9dZbnX9N0VFH\nwe67d/51pbbaquX87NnpHQGpqORMHtrO69VskYdc1ItyESuXi7vuCkfPEybEy1asiB+ffHIYU6zU\nu+/Gj6dNa3+fW2zR+Thr6eij09u3iookrppvSJ05O0Ya1+c/Dx98UP3606bBlVe2Xd7RqcN33QV3\n3NFyWekFjFddVX0MWfDss+ntO/WiYma9zWy4mb1sZpPNbO+0Y8qyrLSdf/ABnHhi11//qU/Fo7N2\nVVZykQU9NRcvvtjxUUKpm26CSy4pVHy+mi88771X/f5a09lfGSgqwA3AP9x9R2A34OWU45EqjB0b\n+k7KqfaNraMVqUa5D+qufniWe93HH7ec33TTrm0bki8qrTvgP/ww2e3XQqpFxcw2AA5w9zsB3H25\nu3fz+2vPloe283p9W8pDLupFuQjCe6+pw/VKP5x32qn0tS2tWNG593PSX5RGjEh2e/WQ9pHKVsAH\nZnanmY03s1vMbO2UY5JuysIhuPQcnfn239F7r/j8tdfGy15/PfycM6ftSSalnfnV6MzoxLWQhf+9\ntAeUXB3YAzjL3Z83s+uBwcBlrVccNGgQ/fv3B6BPnz4MGDBg1bezYntyI8yXtp2nGc/EiVD8Rtj6\n+ZdfLsbY/vY6er6j+eKyLP190ppvbm7m3HPPzUw8ScwX3x8TJhRYvrz0aKzAsmXx86WvDx+q11Mo\ntP18KF3/jTfieYifHzas5Xzr58vNt95+Z1/flflCoWU+AFaubGLZsgIwLFren1S4e2oTsAkwvWR+\nf+BvZdZzCUaNGpV2CO7u/uCD7pX+LL/7XeXnisB91qzuxZCVXGRBT8sFuG+6afj55JNtn+vbt+Wy\nxx93X7jQ/dhj3WFU2e2B+2OPuS9Z4n7JJfGy0uk//qPl/NKl5dcrTq23n/R0663V7/eqq8qti3ud\nP9dTbf5y9/eBt8xsu2jRwcBLKYaUeXloOw/fAjvW3UP1POSiXvKWi//6L/j975Pb3kEHwXXXwf33\nQ/Eb/UUXwW67tVzvjjtg7bUrv/duvjm5mOrttdfSjiBIu08F4EfAPWbWTDj7q8xZ5pK2xYthk02q\nW/f222sbi+TfpZfCz34WHrvDRx9VXrdcn8qSJeHaklKt+z8eeYSomTaWlQ/eak2eXP26Tz5Zuzg6\no8OiYmZXm9kGZraGmf3TzGab2XeTCsDdX3D3L7r7AHf/urvPT2rbPVFpf0I9zZoVpu2373jdao9A\nunv6ZVq5yKI85+Kee2CDDTr3miVLwgWNr7wCb7/d+tkCUP79FfpistGhXY35nfg0nDKldnF0RjVH\nKod5OM33KGAGsA1wfruvkB6rMxeiiVSjOG5WJcXicPDBbZtWt9su3HGxtUceiR//5Cfx4/HjuxZj\nWjozVllWVHP2V3GdI4Hh7j7f8lLme6CstJ3/539Wfk7XqdRfnnNR7fvl8cfhmWfaLi+eEhxr4uyz\nw1EMwH//d9f32ZFly5K7HXE5Dz5Yu23XSjVF5e9mNgVYApxpZhsBH3fwGunhSgfmE+mO0g94s7YX\n/JU2Y7XX91KqWFAqWby4uu10dJ3KmmtWt51G0mHzl7sPBvYF9nT3ZcBi4JhaBybl5aHtvNpvgd39\ntpiHXNRLnnMxeHDL+fY6p884o/Jzr75afFTocJ+/+U2HqwDhgsgsmj0bFi1KO4ryqumoXwf4AXBT\ntGgzYM9aBiU9yy9+kb9RXiU9rb9sVHtCx913Jx/LOeckv80kbLwxnHBC2lGUV01H/Z3AJ4SjFYB3\ngP+qWUTSrjy2nf/853BZmzESWlq8GL7znc5tN4+5qJWekIvWTU3Fo4Tbb4ezz+7MlpoSigj+/OfE\nNpW4tme9ZUM1ReVz7n41sAzA3RcD6qlvYE8/3f7z5Zq1Ovq2+dprcN99XY9J8u+UU1rOf/JJ+HnP\nPXDjjfWPR7qmmqLySTTIowOY2ecADVqekiy0nR9wQNoRBFnIRVakmYuPPur6qa+HHRY/Hjs2/Oz+\nmVmF7m5AuqGaonIZ8DCwuZndA/wTuKCmUUmP090LHSW7vvGN6kdbaO3RR9su0xUL+VbN2V+PAl8H\nBgH3Ec4CK9Q2LKkka23n3/pWy/mPP67fUBhZycWQIdCrV3LbO+IIeP/9zr0mjVy4w/Ll8M475Z//\n5JPqT90tbi8ZTUltSLqgmrO/vgzsDHwELAB2ipZJD3PvvW3vJ9GR4cNbznfnVqx59fzznc9bex55\nBMaNS257tTJ4MKy7bjxfvD30T38abjf9rW/BFluEZW+80XHRKD4/enTysUr9VNP8dX7JdCnwN+Dn\nNYxJ2lHLtvMTT4QZM2qz7Vo0f7WXi5Ur430uXx7OHurJTXBp9KlMmBCORl6KxhXv3Tv8vPZa2Gij\ncMOqOXNg4EDo3x8eeqjl61sPuVI84vn737sbWaG7G8iFji7wTEs1zV9Hl0yHArsAObhTsjSqW24J\nzVHFYcwnTgxnD11+ebpx5c348e0X4vhiw1jpUUaxGbQ4eu6RR4YhTcKNsNpqfa94ad/ChWlHUJ55\nJ7++WRj4a7K771SbkMru0zsbp3SeGZx1VjiT509/avncjBmw1VblX1f6p3n9ddh665bLzcKH/PLl\nLfd1xhlh+PNNN4VJk2DXXbt3NLFyZRjVtW/feNknn7QcSmPmzPDPuM02Xd9PayefHIZhT+otahbG\nfPrKV5LZXrXGjg2F4uij244a3Pp3GzcO9tQl0DlguHtdT33ocOwvM/v/RKcTE45sBgA5G+tTqjVs\nWPeGfzj22Pjx7NmhGaS1b34z/Pyf/wlTUh/GvXvDcce1XNZ6bKYdd4R580IB0llGsXnzYO+9w+OX\nytwm74wzwt/qpJNCs1Zzc33jk/yopk/leWBcNI0GLnT3xO6nIp1T67bzSh/w3/te5de8/npoupgz\np+VNkbbZJtz3AlpeLd16wMBSf/tby1jefTc+zF91g9RI61wsXAh33ll52xA+PAF237399ert3Xe7\nV+S6+7647rr48cCBbZ+/+eYQ391356GgFNIOoKF1eKTi7r+rRyCSnuXLYY01wuNyp4Duumtonqqk\n2NzV2oIFsM468XxTEzzxRNv1zOL7X3z1q/GpqsWYSm2ySTJnmL3wQhhGfcYMeOAB+OMfu7/N7rj4\n4vDzF78IP4tXk9fLaiVfL2fPru++pYepdPN6YBIwscw0CZjY3o3vk55CmJK0I45wf/JJ9/XXLx4D\nxJO7+8qV7n/8Y9vnaj1df337z//jH+V/n+7ssztOOqnr2xg8uLqYHnkkLLvppnjZkCHuc+d2Pe6i\nZ56p/99YU70m3L1+n9Uedlvxg3zL9qa6BtnV/1hpY/ly9xdfDAWjvTeju3uvXmn/Q7QfX9GwYcls\n81e/cl+xovM5raaozJvnPmZMy2Wvv95xTIsXuy9Z4t63b9vfHdx
vvbXz8Za65Zb0/5aaajnh7hkp\nKlmaVFRio0aN6tbrq30zFgpp/zO0P111lfuIEaM6PKrp7DR/fuXcfelL7suWxfNz54bXrLNO+Llg\ngfsee7ifemrb155zTljn2Wfdp01zHzu2+ph22aXlfOnfcujQ7r0v0v471mYalYEYsjLh7hkrKsA+\nwHPAQsIQ+CuABXUNsvQ/qcFV++Fx/fXuV14Zzy9f3vHRSf6mUYlvc+hQ90WLyue0uI67+wknuD/1\nVOXtVHptUlPpNj/6yP3vfx/ls2aF5ffeG5aVWrHC/aWXWi57/PG0/375eV/kd8Lds1dUnge2ASYA\nvYBTgV/VNcjif5FUrfTDZ+jQtN/Y+Zquu65lLleudB83rnPbKO3rqEWMX/lKy/nNNw8/S/vAHnww\nxH3ZZfGyK690nzq1uqY3TT1hwt3rW1Q6vPjRzJ539z3NbKK77xotm+DudTspUxc/ds7cubDhhuHx\nO+/AZz+bbjx5c8EF4eyz6dPDTZrOPTcMGtlZO+4YznbbeOPkYxSpTv0vfqymqDwJHALcBrwHzAQG\nuftutQ9vVQwqKpFCoVBxRNp77oFp08JFbEceWd+40lGg1iPSjhoFBx5Y010kpIBG5y0qoFwUZfCK\neuAkwkWSPwTOAzYHvlHLoKRrvqtLUhNXOkKAiHSs4pGKmZ0P3Ofuqd8JWUcqlZ1xBuyzD5x6atqR\niOTfjTfCD3+YdhRJqv+RSnvDtGwGjDazp8zsB2ZWZhQnSZN7GD5DBUUa1T//mez2zjqr/KgPSbri\nitpuP20Vi4q7nwdsAfwn8Hlgopk9bGanmNn69QpQWiod4+nMM9OLIxsKaQeQIYW0A0jFTiVjpR91\nVPFRAQi3QFiyBO6/P16nOJjpwQfDttuG++w891wYnn/atPDcl7/ccqiaq6/uWmy33ho/3mcfOO20\n8Hj//ePlxxzTtW1nWrWniRFOJz6ccGrx4iRPQSMUt/HAAxWer3DibOMpvU5lt93SPl0x7WlUBmLI\nytSYuXAP12CVuvXWkIvitTpvvx3WHTHC/dVXw+MpU9r/Pyte0/Xuu2G+vRgqXcj64Yfx47fecn/t\ntTACwuTJcexFP/xhrXKEu2fslGIAM/s88G3geOADQl/LDUkVNjM7D/gCsIG7f7XM815NnI1k4UJY\nX8eL0uAqfSysWBHu4ZOU4gjSX/taOFX8D38Ip5xDuBnZ4MHhTMEPPohfs3Il/Pa3YZDUzTdvub2P\nP4ZPfar8Pvr0iUfTTiByPCt9Kma2rZldamaTgXuARcBh7r5PwgWlH/AVwinLUqXnn087ApF0uMPS\npfDRR5XXSbKglDrooNAn8tprocAAbLlluKndP/8ZrmfaccewvHjTu9YFBdoWlKIhQ+DDD2GttWoT\nfz2011H/MLAWcLy77+ruV7r79BrEcB1wPqBDkSpcdFGBiy/Oy7UTtVZIO4AMKaQdQEWt7yLaHdtt\nF36uuSast175dWpxz6HdoqvySu95M3x4uEVBsYDtumvoo6l0K4hqFLfV+mZzeVLxOhV3/1zrZWZ2\nlLv/Pamdm9mRwPvu3mxmTUDFw7RBgwbRv39/APr06cOAAQNWXQRYfBP1xPmhQ2HEiAI/+1mY//Wv\nIf4AaYp+Nuo8HTzfSPPNqez//vvh2GPbX3/ddcP8QQc18fjjXd/fgQeG13f0/9Mc3UUsyf/Ha6+F\n+fObOPzwls/36tV2/f32K0TFoXP7e/rpJvbcM8xvv33H+Sg/XwCGRfP9SUVnOmCA8Ul26ABXAm8C\n0wlX6i8Efl9mvfZ71Xqot96KO9zGj69VR17yk3v8+JRT0onhS18KQ81/8kmIZ9Ei93790s9N3qYx\nY9p/furU8LNXL3ezts9vv33I/5NPhvHQ5s2rvK3Zs91HjXK/4YZ42S23uJ9+ehjHrNEUO/S7N+Hu\n9e2o79zKMKFmgcBAdPZXC2l/oLSeRo50/+532y4//vj48frrh9jnz3f/zGfi3+XGG8PZaldcEb3r\n3P2ii9zXWKP9fR5wgPuf/tT5WO+6q3xOFyxIP495mqZNCz9vu63tc8XbBJx0Uiguy5a5/+Uv7jNm\nhOVf+1rId2sff+z+ve+5T58etjNhQth+qUceCWdLSfs3civ+L7nHN3Jzd7/wwnATvjwUlb1qFoiK\nShvl30SjUvlwGTiwZWwjR8bPXXttGFZ98ODwTbM906eH9UqNHx+/rnSfP/6x+/33h+UvvBD201Eu\nDjnE/fnn249hu+3qn7/aT8m+L446yv2VV+K/9ccfh8dNTeHLQpb/Jbt7z6GsmTXLffhw93XXdd92\n25Z/p1LLl7s/+mjLZZkrKsBGwM+B4cD9wC+BLeoeZJbfwTVSrw+P4nTBBe0/f9xxLeMrDgW/cGHX\n7pZYyZlnhu3ecEM1eWmZi0qvaW3OnPCPumiR+6WXhoKWdlH4t3/r7ja6977YYQf3DTcM13ZAaHqp\n5Je/dBWVFHz8cTgarFRUykmjqLR3SvF+hHupGHAXcAewEnjCzPY1s+u62Z0jZSxbBre1e3J1U032\naxbOu6805Msaa7Sc32OPcErnuuvCau2dQ9hJQ4fCiy+Gs2jKefjhcJZN0ATAmDFhLl7evr59YaON\nYJ114PLLocKgz1Xp3Tv8HDgw/Nxvv85v4+23wxXcK1e2v17xiu/ymjq/48guu8D48TBjRuUzqkpd\nckl4n2ZVpVG8826ttWD11eGaa8L/3pIlaUdUQaVqA4wBdi+zfAAwH/hdvSofWf5alLB6fCt+/PHQ\nnPTFL7rffLN7nz6hXdu9ZbPWxRfHj2fPTjcv5YD7JZeEe7hPmRKugu6qp55yP/TQ9vN2+unuAwaE\n/prSb4pLlsTxdHQ3xWJH9XnnhZ+bblo+ntY3VjvnnHgfSU/z5rXc949/HDd3Sb6RwpFKex/kL7Xz\n3CvAanULsocXlfvuC3fiO+aYaj4ERnX7Q6QjK1e6P/NMePzWW2FoiyyqRTPHmmuWz9nOO7dc79ln\n2752/Pi4sJXbRrGPwj2s16dP5TiWLw8drcVmsaLS7Q0Z0v33RVNT13OVVT21+asr0igq7TVcmJl9\nuszCvsByd+/gYF3KMYNhw1ouO+EE2Gor+Otfk9vPHXdAv37Q3BwGzCsaOrS6GPfdNzzu1w8+1+aK\npZ5r6dLyy6NLH1bZa6+26+y+e3xxXLFJrsgdttkmnjcLV05X0qsXPPRQuHPnM8/Ey+NBE0MT4aGH\nhmarUiNHhiaSanSluU6kXZWqDXA68BzhrKz1o6kJeBY4vZ6Vj5weqUyb5v6vf4UB53beOZynX/yG\nuOGGYZ1aNGfssEPbWMD97LPr+/vn2fDh7gceWP3RXb0UBzosFxOEU7dL5yE0ZZ14Ystlxenyy+sT\nt6SDLDV/hXg4CngSmEMYSPJJ4Oi6B5ml/+oqNTd3/OH/k58kX1C+8IXy8SxaFM4ckeqVXqiXJeC+\nzz5tl594ovvMmfH8v/
[... remainder of the base64-encoded PNG plot output omitted ...]",
365 |     "text/plain": [
366 |       ""
367 |      ]
368 |     },
369 |     "metadata": {},
370 |     "output_type": "display_data"
371 |    }
372 |   ],
373 |   "source": [
374 |     "plt.plot(train_stats['Iteration'], train_stats['Average Q-Value'])\n",
375 |     "plt.xlabel(\"Iteration\")\n",
376 |     "plt.ylabel(\"Avg. 
Q-Values\")\n", 377 | "plt.grid(True)\n", 378 | "#plt.savefig(settings['save_dir'] + '_' + \"training_q_values.svg\", bbox_inches='tight')\n", 379 | "plt.show()\n", 380 | "plt.close()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "# Evaluating the best policy" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "Let's load the network that collected the highest reward per game episode" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 15, 400 | "metadata": { 401 | "collapsed": true 402 | }, 403 | "outputs": [], 404 | "source": [ 405 | "best_iteration_index = np.argmax(eval_stats['Reward per Episode'])\n", 406 | "best_iteration = str(int(eval_stats['Iteration'][best_iteration_index]))" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 16, 412 | "metadata": { 413 | "collapsed": false 414 | }, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "'4500000'" 420 | ] 421 | }, 422 | "execution_count": 16, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "best_iteration " 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 17, 434 | "metadata": { 435 | "collapsed": true 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p')" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 18, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 19, 456 | "metadata": { 457 | "collapsed": false 458 | }, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/plain": [ 463 | "17.6" 464 | ] 465 | }, 466 | "execution_count": 19, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "r_per_episode" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [] 483 | } 484 | ], 485 | "metadata": { 486 | "kernelspec": { 487 | "display_name": "Python 2", 488 | "language": "python", 489 | "name": "python2" 490 | }, 491 | "language_info": { 492 | "codemirror_mode": { 493 | "name": "ipython", 494 | "version": 2 495 | }, 496 | "file_extension": ".py", 497 | "mimetype": "text/x-python", 498 | "name": "python", 499 | "nbconvert_exporter": "python", 500 | "pygments_lexer": "ipython2", 501 | "version": "2.7.11" 502 | } 503 | }, 504 | "nbformat": 4, 505 | "nbformat_minor": 0 506 | } 507 | -------------------------------------------------------------------------------- /examples/mountain_car_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_backend import ChainerBackend 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.utils.policies import DQNPolicy 12 | 13 | from chimp.simulators.mdp.mountain_car import MountainCar 14 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 15 | 16 | import numpy as np 17 | import pickle 18 | import pylab as p 19 | 20 | import chainer 21 | import chainer.functions as F 22 | import chainer.links as L 23 | from chainer import Chain 24 | 25 | settings = { 26 | 27 | # agent settings 28 | 'batch_size' : 32, 29 | 'print_every' : 1000, 30 | 'save_dir' : 'results', 31 | 'iterations' : 2000000, 32 | 'eval_iterations' : 100, 33 | 'eval_every' : 1000, 34 | 'save_every' : 20000, 35 | 'initial_exploration' : 50000, 36 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step 37 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 38 | 'epsilon' : 1.0, # Initial exploratoin rate 39 | 'learn_freq' : 1, 40 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 41 | 'model_dims' : (1,2), 42 | 43 | # simulator settings 44 | 'viz' : False, 45 | 46 | # replay memory settings 47 | 'memory_size' : 100000, # size of replay memory 48 | 'n_frames' : 1, # number of frames 49 | 50 | # learner settings 51 | 'learning_rate' : 0.00001, 52 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 53 | 'discount' : 0.95, # discount rate for RL 54 | 'clip_err' : False, # value to clip loss gradients to 55 | 'clip_reward' : False, # value to clip reward values to 56 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations 57 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 58 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 59 | 'gpu' : False, 60 | 'reward_rescale': False, 61 | 62 | # general 63 | 'seed_general' : 1723, 64 | 'seed_simulator' : 5632, 65 | 'seed_agent' : 9826, 66 | 'seed_memory' : 7563 67 | 68 | } 69 | 70 | class TestNet(Chain): 71 | 72 | def __init__(self): 73 | super(TestNet, self).__init__( 74 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 75 | l2=F.Linear(20, 10, bias=0.0), 76 | bn1=L.BatchNormalization(10), 77 | l3=F.Linear(10, 10), 78 | l4=F.Linear(10, 10), 79 | bn2=L.BatchNormalization(10), 80 | lout=F.Linear(10, simulator.n_actions) 81 | ) 82 | self.train = True 83 | # initialize avg_var to prevent divide by zero 84 | self.bn1.avg_var.fill(0.1), 85 | self.bn2.avg_var.fill(0.1), 86 | 87 | def __call__(self, ohist, ahist): 88 | h = F.relu(self.l1(ohist)) 89 | h = F.relu(self.l2(h)) 90 | h = self.bn1(h, test=not self.train) 91 | h = F.relu(self.l3(h)) 92 | h = F.relu(self.l4(h)) 93 | h = self.bn2(h, test=not self.train) 94 | output = self.lout(h) 95 | return output 96 | 97 | 98 | def car_sim(nsteps, simulator, policy, verbose=False): 99 | mdp = simulator.model 100 | 101 | # re-initialize the model 102 | simulator.reset_episode() 103 | 104 | rtot = 0.0 105 | xpos = np.zeros(nsteps) 106 | vel = np.zeros(nsteps) 107 | # run the simulation 108 | input_state = np.zeros((1,2), dtype=np.float32) 109 | for i in xrange(nsteps): 110 | state = simulator.get_screenshot() 111 | input_state[0] = state 112 | a = 
policy.action((input_state,None))
113 |         simulator.act(a)
114 |         r = simulator.reward()
115 |         rtot += r
116 |         xpos[i], vel[i] = state
117 |         if simulator.episode_over():
118 |             break
119 |     return rtot, xpos, vel
120 | 
121 | 
122 | mdp = MountainCar()
123 | simulator = MDPSimulator(mdp)
124 | 
125 | net = pickle.load(open("../chimp/pre_trained_nets/mountain_car.net", "rb"))
126 | backend = ChainerBackend(settings)
127 | backend.set_net(net)
128 | learner = DQNLearner(settings, backend)
129 | 
130 | policy = DQNPolicy(learner)
131 | 
132 | r, xtrace, vtrace = car_sim(300, simulator, policy, verbose=True)
133 | 
134 | p.plot(xtrace); p.plot(10.0*vtrace)
135 | p.show()
136 | 
--------------------------------------------------------------------------------
/examples/run_atari.py:
--------------------------------------------------------------------------------
1 | # be sure to have run 'python setup.py' from the chimp directory
2 | 
3 | 
4 | # # Training DeepMind's Atari DQN with Chimp
5 | 
6 | # First, we load all the Chimp modules.
7 | 
8 | from chimp.memories import ReplayMemoryHDF5
9 | 
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.learners.chainer_backend import ChainerBackend
12 | 
13 | from chimp.simulators.atari import AtariSimulator
14 | 
15 | from chimp.agents import DQNAgent
16 | 
17 | 
18 | # Then we load Python packages.
19 | 
20 | import matplotlib.pyplot as plt
21 | 
22 | import numpy as np
23 | import chainer
24 | import chainer.functions as F
25 | import chainer.links as L
26 | from chainer import Chain
27 | import os
28 | 
29 | import pandas as ps
30 | 
31 | 
32 | # Finally, we set training parameters in a settings dictionary that will be passed to the modules.
33 | 
34 | # Define training settings
35 | 
36 | settings = {
37 | 
38 |     # agent settings
39 |     'batch_size' : 32,
40 |     'print_every' : 10000,
41 |     'save_dir' : './results_atari',
42 |     'iterations' : 5000000,
43 |     'eval_iterations' : 5000,
44 |     'eval_every' : 50000,
45 |     'save_every' : 50000,
46 |     'initial_exploration' : 50000,
47 |     'epsilon_decay' : 0.000005, # subtract from epsilon every step
48 |     'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions
49 |     'epsilon' : 1.0, # Initial exploration rate
50 |     'learn_freq' : 4,
51 |     'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
52 |     'model_dims' : (84,84),
53 | 
54 |     # Atari settings
55 |     'rom' : "Breakout.bin",
56 |     'rom_dir' : './roms',
57 |     'pad' : 15, # padding parameter - for image cropping - only along the length of the image, to obtain a square
58 |     'action_history' : True,
59 | 
60 |     # simulator settings
61 |     'viz' : True,
62 |     'viz_cropped' : False,
63 | 
64 |     # replay memory settings
65 |     'memory_size' : 500000, # size of replay memory
66 |     'frame_skip' : 4, # number of frames to skip
67 | 
68 |     # learner settings
69 |     'learning_rate' : 0.00025,
70 |     'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used
71 |     'discount' : 0.99, # discount rate for RL
72 |     'clip_err' : False, # value to clip loss gradients to
73 |     'clip_reward' : 1, # value to clip reward values to
74 |     'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations
75 |     'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
76 |     'gpu' : True, # set to False to run this example on the CPU
77 |     'reward_rescale': False,
78 | 
79 |     # general
80 |     'seed_general' : 1723,
81 |     'seed_simulator' : 5632,
82 |     'seed_agent' : 9826,
83 |     'seed_memory' : 7563
84 | 
85 | }
86 | 
87 | 
88 | # set random seed
89 | np.random.seed(settings["seed_general"]) 90 | 91 | 92 | # initialize the simulator 93 | 94 | simulator = AtariSimulator(settings) 95 | 96 | # Define the network 97 | class Convolution(Chain): 98 | 99 | def __init__(self): 100 | super(Convolution, self).__init__( 101 | l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)), 102 | l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)), 103 | l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)), 104 | l4=F.Linear(3136, 512, wscale = np.sqrt(2)), 105 | l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)), 106 | ) 107 | 108 | def __call__(self, ohist, ahist): 109 | if len(ohist.data.shape) < 4: 110 | ohist = F.reshape(ohist,(1,4,84,84)) 111 | h1 = F.relu(self.l1(ohist/255.0)) 112 | h2 = F.relu(self.l2(h1)) 113 | h3 = F.relu(self.l3(h2)) 114 | h4 = F.relu(self.l4(h3)) 115 | output = self.l5(h4) 116 | return output 117 | 118 | net = Convolution() 119 | 120 | 121 | # initialize the learner + chainer backend, replay memory, and agent modules 122 | 123 | backend = ChainerBackend(settings) 124 | backend.set_net(net) 125 | learner = DQNLearner(settings, backend) 126 | 127 | memory = ReplayMemoryHDF5(settings) 128 | 129 | agent = DQNAgent(learner, memory, simulator, settings) 130 | 131 | # launch training 132 | 133 | agent.train() 134 | 135 | 136 | # Visualizing results 137 | 138 | train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None) 139 | train_stats.columns = ['Iteration','MSE Loss','Average Q-Value'] 140 | 141 | eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None) 142 | eval_stats.columns = ['Iteration','Total Reward','Reward per Episode'] 143 | 144 | 145 | plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode']) 146 | plt.xlabel("Iteration") 147 | plt.ylabel("Avg. Reward per Episode") 148 | plt.grid(True) 149 | plt.savefig(settings['save_dir'] + '_' + "evaluation_reward.svg", bbox_inches='tight') 150 | #plt.show() 151 | plt.close() 152 | 153 | 154 | plt.plot(train_stats['Iteration'], train_stats['Average Q-Value']) 155 | plt.xlabel("Iteration") 156 | plt.ylabel("Avg. Q-Values") 157 | plt.grid(True) 158 | plt.savefig(settings['save_dir'] + '_' + "training_q_values.svg", bbox_inches='tight') 159 | #plt.show() 160 | plt.close() 161 | 162 | 163 | # Evaluating the best policy 164 | 165 | # load the network that collected the highest reward per game episode 166 | 167 | best_iteration_index = np.argmax(eval_stats['Reward per Episode']) 168 | best_iteration = str(int(eval_stats['Iteration'][best_iteration_index])) 169 | 170 | agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p') 171 | 172 | 173 | # evaluate policy performance 174 | 175 | r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True) 176 | 177 | r_per_episode 178 | 179 | 180 | -------------------------------------------------------------------------------- /examples/run_cartpole.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | # Memory 10 | from chimp.memories import ReplayMemoryHDF5 11 | 12 | # Learner (Brain) 13 | from chimp.learners.dqn_learner import DQNLearner 14 | from chimp.learners.chainer_backend import ChainerBackend 15 | 16 | # Agent Framework 17 | from chimp.agents import DQNAgent 18 | 19 | # Simulator 20 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 21 | from chimp.simulators.mdp.cart_pole import CartPole 22 | 23 | # Rollout Policy 24 | from chimp.utils.policies import RandomPolicy 25 | 26 | import numpy as np 27 | import pickle 28 | import pylab as p 29 | 30 | import chainer 31 | import chainer.functions as F 32 | import chainer.links as L 33 | from chainer import Chain 34 | 35 | settings = { 36 | 37 | # agent settings 38 | 'batch_size' : 32, 39 | 'print_every' : 1000, 40 | 'save_dir' : 'results/cartpole-1', 41 | 'iterations' : 10000, 42 | 'eval_iterations' : 200, 43 | 'eval_every' : 1000, 44 | 'save_every' : 1000, 45 | 'initial_exploration' : 10000, 46 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step 47 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 48 | 'epsilon' : 1.0, # Initial exploratoin rate 49 | 'learn_freq' : 1, 50 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 51 | 'model_dims' : (1,4), 52 | 53 | # simulator settings 54 | 'viz' : False, 55 | 56 | # replay memory settings 57 | 'memory_size' : 10000, # size of replay memory 58 | 'n_frames' : 1, # number of frames 59 | 60 | # learner settings 61 | 'learning_rate' : 0.00001, 62 | 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used 63 | 'discount' : 0.99, # discount rate for RL 64 | 'clip_err' : False, # value to clip loss gradients to 65 | 'clip_reward' : False, # value to clip reward values to 66 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations 67 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 68 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 69 | 'gpu' : False, 70 | 'reward_rescale': False, 71 | 72 | # general 73 | 'seed_general' : 1723, 74 | 'seed_simulator' : 5632, 75 | 'seed_agent' : 9826, 76 | 'seed_memory' : 7563 77 | 78 | } 79 | 80 | mdp = CartPole() 81 | simulator = MDPSimulator(mdp) 82 | 83 | class CartNet(Chain): 84 | 85 | def __init__(self): 86 | super(CartNet, self).__init__( 87 | l1=F.Linear(4, 20, bias=0.0), 88 | l2=F.Linear(20, 10, bias=0.0), 89 | bn1=L.BatchNormalization(10), 90 | l3=F.Linear(10, 10), 91 | l4=F.Linear(10, 10), 92 | bn2=L.BatchNormalization(10), 93 | lout=F.Linear(10, simulator.n_actions) 94 | ) 95 | self.train = True 96 | # initialize avg_var to prevent divide by zero 97 | self.bn1.avg_var.fill(0.1), 98 | self.bn2.avg_var.fill(0.1), 99 | 100 | def __call__(self, ohist, ahist): 101 | h = F.relu(self.l1(ohist)) 102 | h = F.relu(self.l2(h)) 103 | h = self.bn1(h, test=not self.train) 104 | h = F.relu(self.l3(h)) 105 | h = F.relu(self.l4(h)) 106 | h = self.bn2(h, test=not self.train) 107 | output = self.lout(h) 108 | return output 109 | 110 | 111 | def pole_sim(nsteps, simulator, policy, verbose=False): 112 | mdp = simulator.model 113 | 114 | # re-initialize the model 115 | simulator.reset_episode() 116 | 117 | rtot = 0.0 118 | xpos = 
np.zeros(nsteps)
119 |     thetas = np.zeros(nsteps)
120 |     # run the simulation
121 |     input_state = np.zeros((1,4), dtype=np.float32)
122 |     for i in xrange(nsteps):
123 |         state = simulator.get_screenshot()
124 |         input_state[0] = state
125 |         #a = policy.action((input_state,None))
126 |         a = policy.action(state)
127 |         simulator.act(a)
128 |         r = simulator.reward()
129 |         rtot += r
130 |         xpos[i], thetas[i] = state[0], state[2]
131 |         print state, r
132 |         if simulator.episode_over():
133 |             break
134 |     return rtot, xpos, thetas
135 | 
136 | 
137 | class PoleCartHeuristic():
138 | 
139 |     def __init__(self):
140 |         self.a = 0
141 | 
142 |     def action(self, state):
143 |         if state[2] > 0:
144 |             return 1
145 |         else:
146 |             return 0
147 | 
148 | 
149 | net = CartNet()
150 | 
151 | # Initialize Learner with a Chainer backend
152 | backend = ChainerBackend(settings)
153 | backend.set_net(net)
154 | learner = DQNLearner(settings, backend)
155 | 
156 | # Initialize memory
157 | memory = ReplayMemoryHDF5(settings)
158 | 
159 | # Initialize Agent Framework
160 | agent = DQNAgent(learner, memory, simulator, settings)
161 | 
162 | # Start training
163 | agent.train(verbose=True)
164 | 
165 | #policy = RandomPolicy(simulator.n_actions)
166 | #policy = PoleCartHeuristic()
167 | 
168 | #r, xs, ts = pole_sim(100, simulator, policy, verbose=True)
169 | 
170 | #p.plot(xs); p.plot(10.0*ts)
171 | #p.show()
172 | 
--------------------------------------------------------------------------------
/examples/run_mountain_car.py:
--------------------------------------------------------------------------------
1 | """
2 | File to initialize training.
3 | Contains settings, network definition for Chainer.
4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training.
5 | """ 6 | 7 | import numpy as np 8 | 9 | import chainer 10 | import chainer.functions as F 11 | import chainer.links as L 12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils 13 | from chainer import Link, Chain, ChainList 14 | 15 | # Memory 16 | from chimp.memories import ReplayMemoryHDF5 17 | 18 | # Learner (Brain) 19 | from chimp.learners.dqn_learner import DQNLearner 20 | from chimp.learners.chainer_backend import ChainerBackend 21 | 22 | # Agent Framework 23 | from chimp.agents import DQNAgent 24 | 25 | # Simulator 26 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 27 | from chimp.simulators.mdp.mountain_car import MountainCar 28 | 29 | # Rollout Policy 30 | from chimp.utils.policies import RandomPolicy 31 | 32 | 33 | settings = { 34 | 35 | # agent settings 36 | 'batch_size' : 32, 37 | 'print_every' : 1000, 38 | 'save_dir' : 'results/mountain_car', 39 | 'iterations' : 200000, 40 | 'eval_iterations' : 100, 41 | 'eval_every' : 1000, 42 | 'save_every' : 20000, 43 | 'initial_exploration' : 50000, 44 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step 45 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 46 | 'epsilon' : 1.0, # Initial exploratoin rate 47 | 'learn_freq' : 1, 48 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 49 | 'model_dims' : (1,2), 50 | 51 | # simulator settings 52 | 'viz' : False, 53 | 54 | # replay memory settings 55 | 'memory_size' : 100000, # size of replay memory 56 | 'n_frames' : 1, # number of frames 57 | 58 | # learner settings 59 | 'learning_rate' : 0.00001, 60 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 61 | 'discount' : 0.95, # discount rate for RL 62 | 'clip_err' : False, # value to clip loss gradients to 63 | 'clip_reward' : False, # value to clip reward values to 64 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 65 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 66 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 67 | 'gpu' : False, 68 | 'reward_rescale': False, 69 | 70 | # general 71 | 'seed_general' : 1723, 72 | 'seed_simulator' : 5632, 73 | 'seed_agent' : 9826, 74 | 'seed_memory' : 7563 75 | 76 | } 77 | 78 | mdp = MountainCar() 79 | simulator = MDPSimulator(mdp) 80 | 81 | class CarNet(Chain): 82 | 83 | def __init__(self): 84 | super(CarNet, self).__init__( 85 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 86 | l2=F.Linear(20, 10, bias=0.0), 87 | bn1=L.BatchNormalization(10), 88 | l3=F.Linear(10, 10), 89 | l4=F.Linear(10, 10), 90 | bn2=L.BatchNormalization(10), 91 | lout=F.Linear(10, simulator.n_actions) 92 | ) 93 | self.train = True 94 | # initialize avg_var to prevent divide by zero 95 | self.bn1.avg_var.fill(0.1), 96 | self.bn2.avg_var.fill(0.1), 97 | 98 | 99 | def __call__(self, ohist, ahist): 100 | h = F.relu(self.l1(ohist)) 101 | h = F.relu(self.l2(h)) 102 | h = self.bn1(h, test=not self.train) 103 | h = F.relu(self.l3(h)) 104 | h = F.relu(self.l4(h)) 105 | h = self.bn2(h, test=not self.train) 106 | output = self.lout(h) 107 | return output 108 | 109 | 110 | net = CarNet() 111 | 112 | # Initialize Learner with a Chainer backend 113 | backend = ChainerBackend(settings) 114 | backend.set_net(net) 115 | learner = DQNLearner(settings, backend) 116 | 117 | # Initialize memory 118 | memory = ReplayMemoryHDF5(settings) 119 | 120 | # Initialize Agent Framework 121 | agent = 
DQNAgent(learner, memory, simulator, settings) 122 | 123 | # Start training 124 | agent.train(verbose=True) 125 | -------------------------------------------------------------------------------- /examples/run_tiger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File to initialize training. 3 | Contains settings, network definition for Chainer. 4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | import chainer 10 | import chainer.functions as F 11 | import chainer.links as L 12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils 13 | from chainer import Link, Chain, ChainList 14 | from memories import ReplayMemoryHDF5 15 | 16 | from learners import Learner 17 | from agents import DQNAgent 18 | 19 | from simulators.pomdp import POMDPSimulator 20 | from simulators.pomdp import TigerPOMDP 21 | 22 | print('Setting training parameters...') 23 | # Set training settings 24 | settings = { 25 | # agent settings 26 | 'batch_size' : 32, 27 | 'print_every' : 5000, 28 | 'save_dir' : 'results/nets_tiger_observation', 29 | 'iterations' : 500000, 30 | 'eval_iterations' : 5000, 31 | 'eval_every' : 5000, 32 | 'save_every' : 5000, 33 | 'initial_exploration' : 10000, 34 | 'epsilon_decay' : 0.0001, # subtract from epsilon every step 35 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 36 | 'epsilon' : 1.0, # Initial exploratoin rate 37 | 'model_dims': (1,1), 38 | 'learn_freq' : 1, 39 | 40 | # simulator settings 41 | 'viz' : False, 42 | 43 | # replay memory settings 44 | 'memory_size' : 100000, # size of replay memory 45 | 'n_frames' : 5, # number of frames 46 | 47 | # learner settings 48 | 'learning_rate' : 0.001, 49 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 50 | 'discount' : 0.95, # discount rate for RL 51 | 'clip_err' : False, # value to clip loss gradients to 52 | 'clip_reward' : False, # value to clip reward values to 53 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 54 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 55 | 'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA" and "SGD"' 56 | 'gpu' : False, 57 | 'reward_rescale': False, 58 | 59 | # general 60 | 'seed_general' : 1723, 61 | 'seed_simulator' : 5632, 62 | 'seed_agent' : 9826, 63 | 'seed_memory' : 7563 64 | 65 | } 66 | 67 | print(settings) 68 | 69 | np.random.seed(settings["seed_general"]) 70 | 71 | print('Setting up simulator...') 72 | pomdp = TigerPOMDP( seed=settings['seed_simulator'] ) 73 | simulator = POMDPSimulator(pomdp, robs=True) 74 | 75 | settings['model_dims'] = simulator.model_dims 76 | 77 | print('Initializing replay memory...') 78 | memory = ReplayMemoryHDF5(settings) 79 | 80 | print('Setting up networks...') 81 | 82 | class Linear(Chain): 83 | 84 | def __init__(self): 85 | super(Linear, self).__init__( 86 | l1=F.Bilinear(settings["n_frames"], settings["n_frames"], 200), 87 | l2=F.Linear(200, 100, wscale=np.sqrt(2)), 88 | l3=F.Linear(100, 100, wscale=np.sqrt(2)), 89 | l4=F.Linear(100, 50, wscale=np.sqrt(2)), 90 | l5=F.Linear(50, simulator.n_actions, wscale = np.sqrt(2)) 91 | ) 92 | 93 | def __call__(self, s, action_history): 94 | h1 = F.relu(self.l1(s,action_history)) 95 | h2 = F.relu(self.l2(h1)) 96 | h3 = F.relu(self.l3(h2)) 97 | h4 = F.relu(self.l4(h3)) 98 | output = self.l5(h4) 99 | return output 
100 | 101 | net = Linear() 102 | 103 | print('Initializing the learner...') 104 | learner = Learner(settings) 105 | learner.load_net(net) 106 | 107 | print('Initializing the agent framework...') 108 | agent = DQNAgent(settings) 109 | 110 | print('Training...') 111 | agent.train(learner, memory, simulator) 112 | 113 | print('Loading the net...') 114 | learner = agent.load(settings['save_dir']+'/learner_final.p') 115 | 116 | ind_max = learner.val_rewards.index(max(learner.val_rewards)) 117 | ind_net = settings['initial_exploration'] + ind_max * settings['eval_every'] 118 | agent.load_net(learner,settings['save_dir']+'/net_%d.p' % int(ind_net)) 119 | 120 | np.random.seed(settings["seed_general"]) 121 | 122 | print('Evaluating DQN agent...') 123 | print('(reward, MSE loss, mean Q-value, episodes - NA, time)') 124 | reward, MSE_loss, mean_Q_value, episodes, time, paths, actions, rewards = agent.evaluate(learner, simulator, 50000) 125 | print(reward, MSE_loss, mean_Q_value, episodes, time) 126 | -------------------------------------------------------------------------------- /logos/chimp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/chimp.png -------------------------------------------------------------------------------- /logos/monkey_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/monkey_text.png -------------------------------------------------------------------------------- /roms/README.md: -------------------------------------------------------------------------------- 1 | # Put roms here -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import numpy 4 | 5 | """ 6 | This script creates a symbolic link to the chimp source code in your python's site-packages directory 7 | """ 8 | 9 | np_path = numpy.__file__ 10 | source_path = os.path.dirname(os.path.realpath("setup.py")) + "/chimp" 11 | 12 | np_split = np_path.split("/") 13 | target_path = '/'.join(np_split[:-2]) + "/chimp" 14 | 15 | # symlink to the site packages dir 16 | cmd = "ln -s " + source_path + " " + target_path 17 | 18 | subprocess.call([cmd], shell=True) 19 | 20 | 21 | --------------------------------------------------------------------------------
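A note on the setup.py listed above: it builds the symbolic link by shelling out to `ln -s`, which assumes a POSIX shell. The snippet below is a minimal, untested sketch of the same idea using only the standard library; it is not part of the repository, and it assumes a standard (non-virtualenv) interpreter layout where `site.getsitepackages()` is available and resolves the target path with `site` instead of inferring it from `numpy.__file__`.

import os
import site

# directory containing this script and the chimp/ package (same layout assumption as setup.py)
repo_root = os.path.dirname(os.path.abspath(__file__))
source_path = os.path.join(repo_root, "chimp")

# first site-packages directory of the running interpreter
# (site.getsitepackages() may be missing inside some virtualenvs)
target_path = os.path.join(site.getsitepackages()[0], "chimp")

if not os.path.islink(target_path) and not os.path.exists(target_path):
    # same effect as "ln -s source_path target_path"
    os.symlink(source_path, target_path)
    print("linked %s -> %s" % (source_path, target_path))
else:
    print("%s already exists; remove it first to re-link" % target_path)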