├── .gitignore ├── LICENSE ├── README.md ├── chimp ├── __init__.py ├── agents │ ├── __init__.py │ ├── agent_test.py │ └── dqn_agent.py ├── learners │ ├── __init__.py │ ├── chainer_backend.py │ ├── chainer_test.py │ └── dqn_learner.py ├── memories │ ├── __init__.py │ ├── mem_test.py │ ├── memory.py │ └── replay_memory.py ├── pre_trained_nets │ └── mountain_car.net ├── simulators │ ├── __init__.py │ ├── atari │ │ ├── __init__.py │ │ └── atari.py │ ├── gym │ │ ├── __init__.py │ │ └── gym_wrapper.py │ ├── mdp │ │ ├── __init__.py │ │ ├── cart_pole.py │ │ ├── mdp_simulator.py │ │ └── mountain_car.py │ └── pomdp │ │ ├── __init__.py │ │ ├── models │ │ ├── __init__.py │ │ ├── rock_sample.py │ │ ├── rock_test.py │ │ ├── simulator.py │ │ ├── tiger.py │ │ └── tools │ │ │ ├── __init__.py │ │ │ ├── belief.py │ │ │ ├── belief_momdp.py │ │ │ └── distributions.py │ │ └── sim_loop.py └── utils │ ├── __init__.py │ ├── distributions.py │ └── policies.py ├── examples ├── atari_tutorial.ipynb ├── mountain_car.ipynb ├── mountain_car_test.py ├── run_atari.py ├── run_cartpole.py ├── run_mountain_car.py └── run_tiger.py ├── logos ├── chimp.png └── monkey_text.png ├── roms └── README.md └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.pyc 10 | 11 | # Packages # 12 | ############ 13 | *.7z 14 | *.dmg 15 | *.gz 16 | *.iso 17 | *.jar 18 | *.rar 19 | *.tar 20 | *.zip 21 | 22 | # Logs and databases # 23 | ###################### 24 | *.log 25 | *.sql 26 | *.sqlite 27 | *.hdf5 28 | 29 | # OS generated files # 30 | ###################### 31 | *.DS_Store 32 | *.DS_Store? 33 | *._* 34 | *.Spotlight-V100 35 | *.Trashes 36 | *ehthumbs.db 37 | *Thumbs.db 38 | 39 | # Data files # 40 | ############## 41 | *.csv 42 | *.jld 43 | *.mat 44 | *.p 45 | 46 | # Images # 47 | ########## 48 | *.jpg 49 | *.jpeg 50 | *.bitmap 51 | 52 | # Documents # 53 | ############# 54 | *.eps 55 | *.pdf 56 | 57 | # Misc # 58 | ######## 59 | *.swp 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | Chimp is a general-purpose framework for deep reinforcement learning developed at the [Stanford Intelligent Systems Laboratory](http://sisl.stanford.edu/). 6 | Chimp is based on a simple four-part architecture that allows plug-and-play capabilities for deep reinforcement 7 | learning experiments. 8 | This package was inspired by the Google DeepMind [paper](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html) (V. Mnih et al.). 9 | Many of the architectural ideas were taken from DeepMind's 10 | [GORILA](http://arxiv.org/abs/1507.04296) framework and from the 11 | [paper](http://arxiv.org/abs/1508.04186) on distributed Deep Q-Learning by Ong et al. 12 | 13 | # Installation 14 | 15 | First clone Chimp: 16 | ``` 17 | git clone https://github.com/sisl/Chimp 18 | ``` 19 | Then add the source directory to your `PYTHONPATH`. 20 | 21 | ``` 22 | cd Chimp 23 | export PYTHONPATH=$(pwd):$PYTHONPATH 24 | ``` 25 | 26 | You will also need NumPy and SciPy installed, as well as a deep learning backend. Currently only [Chainer](https://github.com/pfnet/chainer) is supported (TensorFlow coming soon). 27 | 28 | Once you have the dependencies installed, you should be able to run the framework on a CPU. To use a GPU, you will need CUDA and a supported graphics card. 29 | 30 | # Getting Started 31 | 32 | If you are interested in using Chimp for your own reinforcement learning problems, check out the [mountain car tutorial](https://github.com/sisl/Chimp/blob/master/examples/mountain_car.ipynb) to get an idea of how to write your own simulator class. If you would like to use Chimp with the Arcade Learning Environment, check out the [Atari tutorial](https://github.com/sisl/Chimp/blob/master/examples/atari_tutorial.ipynb) to get started. 33 | 34 | # Architecture 35 | 36 | Chimp consists of four main modules: Agent, Learner, Simulator, and Memory. This decomposition yields a powerful and flexible framework for reinforcement learning experiments, in which one can quickly switch between simulators, replay memory implementations, and deep learning backends. 37 | 38 | Chimp is also flexible in how it handles inputs to the deep neural network. 39 | The user can specify the history lengths of observations, actions, and even rewards to use as inputs to the model, and Chimp will handle the rest. 40 | 41 | The input size is specified as a tuple ```(s_size, a_size, r_size)```. For the DeepMind Atari experiments, this setting would be (4,0,0): four image frames per input and no action or reward history. 42 | 43 | # Components 44 | 45 | * Memory (implements experience replay) 46 | * Currently, we support in-memory numpy arrays and HDF5-backed storage 47 | 48 | * Learner ("brain" of the algorithm that does forward and backward passes in a neural net) 49 | * We support DQN with arbitrary observation/action history lengths as input 50 | * Planning to add LSTM and an actor-critic framework 51 | 52 | * Simulator (environment for the agent to interact with) 53 | * Single-player Arcade Learning Environment 54 | * MDPs 55 | 56 | * Agent (general framework that handles all interactions between a learner, a memory, and a simulator) 57 | 58 | # Dependencies 59 | 60 | Chimp relies on existing deep learning back-ends. Currently only [Chainer](http://chainer.org/) is supported.
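A minimal end-to-end script wires the four components together roughly as follows. This sketch mirrors `chimp/agents/agent_test.py` and `chimp/learners/chainer_test.py` in this repository; it assumes a `settings` dictionary and a Chainer `Chain` Q-network `net` defined as in those files, so treat it as an outline rather than a tested standalone example.

```python
from chimp.memories.replay_memory import ReplayMemoryHDF5
from chimp.learners.chainer_backend import ChainerBackend
from chimp.learners.dqn_learner import DQNLearner
from chimp.simulators.mdp.mountain_car import MountainCar
from chimp.simulators.mdp.mdp_simulator import MDPSimulator
from chimp.agents.dqn_agent import DQNAgent

simulator = MDPSimulator(MountainCar())   # Simulator: environment the agent interacts with
backend = ChainerBackend(settings)        # Chainer backend that runs the network
backend.set_net(net)                      # net: a chainer Chain mapping (obs, action) histories to Q-values
learner = DQNLearner(settings, backend)   # Learner: forward/backward passes and target network
memory = ReplayMemoryHDF5(settings)       # Memory: HDF5-backed experience replay
agent = DQNAgent(learner, memory, simulator, settings)  # Agent: ties everything together
agent.train(verbose=True)
```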
61 | 62 | Required Python packages: 63 | * [Chainer](https://github.com/pfnet/chainer) 64 | * NumPy 65 | * SciPy 66 | 67 | Recommended libraries (some functionality will be absent without them): 68 | * Pygame 69 | * CUDA 70 | * Arcade Learning Environment 71 | 72 | # Authors 73 | 74 | The original authors of this software are: Yegor Tkachenko, Max Egorov, Hao Yi Ong. 75 | 76 | # License 77 | 78 | The software is distributed under the Apache License 2.0 79 | -------------------------------------------------------------------------------- /chimp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/chimp/__init__.py -------------------------------------------------------------------------------- /chimp/agents/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Agent ''' 2 | 3 | from dqn_agent import DQNAgent -------------------------------------------------------------------------------- /chimp/agents/agent_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_learner import ChainerLearner 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.learners.dqn_learner import DQNPolicy 12 | 13 | from chimp.agents.dqn_agent import DQNAgent 14 | 15 | from chimp.simulators.mdp.mountain_car import MountainCar 16 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 17 | 18 | from chimp.memories.replay_memory import ReplayMemoryHDF5 19 | 20 | from chimp.utils.policies import * 21 | 22 | import numpy as np 23 | 24 | import chainer 25 | import chainer.functions as F 26 | import chainer.links as L 27 | from chainer import Chain 28 | 29 | settings = { 30 | 31 | # agent settings 32 | 'batch_size' : 32, 33 | 'print_every' : 1000, 34 | 'save_dir' : 'results', 35 | 'iterations' : 3000, 36 | 'eval_iterations' : 200, 37 | 'eval_every' : 1000, 38 | 'save_every' : 1000, 39 | 'initial_exploration' : 10000, 40 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step 41 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 42 | 'epsilon' : 1.0, # Initial exploratoin rate 43 | 'learn_freq' : 1, 44 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 45 | 'model_dims' : (1,2), 46 | 47 | # simulator settings 48 | 'viz' : False, 49 | 50 | # replay memory settings 51 | 'memory_size' : 20000, # size of replay memory 52 | 'n_frames' : 1, # number of frames 53 | 54 | # learner settings 55 | 'learning_rate' : 0.0001, 56 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 57 | 'discount' : 0.95, # discount rate for RL 58 | 'clip_err' : False, # value to clip loss gradients to 59 | 'clip_reward' : False, # value to clip reward values to 60 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 61 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 62 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 63 | 'gpu' : False, 64 | 'reward_rescale': False, 65 | 66 | # general 67 | 
'seed_general' : 1723, 68 | 'seed_simulator' : 5632, 69 | 'seed_agent' : 9826, 70 | 'seed_memory' : 7563 71 | 72 | } 73 | 74 | 75 | mdp = MountainCar() 76 | simulator = MDPSimulator(mdp) 77 | 78 | 79 | class TestNet(Chain): 80 | 81 | def __init__(self): 82 | super(TestNet, self).__init__( 83 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 84 | l2=F.Linear(20, 10, bias=0.0), 85 | bn1=L.BatchNormalization(10), 86 | l3=F.Linear(10, 10), 87 | l4=F.Linear(10, 10), 88 | bn2=L.BatchNormalization(10), 89 | lout=F.Linear(10, simulator.n_actions) 90 | ) 91 | self.train = True 92 | # initialize avg_var to prevent divide by zero 93 | self.bn1.avg_var.fill(0.1), 94 | self.bn2.avg_var.fill(0.1), 95 | 96 | def __call__(self, ohist, ahist): 97 | h = F.relu(self.l1(ohist)) 98 | h = F.relu(self.l2(h)) 99 | h = self.bn1(h, test=not self.train) 100 | h = F.relu(self.l3(h)) 101 | h = F.relu(self.l4(h)) 102 | h = self.bn2(h, test=not self.train) 103 | output = self.lout(h) 104 | return output 105 | 106 | 107 | 108 | net = TestNet() 109 | custom_learner = ChainerLearner(settings) 110 | custom_learner.set_net(net) 111 | learner = DQNLearner(settings, custom_learner) 112 | 113 | memory = ReplayMemoryHDF5(settings) 114 | 115 | agent = DQNAgent(learner, memory, simulator, settings) 116 | 117 | agent.train(verbose=True) 118 | -------------------------------------------------------------------------------- /chimp/agents/dqn_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from copy import deepcopy 4 | import pickle 5 | from timeit import default_timer as timer 6 | 7 | from chimp.utils.policies import RandomPolicy 8 | from chimp.utils.policies import DQNPolicy 9 | 10 | class DQNAgent(object): 11 | 12 | def __init__(self, learner, memory, simulator, settings, dqn_policy=None, rollout_policy=None): 13 | 14 | """ 15 | The learning agent is responsible for communicating and moving 16 | data between the three modules: Learner, Simulator, Memory 17 | Inputs: 18 | - learner: containes the neural network and the optimizer to train it 19 | - memory: expereince replay memory that can be minibatch sampled 20 | - simulator: simulates the environemnt 21 | - settings: hyper parameters for training 22 | - rollout_policy: rollout policy, random by default 23 | """ 24 | 25 | self.learner = learner 26 | self.memory = memory 27 | self.simulator = simulator # for populating the experience replay 28 | self.evaluator = deepcopy(simulator) # for evaluation 29 | 30 | self.dqn_policy = dqn_policy 31 | if dqn_policy is None: 32 | self.dqn_policy = DQNPolicy(learner) 33 | 34 | self.rollout_policy = rollout_policy 35 | if rollout_policy is None: 36 | self.rollout_policy = RandomPolicy(simulator.n_actions) 37 | 38 | self.set_params(settings) 39 | 40 | self.n_epochs = self.iterations / float(memory.memory_size) 41 | self.iteration = [] 42 | self.loss = [] 43 | self.q_ave = [] 44 | self.eval_iteration = [] 45 | self.r_eval = [] 46 | self.r_per_episode_eval = [] 47 | 48 | def policy(self, obs, epsilon): 49 | """ 50 | e-greedy policy with customazible rollout 51 | """ 52 | if self.random_state.rand() < epsilon: 53 | return self.rollout_policy.action(obs) 54 | else: 55 | return self.dqn_policy.action(obs) 56 | 57 | 58 | def save(self,obj,name): 59 | ''' function to save a file as pickle ''' 60 | # TODO: don't you need to close the I/O stream? 
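        # One option (a sketch, not wired in) is a context manager, so the file
        # handle is closed deterministically rather than by garbage collection:
        #   with open(name, "wb") as f:
        #       pickle.dump(obj, f)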
61 | pickle.dump(obj, open(name, "wb")) 62 | 63 | def load(self,name): 64 | ''' function to load a pickle file ''' 65 | return pickle.load(open(name, "rb")) 66 | 67 | 68 | def train(self, verbose=True): 69 | """ 70 | Trains the network 71 | """ 72 | learner = self.learner 73 | memory = self.memory 74 | simulator = self.simulator 75 | 76 | if self.viz: 77 | simulator.init_viz_display() 78 | 79 | # run initial exploration and populate the experience replay 80 | self.populate_memory(self.initial_exploration) 81 | 82 | # add initial observation to observatin history 83 | iobs = simulator.get_screenshot().copy() 84 | self.initial_obs(iobs) 85 | 86 | iteration = 0 # keeps track of all training iterations, ignores evaluation 87 | run_time = 0.0 88 | start_time = timer() # mark the global beginning of training 89 | last_print = timer() 90 | 91 | while iteration < self.iterations: # for the set number of iterations 92 | 93 | # perform a single simulator step 94 | self.step() 95 | # minibatch update for DQN 96 | if iteration % self.learn_freq == 0: 97 | loss, qvals = self.batch_update() 98 | self.iteration.append(iteration) 99 | self.loss.append(loss) 100 | self.q_ave.append(np.mean(qvals)) 101 | 102 | if iteration % self.print_every == 0 and verbose: 103 | print "Iteration: %d, Loss: %.3f, Average Q-Values: %.2f, Time since print: %.2f, Total runtime: %.2f, epsilon: %.2f" % (iteration, loss, np.mean(qvals), timer() - last_print, timer() - start_time, self.epsilon) 104 | last_print = timer() 105 | 106 | if iteration % self.save_every == 0: 107 | # saving the net, the training history, and the learner itself 108 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration))) 109 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T) 110 | 111 | if iteration % self.eval_every == 0: # evaluation 112 | sim_r, sim_r_per_episode, sim_time = self.simulate(self.eval_iterations, self.eval_epsilon) 113 | self.eval_iteration.append(iteration) 114 | self.r_eval.append(sim_r) 115 | self.r_per_episode_eval.append(sim_r_per_episode) 116 | 117 | if verbose: 118 | print "Evaluation, total reward: %.2f, Reward per episode: %.2f" % (sim_r, sim_r_per_episode) 119 | 120 | np.savetxt('%s/evaluation_history.csv' % self.save_dir, np.asarray([self.eval_iteration, self.r_eval, self.r_per_episode_eval]).T) 121 | 122 | if iteration % self.target_net_update == 0: 123 | learner.copy_net_to_target_net() 124 | 125 | self.epsilon -= self.epsilon_decay 126 | self.epsilon = 0.1 if self.epsilon < 0.1 else self.epsilon 127 | 128 | iteration += 1 129 | 130 | memory.close() 131 | 132 | learner.save_net('%s/net_%d.p' % (self.save_dir,int(iteration))) 133 | np.savetxt('%s/training_history.csv' % self.save_dir, np.asarray([self.iteration, self.loss, self.q_ave]).T) 134 | 135 | run_time = timer() - start_time 136 | print('Overall training + evaluation time: '+ str(run_time)) 137 | 138 | 139 | 140 | def step(self): 141 | """ 142 | Performs a single step with the DQN and updates the replay memory 143 | """ 144 | loss = 0.0 145 | 146 | simulator = self.simulator 147 | 148 | obs = simulator.get_screenshot().copy() 149 | a = self.policy((self.ohist, self.ahist), self.epsilon) 150 | simulator.act(a) 151 | r = simulator.reward() 152 | 153 | term = False 154 | obsp = None 155 | if simulator.episode_over(): 156 | term = True 157 | obsp = obs.copy() 158 | simulator.reset_episode() 159 | iobs = simulator.get_screenshot().copy() 160 | self.empty_history() 161 | self.initial_obs(iobs) 162 | else: 
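            # non-terminal step: take the new observation and roll it into the history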
163 | obsp = simulator.get_screenshot().copy() 164 | self.update_history(obsp, a) 165 | 166 | if self.viz: # move the image to the screen / shut down the game if display is closed 167 | simulator.refresh_viz_display() 168 | 169 | self.memory.store_tuple(obs, a, r, obsp, term) 170 | 171 | 172 | def batch_update(self): 173 | """ 174 | Performs a mini-batch update on the DQN 175 | """ 176 | ohist, ahist, rhist, ophist, term = self.memory.minibatch() 177 | # take the last as our action and reward 178 | a = ahist[:,-1] 179 | r = rhist[:,-1] 180 | t = term[:,-1] 181 | oahist = None 182 | # TODO: this indexing is a hack to deal with single sample history 183 | # Using the first history entry of the minibatch (there is only one) - could do this with reshape as well 184 | if self.ahist_size == 0 or self.ohist_size == 1: 185 | oahist = (ohist[:,0], None) 186 | oaphist = (ophist[:,0], None) 187 | else: 188 | oahist = (ohist, ahist[:self.ahist_size]) 189 | oaphist = (ophist, ahist[1:self.ahist_size]) 190 | loss, qvals = self.learner.update(oahist, a, r, oaphist, t) 191 | return loss, qvals 192 | 193 | 194 | ################################################################# 195 | ################### Some Utility Functions ###################### 196 | ################################################################# 197 | 198 | def simulate(self, nsteps, epsilon, viz=False): 199 | """ 200 | Simulates the DQN policy 201 | """ 202 | simulator = self.evaluator # use a different simulator to prevent breaks 203 | simulator.reset_episode() 204 | # add initial observation to observation history 205 | iobs = simulator.get_screenshot().copy() 206 | self.initial_eval_obs(iobs) 207 | 208 | if self.viz: 209 | simulator.init_viz_display() 210 | 211 | rtot = 0.0 212 | r_per_episode = 0.0 213 | episode_count = 0 214 | start_sim = timer() 215 | for i in xrange(nsteps): 216 | # generate reward and step the simulator 217 | ohist, ahist = self.eval_ohist, self.eval_ahist 218 | a = self.policy((ohist, ahist), epsilon) 219 | 220 | simulator.act(a) 221 | r = simulator.reward() 222 | rtot += r 223 | if simulator.episode_over(): 224 | simulator.reset_episode() 225 | iobs = simulator.get_screenshot().copy() 226 | self.empty_eval_history() 227 | self.initial_eval_obs(iobs) 228 | episode_count += 1 229 | r_per_episode = rtot 230 | else: 231 | obsp = simulator.get_screenshot().copy() 232 | self.update_eval_history(obsp, a) 233 | 234 | if self.viz: # move the image to the screen / shut down the game if display is closed 235 | simulator.refresh_viz_display() 236 | 237 | if episode_count > 0: 238 | r_per_episode /= episode_count 239 | else: 240 | r_per_episode = rtot 241 | runtime = timer() - start_sim 242 | return rtot, r_per_episode, runtime 243 | 244 | 245 | def populate_memory(self, nsamples): 246 | # TODO: do we need to copy obs and obsp? 
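        # (Copying is the safe default: if a simulator's get_screenshot() returns a
        # view of an internal buffer, an uncopied reference stored in the replay
        # memory could be mutated on the next step. Whether every simulator actually
        # requires the copy is untested.)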
247 | memory = self.memory 248 | simulator = self.simulator 249 | 250 | simulator.reset_episode() 251 | for i in xrange(nsamples): 252 | # generate o, a, r, o' tuples 253 | obs = simulator.get_screenshot().copy() 254 | a = self.rollout_policy.action(obs) 255 | simulator.act(a) 256 | r = simulator.reward() 257 | obsp = simulator.get_screenshot().copy() 258 | term = False 259 | if simulator.episode_over(): 260 | term = True 261 | simulator.reset_episode() # reset 262 | # store the tuples 263 | memory.store_tuple(obs, a, r, obsp, term) 264 | simulator.reset_episode() 265 | 266 | 267 | def plot_loss(self): 268 | try: 269 | from matplotlib import pyplot 270 | except ImportError: 271 | "Can not plot loss, matplotlib required" 272 | pyplot.plot(self.loss[1:]) 273 | pyplot.xlabel("Iteration") 274 | pyplot.ylabel("Loss") 275 | pyplot.show() 276 | 277 | def plot_per_sim_reward(self): 278 | try: 279 | from matplotlib import pyplot 280 | except ImportError: 281 | "Can not plot reward, matplotlib required" 282 | pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_eval) 283 | pyplot.xlabel("Iteration") 284 | pyplot.ylabel("Reward") 285 | pyplot.title("Total Reward Per Evaluation") 286 | pyplot.show() 287 | 288 | def plot_per_episode_reward(self): 289 | try: 290 | from matplotlib import pyplot 291 | except ImportError: 292 | "Can not plot loss, matplotlib required" 293 | pyplot.plot(self.eval_every * np.arange(len(self.r_eval)), self.r_per_episode_eval) 294 | pyplot.xlabel("Reward") 295 | pyplot.ylabel("Loss") 296 | pyplot.title("Average Reward Per Episode") 297 | pyplot.show() 298 | 299 | 300 | 301 | def set_params(self, settings): 302 | # set up the setting parameters 303 | self.random_state = np.random.RandomState(settings.get('seed_agent', None)) # change to a new random seed 304 | 305 | self.batch_size = settings.get('batch_size', 32) 306 | self.n_frames = settings.get('n_frames', 1) 307 | self.iterations = settings.get('iterations', 1000000) 308 | 309 | self.epsilon = settings.get('epsilon', 1.0) # exploration 310 | self.epsilon_decay = settings.get('epsilon_decay', 0.00001) # decay in 311 | self.eval_epsilon = settings.get('eval_epsilon', 0.0) # exploration during evaluation 312 | self.initial_exploration = settings.get('initial_exploration', 10000) # of iterations during initial exploration 313 | 314 | self.viz = settings.get('viz', False) # whether to visualize the state/observation, False when not supported by simulator 315 | 316 | self.eval_iterations = settings.get('eval_iterations', 500) 317 | self.eval_every = settings.get('eval_every', 5000) 318 | self.print_every = settings.get('print_every', 5000) 319 | self.save_every = settings.get('save_every', 5000) 320 | self.save_dir = settings.get('save_dir', '.') 321 | # create the directory if it doesnt exist 322 | if not os.path.isdir(self.save_dir): 323 | os.makedirs(self.save_dir) 324 | 325 | self.learn_freq = settings.get('learn_freq', 1) # how frequently to do back prop on a minibatch 326 | self.target_net_update = settings.get('target_net_update', 5000) 327 | 328 | self.ohist_size, self.ahist_size, self.rhist_size = settings.get('history_sizes', (1,0,0)) 329 | self.ahist_size = 1 if self.ahist_size == 0 else self.ahist_size 330 | self.ohist_size = 1 if self.ohist_size == 0 else self.ohist_size 331 | 332 | self.ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 333 | self.ahist = np.zeros(self.ahist_size, dtype=np.int32) 334 | self.rev_ohist = np.zeros((self.ohist_size,) + 
self.simulator.model_dims, dtype=np.float32) 335 | self.rev_ahist = np.zeros(self.ahist_size, dtype=np.int32) 336 | 337 | self.eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 338 | self.eval_ahist = np.zeros(self.ahist_size, dtype=np.int32) 339 | self.rev_eval_ohist = np.zeros((self.ohist_size,) + self.simulator.model_dims, dtype=np.float32) 340 | self.rev_eval_ahist = np.zeros(self.ahist_size, dtype=np.int32) 341 | 342 | ################################################################# 343 | ################# History utility functions ##################### 344 | ################################################################# 345 | """ 346 | These are messy, and could be optimized 347 | """ 348 | 349 | def update_history(self, obs, a): 350 | # roll the histories forward and replace the first entry 351 | # keep a reversed history so we can easily roll though it 352 | self.rev_ohist = np.roll(self.rev_ohist, 1, axis=0) 353 | self.rev_ahist = np.roll(self.rev_ahist, 1, axis=0) 354 | self.rev_ahist[0] = a 355 | self.rev_ohist[0] = obs 356 | 357 | # reverse to get history in [s0, s1, s2,...,sn] format 358 | self.ohist = np.flipud(self.rev_ohist) 359 | self.ahist = np.flipud(self.rev_ahist) 360 | 361 | 362 | def update_eval_history(self, obs, a): 363 | # roll the histories forward and replace the first entry 364 | self.rev_eval_ohist = np.roll(self.rev_eval_ohist, 1, axis=0) 365 | self.rev_eval_ahist = np.roll(self.rev_eval_ahist, 1, axis=0) 366 | self.rev_eval_ahist[0] = a 367 | self.rev_eval_ohist[0] = obs 368 | 369 | self.eval_ohist = np.flipud(self.rev_eval_ohist) 370 | self.eval_ahist = np.flipud(self.rev_eval_ahist) 371 | 372 | def initial_obs(self, obs): 373 | self.rev_ohist[0] = obs 374 | self.ohist[-1] = obs 375 | 376 | def initial_eval_obs(self, obs): 377 | self.rev_eval_ohist[0] = obs 378 | self.eval_ohist[-1] = obs 379 | 380 | 381 | def empty_history(self): 382 | self.ohist.fill(self.memory._emptyfloat) 383 | self.ahist.fill(self.memory._emptyint) 384 | self.rev_ohist.fill(self.memory._emptyfloat) 385 | self.rev_ahist.fill(self.memory._emptyint) 386 | 387 | def empty_eval_history(self): 388 | self.eval_ohist.fill(self.memory._emptyfloat) 389 | self.eval_ahist.fill(self.memory._emptyint) 390 | self.rev_eval_ohist.fill(self.memory._emptyfloat) 391 | self.rev_eval_ahist.fill(self.memory._emptyint) 392 | 393 | -------------------------------------------------------------------------------- /chimp/learners/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Learner ''' 2 | -------------------------------------------------------------------------------- /chimp/learners/chainer_backend.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (Double) Deep Q-Learning Algorithm Implementation 3 | Supports double deep Q-learning with on either GPU and CPU 4 | 5 | ''' 6 | 7 | import numpy as np 8 | import chainer 9 | import chainer.functions as F 10 | from chainer import optimizers 11 | from chainer import cuda 12 | from copy import deepcopy 13 | 14 | import pickle # used to save the nets 15 | 16 | class ChainerBackend(object): 17 | 18 | def __init__(self, settings, net = None): 19 | 20 | self.set_params(settings) 21 | 22 | self.source_net = None 23 | self.target_net = None 24 | if net is not None: 25 | self.set_net(net) 26 | 27 | 28 | def update(self, obs, a, r, obsp, term): 29 | """ 30 | Performs a single mini-batch update 31 | """ 32 | 33 | 
self.source_net.zerograds() # reset gradient storage to zero 34 | 35 | # compute loss and qval output layer 36 | loss, qvals = self.forward_loss(obs, a, r, obsp, term) 37 | 38 | qvals.backward() # propagate the loss gradient through the net 39 | self.optimizer.update() # carry out parameter updates based on the distributed gradients 40 | if self.gpu: 41 | return loss, qvals.data.get() 42 | else: 43 | return loss, qvals.data 44 | 45 | 46 | def forward_loss(self, obs, a, r, obsp, term): 47 | """ 48 | Computes the loss and gradients 49 | """ 50 | if self.gpu: 51 | return self.forward_loss_gpu(obs, a, r, obsp, term) 52 | else: 53 | return self.forward_loss_cpu(obs, a, r, obsp, term) 54 | 55 | 56 | def forward_loss_gpu(self, obs, a, r, obsp, term): 57 | # unpack 58 | ohist, ahist = obs 59 | ophist, aphist = obsp 60 | 61 | # move to GPU 62 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist) 63 | ophist, aphist = self.to_gpu(ophist), self.to_gpu(aphist) 64 | 65 | # transfer inputs into Chainer format 66 | ohist, ophist = chainer.Variable(ohist), chainer.Variable(ophist, volatile = True) 67 | ahist, aphist = chainer.Variable(ahist), chainer.Variable(aphist, volatile = True) 68 | 69 | # get target Q 70 | target_q_all = self.target_net(ophist, aphist) # forward prop 71 | target_q_max = np.max(target_q_all.data.get(), axis=1) # max Q for each entry in mini-batch 72 | 73 | # compute the target values for each entry in mini-batch 74 | target_q_vals = r + self.discount * target_q_max * np.invert(term) 75 | 76 | # compute the source q-vals 77 | source_q_all = self.source_net(ohist, ahist) # forward prop 78 | source_q_vals = source_q_all.data.get()[np.arange(source_q_all.data.shape[0]), a] 79 | 80 | # compute the loss grads 81 | qdiff = source_q_vals - target_q_vals 82 | 83 | # distribute the loss gradient into the shape of the net's output 84 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32) 85 | dQ[np.arange(dQ.shape[0]), a] = qdiff 86 | 87 | # set as the output grad layer 88 | source_q_all.grad = self.to_gpu(dQ) 89 | 90 | # compute loss 91 | loss = np.mean(dQ**2) 92 | 93 | return loss, source_q_all 94 | 95 | 96 | def forward_loss_cpu(self, obs, a, r, obsp, term): 97 | # unpack 98 | ohist, ahist = obs 99 | ophist, aphist = obsp 100 | 101 | # transfer inputs into Chainer format 102 | ohist, ophist = self.chainer_var(ohist), self.chainer_var(ophist, volatile = True) 103 | ahist, aphist = self.chainer_var(ahist), self.chainer_var(aphist, volatile = True) 104 | 105 | # get target Q 106 | target_q_all = self.target_net(ophist, aphist) 107 | target_q_max = np.max(target_q_all.data, axis=1) 108 | 109 | # compute the target values for each entry in mini-batch 110 | target_q_vals = r + self.discount * target_q_max * np.invert(term) 111 | 112 | # compute the source q-vals 113 | source_q_all = self.source_net(ohist, ahist) # forward prop 114 | source_q_vals = source_q_all.data[np.arange(source_q_all.data.shape[0]),a] 115 | 116 | # compute the loss 117 | qdiff = source_q_vals - target_q_vals 118 | 119 | # distribute the loss gradient into the shape of the net's output 120 | dQ = np.zeros(source_q_all.data.shape, dtype=np.float32) 121 | dQ[np.arange(dQ.shape[0]), a] = qdiff 122 | 123 | # set as the output grad layer 124 | source_q_all.grad = dQ 125 | 126 | # compute loss 127 | loss = np.mean(dQ**2) 128 | 129 | return loss, source_q_all 130 | 131 | 132 | def forward(self, obs): 133 | """ 134 | Returns the Q-values for the network input obs 135 | """ 136 | # turn train off for bn, dropout, etc 137 | 
self.source_net.train = False 138 | if self.gpu: 139 | return self.forward_gpu(obs) 140 | else: 141 | return self.forward_cpu(obs) 142 | 143 | 144 | def forward_cpu(self, obs): 145 | """ 146 | Performs forward pass on CPU, returns Q values 147 | """ 148 | # unpack 149 | ohist, ahist = obs 150 | # transfer inputs into Chainer format 151 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True) 152 | # evaluate 153 | qvals = self.source_net(ohist, ahist) 154 | return qvals.data 155 | 156 | def forward_gpu(self, obs): 157 | """ 158 | Performs forward pass on CPU, returns Q values 159 | """ 160 | # unpack 161 | ohist, ahist = obs 162 | # move to gpu 163 | ohist, ahist = self.to_gpu(ohist), self.to_gpu(ahist) 164 | # transfer inputs into Chainer format 165 | ohist, ahist = self.chainer_var(ohist, volatile=True), self.chainer_var(ahist, volatile=True) 166 | # evaluate 167 | qvals = self.source_net(ohist, ahist) 168 | return qvals.data.get() 169 | 170 | ################################################################# 171 | #################### Utility Functions ########################## 172 | ################################################################# 173 | 174 | def to_gpu(self, var): 175 | if var is None: 176 | return None 177 | return cuda.to_gpu(var) 178 | 179 | def chainer_var(self, var, volatile=False): 180 | if var is None: 181 | return None 182 | return chainer.Variable(var, volatile=volatile) 183 | 184 | def set_net(self, net): 185 | self.source_net = deepcopy(net) 186 | self.target_net = deepcopy(net) 187 | if self.gpu: 188 | cuda.get_device(0).use() 189 | self.source_net.to_gpu() 190 | self.target_net.to_gpu() 191 | self.optimizer.setup(self.source_net) 192 | self.target_net.train = False 193 | 194 | 195 | def params(self): 196 | ''' collect net parameters (coefs and grads) ''' 197 | self.source_net.params() 198 | 199 | 200 | def set_params(self, params): 201 | 202 | self.gpu = params.get('gpu',False) 203 | self.learning_rate = params.get('learning_rate',0.00025) 204 | self.decay_rate = params.get('decay_rate',0.95) 205 | self.discount = params.get('discount',0.95) 206 | self.clip_err = params.get('clip_err',False) 207 | self.target_net_update = params.get('target_net_update',10000) 208 | self.double_DQN = params.get('double_DQN',False) 209 | 210 | # setting up various possible gradient update algorithms 211 | opt = params.get('optim_name', 'ADAM') 212 | if opt == 'RMSprop': 213 | self.optimizer = optimizers.RMSprop(lr=self.learning_rate, alpha=self.decay_rate) 214 | 215 | elif opt == 'ADADELTA': 216 | print("Supplied learning rate not used with ADADELTA gradient update method") 217 | self.optimizer = optimizers.AdaDelta() 218 | 219 | elif opt == 'ADAM': 220 | self.optimizer = optimizers.Adam(alpha=self.learning_rate) 221 | 222 | elif opt == 'SGD': 223 | self.optimizer = optimizers.SGD(lr=self.learning_rate) 224 | 225 | else: 226 | print('The requested optimizer is not supported!!!') 227 | exit() 228 | 229 | if self.clip_err is not False: 230 | self.optimizer.add_hook(chainer.optimizer.GradientClipping(self.clip_err)) 231 | 232 | self.optim_name = params['optim_name'] 233 | -------------------------------------------------------------------------------- /chimp/learners/chainer_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_backend import ChainerBackend 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.learners.dqn_learner import DQNPolicy 12 | 13 | import numpy as np 14 | 15 | import chainer 16 | import chainer.functions as F 17 | import chainer.links as L 18 | from chainer import Chain 19 | 20 | settings = { 21 | 22 | # agent settings 23 | 'batch_size' : 32, 24 | 'print_every' : 500, 25 | 'save_dir' : 'results/nets_rocksample_belief_rmsprop', 26 | 'iterations' : 100000, 27 | 'eval_iterations' : 100, 28 | 'eval_every' : 1000, 29 | 'save_every' : 500, 30 | 'initial_exploration' : 500, 31 | 'epsilon_decay' : 0.00001, # subtract from epsilon every step 32 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 33 | 'epsilon' : 1.0, # Initial exploratoin rate 34 | 'learn_freq' : 1, 35 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 36 | 'model_dims' : (1,2), 37 | 38 | # simulator settings 39 | 'viz' : False, 40 | 41 | # replay memory settings 42 | 'memory_size' : 1000, # size of replay memory 43 | 'n_frames' : 1, # number of frames 44 | 45 | # learner settings 46 | 'learning_rate' : 0.00025, 47 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 48 | 'discount' : 0.95, # discount rate for RL 49 | 'clip_err' : False, # value to clip loss gradients to 50 | 'clip_reward' : 1, # value to clip reward values to 51 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 52 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 53 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 54 | 'gpu' : False, 55 | 'reward_rescale': False, 56 | 57 | # general 58 | 'seed_general' : 1723, 59 | 'seed_simulator' : 5632, 60 | 'seed_agent' : 9826, 61 | 'seed_memory' : 7563 62 | 63 | } 64 | 65 | n_actions = 2 66 | o_dims = settings['model_dims'] 67 | n_samples = settings['batch_size'] 68 | 69 | class TestNet(Chain): 70 | 71 | def __init__(self): 72 | super(TestNet, self).__init__( 73 | #l1=F.Bilinear(settings["history_sizes"][0], settings["history_sizes"][1], 20), 74 | l1=F.Linear(o_dims[1], 20, bias=0.0), 75 | l2=F.Linear(20, 10, bias=0.0), 76 | bn1=L.BatchNormalization(10), 77 | lout=F.Linear(10, n_actions) 78 | ) 79 | self.train = True 80 | # initialize avg_var to prevent divide by zero 81 | self.bn1.avg_var.fill(0.1), 82 | 83 | def __call__(self, ohist, ahist): 84 | h = F.relu(self.l1(ohist)) 85 | h = F.relu(self.l2(h)) 86 | h = self.bn1(h, test=not self.train) 87 | output = self.lout(h) 88 | return output 89 | 90 | def make_batch(n_samples, o_dims, n_actions): 91 | obs = np.zeros((n_samples,)+o_dims, dtype=np.float32) 92 | obsp = np.zeros((n_samples,)+o_dims, dtype=np.float32) 93 | a = np.zeros(n_samples, dtype=np.int32) 94 | r = np.zeros(n_samples, dtype=np.float32) 95 | term = np.zeros(n_samples, dtype=np.bool) 96 | for i in xrange(n_samples): 97 | obs[i] = np.random.uniform(0.0, 1.0, o_dims) 98 | a[i] = np.random.randint(n_actions) 99 | obsp[i] = (obs[i] + 0.25) if a[i] == 1 else (obs[i] - 0.25) 100 | obsp[i] = np.clip(obsp[i], 0.0, 1.0) 101 | r[i] = np.sum(obs[i]) 102 | return obs, a, r, obsp, term 103 | 104 | 105 | net = TestNet() 106 | custom_learner = 
ChainerBackend(settings) 107 | custom_learner.set_net(net) 108 | 109 | learner = DQNLearner(settings, custom_learner) 110 | 111 | policy = DQNPolicy(learner) 112 | 113 | obst, a, r, obsp, term = make_batch(10, o_dims, n_actions) 114 | 115 | for i in xrange(10): 116 | ohist = (obst[i], None) 117 | a = policy.action(ohist) 118 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None)) 119 | 120 | print "TRAINING" 121 | for i in xrange(3000): 122 | obs, a, r, obsp, term = make_batch(n_samples, o_dims, n_actions) 123 | ohist = (obs, None) 124 | ophist = (obsp, None) 125 | #loss, q_all = custom_learner.forward_loss(ohist, a, r, ophist, term) 126 | loss, q_all = learner.update(ohist, a, r, ophist, term) 127 | if i % 500 == 0: 128 | print loss 129 | 130 | 131 | for i in xrange(10): 132 | ohist = (obst[i], None) 133 | a = policy.action(ohist) 134 | print "Test: ", i, " ", obst[i], " ", a, " ", learner.forward((obst[i], None)) 135 | 136 | 137 | -------------------------------------------------------------------------------- /chimp/learners/dqn_learner.py: -------------------------------------------------------------------------------- 1 | ''' 2 | (Double) Deep Q-Learning Algorithm Implementation 3 | Supports double deep Q-learning with on either GPU and CPU 4 | 5 | ''' 6 | 7 | import numpy as np 8 | import pickle # used to save the nets 9 | from copy import deepcopy 10 | 11 | class DQNLearner(object): 12 | 13 | def __init__(self, settings, backend): 14 | 15 | """ 16 | Functions that must be defined by the custom learner: 17 | - forward_loss(obs, a, r, obsp, term) # computes scores and loss 18 | - forward(obs) # computes scores 19 | - update(obs, a, r, obsp) # update the params 20 | - get_net() # returns the network object 21 | - set_net(net) # sets the source and target nets and moves to gpu (if needed) 22 | Fields owned by the learner: 23 | - source_net: generates source Q-vals 24 | - target_net: generates target Q-vals 25 | """ 26 | 27 | self.backend = backend 28 | 29 | self.clip_reward = settings.get('clip_reward', False) 30 | self.reward_rescale = settings.get('reward_rescale', False) 31 | self.r_max = 1 # keep the default value at 1 32 | 33 | 34 | def update(self, obs, a, r, obsp, term): 35 | r = self.pre_process_reward(r) 36 | return self.backend.update(obs, a, r, obsp, term) 37 | 38 | def forward_loss(self, obs, a, r, obsp, term): 39 | return self.backend.forward_loss(obs, a, r, obsp, term) 40 | 41 | def forward(self, obs): 42 | return self.backend.forward(obs) 43 | 44 | def copy_net_to_target_net(self): 45 | ''' update target net with the current net ''' 46 | self.backend.target_net = deepcopy(self.backend.source_net) 47 | 48 | def save(self,obj,name): 49 | pickle.dump(obj, open(name, "wb")) 50 | 51 | def load(self,name): 52 | return pickle.load(open(name, "rb")) 53 | 54 | def save_net(self,name): 55 | ''' save a net to a path ''' 56 | self.save(self.backend.source_net,name) 57 | 58 | def load_net(self,net): 59 | ''' load in a net from path or a variable''' 60 | if isinstance(net, str): # if it is a string, load the net from the path 61 | net = self.load(net) 62 | self.backend.set_net(net) 63 | 64 | 65 | def save_training_history(self, path='.'): 66 | ''' save training history ''' 67 | train_hist = np.array([range(len(self.train_rewards)),self.train_losses,self.train_rewards, self.train_qval_avgs, self.train_episodes, self.train_times]).T 68 | eval_hist = np.array([range(len(self.val_rewards)),self.val_losses,self.val_rewards, self.val_qval_avgs, self.val_episodes, 
self.val_times]).T 69 | # TODO: why is this here and not in agent? 70 | np.savetxt(path + '/training_hist.csv', train_hist, delimiter=',') 71 | np.savetxt(path + '/evaluation_hist.csv', eval_hist, delimiter=',') 72 | 73 | def params(self): 74 | """ 75 | Returns an iterator over netwok parameters 76 | Note: different back-ends will return different param containers 77 | """ 78 | # TODO: return a dictionary here? 79 | self.backend.params() 80 | 81 | 82 | def pre_process_reward(self, r): 83 | """ 84 | Clips and re-scales the rewards 85 | """ 86 | if self.clip_reward: 87 | r = np.clip(r,-self.clip_reward,self.clip_reward) 88 | if self.reward_rescale: 89 | self.r_max = max(np.amax(np.absolute(r)),self.r_max) 90 | r = r / self.r_max 91 | return r 92 | 93 | -------------------------------------------------------------------------------- /chimp/memories/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Implements Experience Replay Memory ''' 2 | 3 | from replay_memory import ReplayMemoryHDF5 4 | from memory import ReplayMemory -------------------------------------------------------------------------------- /chimp/memories/mem_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from replay_memory import ReplayMemoryHDF5 3 | 4 | 5 | settings = { 6 | 'save_dir' : 'results/test', 7 | 'seed_memory' : 1, 8 | 'history_sizes' : (5, 2, 0), 9 | 'memory_size' : 1000, 10 | 'model_dims' : (1,20), 11 | 'batch_size' : 32 12 | } 13 | 14 | mem = ReplayMemoryHDF5(settings) 15 | 16 | o_dims = settings['model_dims'] 17 | 18 | for i in xrange(1000): 19 | obs = np.random.random(o_dims) + i # random obs 20 | a = np.random.randint(10) + i# 10 actions 21 | r = np.random.rand() + i 22 | obsp = np.random.random(o_dims) + i 23 | term = bool(np.random.binomial(1,0.1)) # 10% chance reach terminal state 24 | mem.store_tuple(obs, a, r, obsp, term) 25 | 26 | o,a,r,op,terms=mem.minibatch() 27 | #mem.close() 28 | -------------------------------------------------------------------------------- /chimp/memories/memory.py: -------------------------------------------------------------------------------- 1 | ''' 2 | An alternative replay memory that does not utilize HDF5 - less efficient 3 | ''' 4 | 5 | import numpy as np 6 | 7 | class ReplayMemory(object): 8 | 9 | def __init__(self, settings): 10 | 11 | self.random_state = np.random.RandomState(settings['seed_memory']) 12 | self.memory_size = settings['memory_size'] 13 | self.model_dims = settings['model_dims'] 14 | self.n_frames = settings['n_frames'] 15 | self.data = [np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32), 16 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32), 17 | np.zeros(self.memory_size, dtype=np.int32), 18 | np.zeros(self.memory_size, dtype=np.float32), 19 | np.zeros((self.memory_size, self.n_frames, self.model_dims[0], self.model_dims[1]), dtype=np.float32), 20 | np.zeros((self.memory_size, self.n_frames), dtype=np.float32), 21 | np.zeros(self.memory_size, dtype=np.bool)] 22 | self.counter = 0 23 | 24 | # function to sample a mini-batch 25 | def minibatch(self, batch_size): 26 | # sampling a mini-batch of the given size with replacement 27 | ind = self.random_state.randint(0,min(self.counter,self.memory_size),batch_size) 28 | return self.data[0][ind], self.data[1][ind], self.data[2][ind], self.data[3][ind], self.data[4][ind], self.data[5][ind], self.data[6][ind] 29 | 30 | # function to 
store the observed experience and keep the count within the replay memory 31 | def store_tuple(self, s0, ahist0, a, r, s1, ahist1, episode_end_flag = False): 32 | 33 | # keep the most recent observations within the limit of the memory 34 | ind = self.counter % self.memory_size 35 | 36 | self.data[0][ind] = s0 37 | self.data[1][ind] = ahist0 38 | self.data[2][ind] = a 39 | self.data[3][ind] = r 40 | 41 | if not episode_end_flag: 42 | self.data[4][ind] = s1 43 | self.data[5][ind] = ahist1 44 | 45 | self.data[6][ind] = episode_end_flag 46 | 47 | self.counter += 1 48 | -------------------------------------------------------------------------------- /chimp/memories/replay_memory.py: -------------------------------------------------------------------------------- 1 | ''' Implements class for reading/writing experiences to the replay dataset. 2 | 3 | We assume 4 | (1) Actions and rewards for the full history fit comfortably in memory, 5 | (2) The belief state representation for the full history does not, 6 | (3) A single sample of belief states fits comfortably in memory. 7 | 8 | For instance, if the replay dataset stores the last 1 million experiences, 9 | then the history of actions is 1 byte x 1 M = 1 MB. The same holds for the 10 | history of rewards. However, a modest belief state representation might be 11 | a dense vector with a maximum of 1,000 Float64 elements (typical state spaces 12 | are on the order of millions). In this case the full history of 1 million 13 | states would be (1,000 elem x 8 bytes x 1 M = 8 GB). 14 | 15 | N.B.! 16 | Memory is organized as (a, r, s', end_of_game_flag). We refer to s' 17 | simply as "state". To sample (s, a, r, s', end_of_game_flag) 18 | we take s' from the current location in memory, and (a, r, s', end_of_game_flag) 19 | from the location one step forward. 20 | ''' 21 | 22 | import numpy as np 23 | import h5py 24 | import os 25 | 26 | class ReplayMemoryHDF5(object): 27 | ''' Wrapper around a replay dataset residing on disk as HDF5. 
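    Typical usage (see chimp/memories/mem_test.py): construct with a settings
    dictionary providing 'save_dir', 'seed_memory', 'history_sizes', 'memory_size',
    'model_dims', and 'batch_size', then call store_tuple(obs, a, r, obsp, term)
    once per step and minibatch() to sample history-augmented training batches.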
''' 28 | 29 | def __init__(self, settings, filename='memory.hdf5', overwrite=True, empty=-1): 30 | 31 | if not os.path.exists(settings['save_dir']): 32 | os.makedirs(settings['save_dir']) 33 | 34 | filename = settings['save_dir'] + '/' + filename 35 | self.random_state = np.random.RandomState(settings['seed_memory']) 36 | self.ohist_size, self.ahist_size, self.rhist_size = settings['history_sizes'] 37 | 38 | self.ahist_size = 1 if self.ahist_size is 0 else self.ahist_size 39 | self.rhist_size = 1 if self.rhist_size is 0 else self.rhist_size 40 | 41 | self.max_size = max(settings['history_sizes']) 42 | self.batch_size = settings['batch_size'] 43 | 44 | if overwrite: 45 | self.fp = h5py.File(filename, 'w') 46 | else: 47 | self.fp = h5py.File(filename, 'a') 48 | 49 | if all(x in self.fp for x in ('observations', 'actions', 'rewards', 'next_observations', 'terminals')): 50 | self.observations = self.fp['observations'] 51 | self.memory_size = self.observations.shape[0] 52 | 53 | self.actions = np.empty(self.memory_size, dtype=np.int32) 54 | self.fp['actions'].read_direct(self.actions) 55 | 56 | self.rewards = np.empty(self.memory_size, dtype=np.float32) 57 | self.fp['rewards'].read_direct(self.rewards) 58 | 59 | self.next_observations = self.fp['next_observations'] 60 | 61 | self.terminals = np.empty(self.memory_size, dtype=bool) 62 | self.fp['terminals'].read_direct(self.terminals) 63 | 64 | if self.memory_size != settings['memory_size']: 65 | print("Warning: dataset loaded from %s is of size %d, " 66 | "not %d as indicated in |settings|. Using existing size." 67 | % (filename, self.memory_size, settings['memory_size'])) 68 | 69 | else: 70 | self.memory_size = settings['memory_size'] 71 | obs_shape = settings['model_dims'] 72 | 73 | self.observations = self.fp.create_dataset('observations', (self.memory_size,) + obs_shape, dtype=np.float32) 74 | self.next_observations = self.fp.create_dataset('next_observations', (self.memory_size,) + obs_shape, dtype=np.float32) 75 | 76 | self.fp.create_dataset('actions', (self.memory_size,), dtype='int32') 77 | self.fp.create_dataset('rewards', (self.memory_size,), dtype='float32') 78 | self.fp.create_dataset('terminals', (self.memory_size,), dtype=bool) 79 | 80 | self.actions = np.empty(self.memory_size, dtype=np.int32) 81 | self.rewards = np.empty(self.memory_size, dtype=np.float32) 82 | self.terminals = np.empty(self.memory_size, dtype=np.bool) 83 | 84 | self.observations.attrs['head'] = 0 85 | self.observations.attrs['valid'] = 0 86 | 87 | # index of current "write" location 88 | self.head = self.observations.attrs['head'] 89 | 90 | # greatest index of any valid experience; i.e., [0, self.valid) 91 | self.valid = self.observations.attrs['valid'] 92 | 93 | # initialize histories 94 | self.ohist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32) 95 | self.ophist = np.zeros((self.batch_size, self.ohist_size) + obs_shape, dtype=np.float32) 96 | self.ahist = np.zeros((self.batch_size, self.ahist_size), dtype=np.int32) 97 | self.rhist = np.zeros((self.batch_size, self.rhist_size), dtype=np.float32) 98 | self.thist = np.zeros((self.batch_size, self.ohist_size), dtype=np.bool) 99 | 100 | self._emptyint = np.int32(empty) 101 | self._emptyfloat = np.float32(empty) 102 | 103 | def minibatch(self): 104 | ''' Uniformly sample (o,a,r,o') experiences from the replay dataset. 
105 | 106 | Args: 107 | batch_size: size of mini-batch 108 | 109 | Returns: 110 | Five numpy arrays that correspond to o, a, r, o', and the terminal 111 | state indicator. 112 | ''' 113 | batch_size = self.batch_size 114 | if batch_size >= self.valid: 115 | raise ValueError("Can't draw sample of size %d from replay dataset of size %d" 116 | % (batch_size, self.valid)) 117 | 118 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size 119 | max_hist = self.max_size 120 | 121 | indices = self.get_indices(batch_size) 122 | 123 | self.clear_history() 124 | 125 | # TODO: can we get rid of this loop by sorting indices and then reshaping? 126 | for i in xrange(batch_size): 127 | # all histories end on the same index 128 | endi = indices[i] 129 | starti = endi - max_hist 130 | # starting indices if there are no terminal states 131 | starto, starta, startr = endi-ohist_size, endi-ahist_size, endi-rhist_size 132 | 133 | # look backwards and find the first terminal state 134 | termarr = np.where(self.terminals[starti:endi-1]==True)[0] 135 | termidx = starti 136 | if termarr.size != 0: 137 | termidx = endi - (endi-starti - termarr[-1]) + 1 138 | 139 | # if a history starts before the terminal state, move its start index forward 140 | starto = termidx if starto < termidx else starto 141 | starta = termidx if starta < termidx else starta 142 | startr = termidx if startr < termidx else startr 143 | 144 | ohl, ahl, rhl = (endi - starto), (endi - starta), (endi - startr) 145 | 146 | # load from memory 147 | self.ohist[i, ohist_size-ohl:] = self.observations[xrange(starto, endi)] 148 | self.ophist[i, ohist_size-ohl:] = self.next_observations[xrange(starto, endi)] 149 | self.ahist[i, ahist_size-ahl:] = self.actions[xrange(starta, endi)] 150 | self.rhist[i, rhist_size-rhl:] = self.rewards[xrange(startr, endi)] 151 | self.thist[i, ohist_size-ohl:] = self.terminals[xrange(starto, endi)] 152 | 153 | return self.ohist, self.ahist, self.rhist, self.ophist, self.thist 154 | 155 | 156 | def get_indices(self, batch_size): 157 | ohist_size, ahist_size, rhist_size = self.ohist_size, self.ahist_size, self.rhist_size 158 | max_hist = self.max_size 159 | 160 | # we want to sample from valid history sets 161 | start_shift = self.random_state.randint(max_hist) 162 | 163 | # indices corresponding to the ranges from which to sample 164 | indices = self.random_state.choice(xrange(1,self.valid/max_hist), size=batch_size, replace=False) 165 | # shift all the indices by the random offset 166 | indices *= max_hist 167 | indices += start_shift 168 | 169 | return indices 170 | 171 | 172 | def store_tuple(self, obs, action, reward, obsp, terminal): 173 | ''' Stores an experience tuple into the replay dataset, i.e., a 174 | tuple (obs, action, reward, obsp, terminal) where |obsp| is the observation 175 | made when the agent takes |action| and receives |reward|, 176 | while |obs| is the observation made prior to taking |action|. 177 | The observation |obs| is assumed to be at index (self.head).
178 | 179 | Args: 180 | obs: observation made at time t of shape provided by user (obs_shape) 181 | action: index of action chosen 182 | reward: float value of reward recieved after taking action a 183 | or None if the input action ended the game 184 | terminal: indicates if obsp is terminal 185 | 186 | ''' 187 | self.actions[self.head] = action 188 | self.rewards[self.head] = reward 189 | self.terminals[self.head] = terminal 190 | self.observations[self.head] = obs 191 | self.next_observations[self.head] = obsp 192 | 193 | # update head and valid pointers 194 | self.head = (self.head + 1) % self.memory_size 195 | self.valid = min(self.memory_size, self.valid + 1) 196 | 197 | def clear_history(self): 198 | self.ohist.fill(self._emptyfloat) 199 | self.ophist.fill(self._emptyfloat) 200 | self.ahist.fill(self._emptyint) 201 | self.rhist.fill(0.0) 202 | self.thist.fill(False) 203 | 204 | def close(self): 205 | ''' Stores the memory dataset into the file when program ends. ''' 206 | self.fp['actions'][:] = self.actions 207 | self.fp['rewards'][:] = self.rewards 208 | self.fp['terminals'][:] = self.terminals 209 | self.observations.attrs['head'] = self.head 210 | self.observations.attrs['valid'] = self.valid 211 | self.fp.close() 212 | 213 | def __del__(self): 214 | try: 215 | self.close() 216 | except: 217 | pass # already closed 218 | -------------------------------------------------------------------------------- /chimp/pre_trained_nets/mountain_car.net: -------------------------------------------------------------------------------- 1 | ccopy_reg 2 | _reconstructor 3 | p0 4 | (c__main__ 5 | TestNet 6 | p1 7 | c__builtin__ 8 | object 9 | p2 10 | Ntp3 11 | Rp4 12 | (dp5 13 | S'_persistent' 14 | p6 15 | (lp7 16 | sS'name' 17 | p8 18 | NsS'_children' 19 | p9 20 | (lp10 21 | S'bn2' 22 | p11 23 | aS'bn1' 24 | p12 25 | aS'lout' 26 | p13 27 | aS'l4' 28 | p14 29 | aS'l2' 30 | p15 31 | aS'l3' 32 | p16 33 | aS'l1' 34 | p17 35 | asg11 36 | g0 37 | (cchainer.links.normalization.batch_normalization 38 | BatchNormalization 39 | p18 40 | g2 41 | Ntp19 42 | Rp20 43 | (dp21 44 | g6 45 | (lp22 46 | S'avg_mean' 47 | p23 48 | aS'avg_var' 49 | p24 50 | aS'N' 51 | p25 52 | asg23 53 | cnumpy.core.multiarray 54 | _reconstruct 55 | p26 56 | (cnumpy 57 | ndarray 58 | p27 59 | (I0 60 | tp28 61 | S'b' 62 | p29 63 | tp30 64 | Rp31 65 | (I1 66 | (I10 67 | tp32 68 | cnumpy 69 | dtype 70 | p33 71 | (S'f4' 72 | p34 73 | I0 74 | I1 75 | tp35 76 | Rp36 77 | (I3 78 | S'<' 79 | p37 80 | NNNI-1 81 | I-1 82 | I0 83 | tp38 84 | bI00 85 | S'\xc7\xfac;\x08\x15B<\x00\x00\x00\x00\x8b-u;k\x18\xc9:\x07\x8bE<\x18\xa6\xff:\xc4\x83!=\x0b<\x89<\xe3\x08\x958' 86 | p39 87 | tp40 88 | bsg8 89 | g11 90 | sS'decay' 91 | p41 92 | F0.9 93 | sS'eps' 94 | p42 95 | F1e-05 96 | sS'_cpu' 97 | p43 98 | I01 99 | sS'beta' 100 | p44 101 | cchainer.variable 102 | Variable 103 | p45 104 | (g26 105 | (g27 106 | (I0 107 | tp46 108 | g29 109 | tp47 110 | Rp48 111 | (I1 112 | (I10 113 | tp49 114 | g36 115 | I00 116 | S'\n{\x1e\xbeZ\xd7\x87\xbe\x9er\x00\xbf\x0fp\xe5\xbe\xd2\xe1\xd2\xbe`\x1b\x05\xbfOte>`s\x10\xbfY\x8f\x06\xbf\x8f8\x87\xbe' 117 | p50 118 | tp51 119 | bcchainer.flag 120 | Flag 121 | p52 122 | (Ntp53 123 | Rp54 124 | g44 125 | tp55 126 | Rp56 127 | sS'_params' 128 | p57 129 | (lp58 130 | S'gamma' 131 | p59 132 | ag44 133 | asg25 134 | I0 135 | sg24 136 | g26 137 | (g27 138 | (I0 139 | tp60 140 | g29 141 | tp61 142 | Rp62 143 | (I1 144 | (I10 145 | tp63 146 | g36 147 | I00 148 | 
S'\x97\xa4\xb9=\xf74\xbd=vR\xb8=&\x95\xb9=\x88\x9a\xb8=\xeb\x1b\xc3=\\\xcd\xb8=\xb8\xa0\xf0=\r\x94\xc7=NS\xb8=' 149 | p64 150 | tp65 151 | bsg59 152 | g45 153 | (g26 154 | (g27 155 | (I0 156 | tp66 157 | g29 158 | tp67 159 | Rp68 160 | (I1 161 | (I10 162 | tp69 163 | g36 164 | I00 165 | S'N\x80\x91?k!\xa2?\xfe1\xae?\x1d\xa7\xb9?\r\xf7\xb8? O\xae?V\x8f\xc4?C}\x94?\x84\xbe\xa8?\xb5n\x9f?' 166 | p70 167 | tp71 168 | bg54 169 | g59 170 | tp72 171 | Rp73 172 | sbsg12 173 | g0 174 | (g18 175 | g2 176 | Ntp74 177 | Rp75 178 | (dp76 179 | g6 180 | (lp77 181 | g23 182 | ag24 183 | ag25 184 | asg23 185 | g26 186 | (g27 187 | (I0 188 | tp78 189 | g29 190 | tp79 191 | Rp80 192 | (I1 193 | (I10 194 | tp81 195 | g36 196 | I00 197 | S'\x05\xb2\xd6;\x00\x00\x00\x00B\xecL;\x8b+\xc2<\x00\x00\x00\x00\xab\xbc\xf8;OJ\x1e8\x18\xec\xfa<\x7f\x1d\x87\x08>\x87\x17'>\xa4\xa9\xb6=s\x83o=\xda\xff\x01>\xb7\xb9\x92=\x91G\x1d\xbe" 223 | p88 224 | tp89 225 | bg54 226 | g44 227 | tp90 228 | Rp91 229 | sg57 230 | (lp92 231 | g59 232 | ag44 233 | asg25 234 | I0 235 | sg24 236 | g26 237 | (g27 238 | (I0 239 | tp93 240 | g29 241 | tp94 242 | Rp95 243 | (I1 244 | (I10 245 | tp96 246 | g36 247 | I00 248 | S'\x7f\x89\xb8=vR\xb8=p\\\xb8=\xe0\xa3\xbc=vR\xb8=\xb0\xc7\xb8=\xb3R\xb8=M\xc5\xbd=H\xfc\xb9=:\xa6\xb9=' 249 | p97 250 | tp98 251 | bsg59 252 | g45 253 | (g26 254 | (g27 255 | (I0 256 | tp99 257 | g29 258 | tp100 259 | Rp101 260 | (I1 261 | (I10 262 | tp102 263 | g36 264 | I00 265 | S'\xf6T\xa1?c\xd6\x7f?0\xb5\xdc?\xe0\xa8\x8f?\x00\x00\x80?#"\xba?2\xd1\xae?*\xfa\x8d?_\xa0\x80?\t\x08\xa5?' 266 | p103 267 | tp104 268 | bg54 269 | g59 270 | tp105 271 | Rp106 272 | sbsg13 273 | g0 274 | (cchainer.links.connection.linear 275 | Linear 276 | p107 277 | g2 278 | Ntp108 279 | Rp109 280 | (dp110 281 | g6 282 | (lp111 283 | sg8 284 | g13 285 | sS'W' 286 | p112 287 | g45 288 | (g26 289 | (g27 290 | (I0 291 | tp113 292 | g29 293 | tp114 294 | Rp115 295 | (I1 296 | (I3 297 | I10 298 | tp116 299 | g36 300 | I00 301 | S'%\x0b!\xbf\x83\xc2V>V\xd6\xe3\xbe^\xb0v\xbeQv?>\x90\xf1\x19\xbfk\xcbe?\xf2\xdf\x8b\xbe\xb1h\xf7\xbe\xb0\xce\x97>\xd4\xc1Q\xbe\xf2\xa8O\xbfY\xf5\xee\xbe\xad\x08(\xbf\x98?,\xbf\xa8\x862\xbfMj%?\x19YN\xbe\x9aF\x06\xbf\x96\xb0\x08\xbf3-\xc7\xbe\xab\xe2<\xbd\xb2\x85\xa7\xbe-i\x85\xbe,\xb2\x18\xbf\x94\xd5"\xbf\xc3\xc38?\xa4\xce(\xbeW28\xbf\xd6\xea\xd2\xbe' 302 | p117 303 | tp118 304 | bg54 305 | g112 306 | tp119 307 | Rp120 308 | sg29 309 | g45 310 | (g26 311 | (g27 312 | (I0 313 | tp121 314 | g29 315 | tp122 316 | Rp123 317 | (I1 318 | (I3 319 | tp124 320 | g36 321 | I00 322 | S"Y'6=\x85\xeb\x12?H\x1c\x93>" 323 | p125 324 | tp126 325 | bg54 326 | g29 327 | tp127 328 | Rp128 329 | sg57 330 | (lp129 331 | g112 332 | ag29 333 | asg43 334 | I01 335 | sbsg43 336 | I01 337 | sg14 338 | g0 339 | (g107 340 | g2 341 | Ntp130 342 | Rp131 343 | (dp132 344 | g6 345 | (lp133 346 | sg8 347 | g14 348 | sg112 349 | g45 350 | (g26 351 | (g27 352 | (I0 353 | tp134 354 | g29 355 | tp135 356 | Rp136 357 | (I1 358 | (I10 359 | I10 360 | tp137 361 | g36 362 | I00 363 | S'\x0fq\x99>z\xb6\x08>]?\x8b\xbe\x93<\xfd\xbep\x98p\xbd\xb0\xf4\xe4\xbd\xec\n\xa6\xbe\xe2\x1f\x9c<*\x18\xb4;\x03\xa3e\xbe\xb1\x02$>\n&\xaa\xbd`\xfb{>\x8b\x02\xb4>\x10v\xb9\xbel\x15\xfb\xbd.\x88D\xbd`Vy\xbe\xee$ ?0\xc8\x8c=\xb6G\x17>\xd4\xcb\xe6\xbe\xb6\xa2\n\xbf\xed\x94\xa1\xbd\xd9\x19\x1a?\xd7\xf6\x9c\xbe\x96\xca\xd4\xbdE\x13\xda\xbd\xb8\x8d8\xbfl\x02\xab=t@\xc9\xbdT\xa4\xf6\xbcP\xd5\x83\xbd>\xf8Q\xbe\xeb~\xae\xbdk 
\x8f=4\x81\xcf\xber\xa0,>\xdb2\xa3>\x0bk\x08=\xdb4\x97\xbd/\x15\xab<\xcb=\xa4\xbc\x15(\x1e=\x1a\x12\xd1\xben\x90\x0c\xbeF\xd7\xf8\xbd\x15?\xca=\xbbj|\xbe\xda\xc5\xb0\xbe\xcaLv\xbe\xb37\xd9=\x07a\x0e\xbfS\x0b\x82>\xf3\xe9\x07?e\x7f2\xbfC\xd2\x08?\x0e\xb0\xac\xbe\xf0c\x97\xbf,b\x1a?4\xd8\xa5\xbe*Z\xa9=\xa6\xc5\x85>\x91\xc94\xbf\xed\xda\x14\xbe\xe8u9?7&;\xbe\xaf9e=\xcca\xc7=\xc9\x08\x1f\xbf\xc1\x9f\x94\xbe\\\x8b\x89\xbe\x9be\xd2\xbd\xb4\xb9;?[\x80\xab>X\xf8$\xbf\xc7\x85+\xbdtY\xff\xbdz\x1fA\xbf\x02\xe8&?\xb6\xbd\xdc\xbe\xf1\x91\xd0\xbcz\xea\x03\xbe\xd5;#>\xe0\xe1\x97\xbd\x8e\xc82\xbf\xdf\xe0.?\xdeZ\x84>\xa8\xb7]\xbf\x1a\x82\x1e?\xa2\xff\x8f\xbesM\xcf\xba\nv\xb7=\xbe\xc4\x88\xbeh\x93a\xbe$i\xe0>\xea\xf50\xbf\xe8u\x8e\xbe\xb7\xf9\x01\xbc\x17\x8d\xf3\xbd' 364 | p138 365 | tp139 366 | bg54 367 | g112 368 | tp140 369 | Rp141 370 | sg29 371 | g45 372 | (g26 373 | (g27 374 | (I0 375 | tp142 376 | g29 377 | tp143 378 | Rp144 379 | (I1 380 | (I10 381 | tp145 382 | g36 383 | I00 384 | S'\xfbK\xaa9.\xa1\xbc\xbbb\xa4\xfa=\xc6\xf1\xd0\xb62Q\xf2:\xfb-\xb2=\x80\x00I=\xc6F=\xbe\xc8\x88\x15>\xa6\xb1u\xbb' 385 | p146 386 | tp147 387 | bg54 388 | g29 389 | tp148 390 | Rp149 391 | sg57 392 | (lp150 393 | g112 394 | ag29 395 | asg43 396 | I01 397 | sbsS'train' 398 | p151 399 | I00 400 | sg15 401 | g0 402 | (g107 403 | g2 404 | Ntp152 405 | Rp153 406 | (dp154 407 | g6 408 | (lp155 409 | sg8 410 | g15 411 | sg112 412 | g45 413 | (g26 414 | (g27 415 | (I0 416 | tp156 417 | g29 418 | tp157 419 | Rp158 420 | (I1 421 | (I10 422 | I20 423 | tp159 424 | g36 425 | I00 426 | S'pqy>\x08\xde\x8f\xbd\x90\x8a\xb5>\xcd\xb8F?X\xdfF\xbe\xe0\xde\xf3=\x06\t\x1a?p\xff\xef=eu|>q\xb9\xac>\xe5k\x89\xbe\x96\xf2q\xbc)\xdd\xc0\xbe\x02\x84w\xbe\xd2\x04\xad>)\xab\x85\xbd\x89-\x14>\xae\x94\x08\xbes\x80\x9e=\x9c\x7f\x05>%N=>C;\x03?\xf55\x16\xbfB\xc8\x83\xbe\xcb\xa5\xed\xbd\x9f\x13`>\xca$c\xbcY\xe9+>\x8b\xff\xc5\xbdyh\x93\xbe\xf3\xd9S\xbeF@\x15>\xdc7]\xbe\x12\xb3\xce\xbe*yv\xbd\xc1X;\xbe\x10\xec\xa6>\xcf\xe8I\xbe\x00\xf5}\xbd\xe9\x15\x01>\xd2\xa8\x02\xbe\xb2\xc0\xae\xbe\xe2\xe7\xd7<\xf5\x91\xdd\xbe\x875\x19\xbc}\xdb\xdf<:_\xc0\xbd\xc55\x97>\x12b >\n\xfcS\xbd\xa1\xe1\x0c>\x16\x04\xec>\xe2c\x14>\x7f\xbfw>\xa6\xe1k>\x1al\x03>}W\xb0\xbesw\xd7\xbed5\x8c\xbe\x06\x99\x07>\xa4@=>%\x81\x11?\x89\x98\x05\xbe\x04e%\xbf\xc4@\x0b<\xa0|\x8b\xbe^5L\xbf\xd1\x1e\x05>X\x96+>\rZd\xbe\xf5\xb1\x8e>\xcaRP\xbe\xc3\xb3\xce>\x9d\xd8\xdd>+\x13\xae>p\xf0\x06>1\xf6w\xbe\x0f\xd3\xf1>Z\xb9\x9a\xbd\\\x1c4>^\x84*\xbe\x12\x90\x00>\xe0o\x91>\xb0#U\xbc8D\xad\xbe\xf0\xa4\xd1\xbd\xd3\xae\x81\xbe\xc6\xf5\xd0\xbeL\xd2\x12\xbd\xd4<\x95>\xdf `=\x8aS\xe2\xbdP\t\xc5=U1\x9e\xbe\xbc\xb7\xaf\xbe\x0eo\x8f\xbe\x92\xc0\x9c\xbe\x1c\xe1\x88\xbe\xff\x17\x96>rM{<[%~\xbeYpi=\xc4Y\x00\xbf\xb5?x\xbd^\xa8\xb0>s\x13\x01\xbf\xb2\xc4\x0c\xbe\x85\x01\xcc=\xa68\xcf>\xf2\x8d*\xbd\x13\xd6x>A\xde~\xbc\xa1K\x99>\xff}?=O\xdb<>\x0c:%?\xd4\xbb\xb8;\xc1\xae\x1f\xbc\rD\xfb\xbd\x95\xc9\xfc 
L\xb8\xbd\x9c\x84\xdf=mc\x9f\xbe\xe9\x08\x16=\xad\xad!?^s\x95\xbe\xdd\xd0r=\xe4+\xd1>\xeep6>\x9a[\x98\xbd[\xed\xa8>\x1aP\x06>\xea\xa1\x8b\xbcC\xa7\x83>\x95Ya>)r7\xbf68\xe3\xbd\xe4\xea4>a\xbb\x82>?\x08\xb2\xbd33\x03?=\xc5\x08?\xedq\x85>\xabjY>F\r|=|z\x89\xbe$\xdf\xd8\xbdT\xf7\x8f\xbe\xa0\x82\xc6>\xeaS\x1b>\xe2\t\xd3;\xee\xef\n>\x84-\xf0>\x95\x9a\x0c\xbe\xf9\x07\x90=\xea\xac\x00\xbfe\xf8%\xbdw\xf8\x11\xbd\x95\x10\xdb\xbcl\x08;\xbe\xe2\xd9e>9\xb5{>\x8ekF>\x9b\xbcN>\x03S\xe9=&\xc3>\xbd\x1a\xbd\xe0=\x99\x90\x8a\xbc*\xbdQ\xbe?,\xff=\xde0\xae\xbe{\xa7\xec>\xb5|\xeb>\x1c\x84W\xbc\x06\xd6\x0b?i%`?\x0ej\xa0\xbe\xf5\x0c9\xbe?+\xe2\xbd\x82\xf9\x84\xbd\xa8%5\xbc\x80\x1f)\xbcZ\x1a\xde\xbc<\x88\xcc\xbc\xc5\xbf\xe7\xbe+\xa0\xfb\xbd$\x85\x9d\xbc4j\x91>\x9c\xb2d\xbd' 427 | p160 428 | tp161 429 | bg54 430 | g112 431 | tp162 432 | Rp163 433 | sg29 434 | g45 435 | (g26 436 | (g27 437 | (I0 438 | tp164 439 | g29 440 | tp165 441 | Rp166 442 | (I1 443 | (I10 444 | tp167 445 | g36 446 | I00 447 | S'C\x8dn=X\x8b&\xbaoM\xc7\xbd/U\n>\x00\x00\x00\x00\xee(\xd4=\xbf\xcd\xc9=\xa7g\x04>\xe8w\x8e=6\x01G\xbd' 448 | p168 449 | tp169 450 | bg54 451 | g29 452 | tp170 453 | Rp171 454 | sg57 455 | (lp172 456 | g112 457 | ag29 458 | asg43 459 | I01 460 | sbsg16 461 | g0 462 | (g107 463 | g2 464 | Ntp173 465 | Rp174 466 | (dp175 467 | g6 468 | (lp176 469 | sg8 470 | g16 471 | sg112 472 | g45 473 | (g26 474 | (g27 475 | (I0 476 | tp177 477 | g29 478 | tp178 479 | Rp179 480 | (I1 481 | (I10 482 | I10 483 | tp180 484 | g36 485 | I00 486 | S'\xd1\xf7\xe0\xbd\xf5\xedz\xbd\'\x07\x82\xbfCx\xa1\xbe\n\xc8g=r\xba\x97>Q\xd4/\xbe\x15\x1e\xea\xbd\xea5\xf2>Tm\x9e\xbd\xc7 \xbf>}\xd7\x0e?\x82\xcb\x93\xbe\xbd[\x1a\xbf\x04f0>?\xa7c\xbeh\xf8\x8a\xbd_\x05\x88\xbe\x0c\x03\x8e\xbf\xe45\xc5\xbduL\xe2\xbd~L\xb1\xbe\n\xcb\x90?\xc0x\xaf\xbeI\xd3==Td\x08\xbe]n7\xbd\xa4\x82\xa3>,\x9a\x8b>\xbeb\x82?\xc0\xf0\xee=.[\x84\xbez!\x1e>\x82^\x83\xbd\xaa\xd9/\xbe;\xe4F=\xca\xa1\xae>\x93\xc5w\xbf3\xfa/\xbd\xbf\xc6\xb7=^-I\xbe\x0c\xb84>\xa8~t\xbf\x075\xb6>\xe2\x8e;>\x1c\x05\xdb>\xac\xfb*?\x18C\x1b?\xc4\xad;\xbe\xd9\xbc~\xbe\x1e\xce\xf0>\x12\xf4\xf7\xbe\x93\x92E?j\x80\n\xbed\xb3U>\xe4\xfd\xf7\xbe%\xde\x8e\xbf\xfbF\xe3>\xcf\x96\x19>\x1e\xea\x14>\xd8\xc7\x0c\xbf\x84=\x91?\\5\x80\xbf\x9e\xe7q\xbd%v\xb1>\xddV\x11>G\xe7y>\x0c\xdb\xd4>\'\xed\xc7>\xc2\x1aA\xbe\xd0\xa4\xb2\xbc\x1f\xef\x0e\xbe\x97^R?Y\xf0T=\xb1\x00\x87\xbe\xd0\x85c=\xb2."\xbf\xa0\xc6\xe6>/\xf19>xv\x02?\x92\xff2\xbe\xca\x01\x12\xbf(\xeb\xce\xbd\xf1\x10\x1f>TF\r?\xe4!>?\xe3\xea3\xbd\xef\x96N\xbe\xc7V\xc4\xbe\xfea|\xbe\x89\xd0\x04\xbf\xe2W\x94>P[\xb1\xbf\x9f\xa5[?F\x14\xc9=\xe3\xd2j?M\x19\x1d\xbex\x91\x92\xbe\xcf\xe9\n>\xf9\xd3\x8b\xbf' 487 | p181 488 | tp182 489 | bg54 490 | g112 491 | tp183 492 | Rp184 493 | sg29 494 | g45 495 | (g26 496 | (g27 497 | (I0 498 | tp185 499 | g29 500 | tp186 501 | Rp187 502 | (I1 503 | (I10 504 | tp188 505 | g36 506 | I00 507 | S'E\ro\xbd6P\xc9;\xde\x82\x03\xbeR\xa0"<\xb9\x89\xaa=#\x91\x14\xbd\x83\xbc\xd4=L\xf3)\xbd\xe7i\x97=O\xff\n>' 508 | p189 509 | tp190 510 | bg54 511 | g29 512 | tp191 513 | Rp192 514 | sg57 515 | (lp193 516 | g112 517 | ag29 518 | asg43 519 | I01 520 | sbsg17 521 | g0 522 | (g107 523 | g2 524 | Ntp194 525 | Rp195 526 | (dp196 527 | g6 528 | (lp197 529 | sg8 530 | g17 531 | sg112 532 | g45 533 | (g26 534 | (g27 535 | (I0 536 | tp198 537 | g29 538 | tp199 539 | Rp200 540 | (I1 541 | (I20 542 | I2 543 | tp201 544 | g36 545 | I00 546 | 
S"\xa7'\xb5\xbf\x964\xbf\xbe\xb2p\xee>C\xd6d\xbf\xe5\xc0\xa0?\x87\xaa\x9e@\x03\xf0\xcc\xbdxr\x82\xc0D\x0e\xb7?Y,>\xc0Yz\x8d?O\xa97@\x0ej\xcd=\xb6\xda\xf0\xbf\xb8\xab\xab?\xd3m\x86=\x0c\xeb\xc1\xbe\xf9\x7f\xaf\xbf\x0bW\x9d?\xbe\x96\xa3?*c@?\xd7R\xbc\xbf\xbe\x82@\xbf\xfd\x80\xed\xbf}(->\xf22\xd6\xbfW \r\xbfw\x81+\xbf\xed\x97J>\xde\xdb\xc3>V\x14S\xbe<\x97 \xc0\x00T\xec>2[\xd4?\xb6\x1e\xba\xbe^\xea\xda\xbe\xc11\x81\xbf\x9aM\xf0?\t\x1f\xa9>\xb2A\x9c\xbe" 547 | p202 548 | tp203 549 | bg54 550 | g112 551 | tp204 552 | Rp205 553 | sg29 554 | g45 555 | (g26 556 | (g27 557 | (I0 558 | tp206 559 | g29 560 | tp207 561 | Rp208 562 | (I1 563 | (I20 564 | tp209 565 | g36 566 | I00 567 | S'y\x10E>\x90\x16d>\xeb\xda\x9d>\xe4\xea\x9b\xbd6\xd6\x89>sk\x0c=\xdc\xdeZ\xbd\xb9\xfe9\xbe.r\x88>\x1a\xa6`\xbd&A\xa7>_\xc9\xc3>c\x0cg>\xf0\xcfQ>\x106\x83>\xd6\xe2\xf1=\xe7g\xa4>x\xb0i>4\x1a\x1a=\xbfo\xc7>' 568 | p210 569 | tp211 570 | bg54 571 | g29 572 | tp212 573 | Rp213 574 | sg57 575 | (lp214 576 | g112 577 | ag29 578 | asg43 579 | I01 580 | sbsg57 581 | (lp215 582 | sb. -------------------------------------------------------------------------------- /chimp/simulators/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | * Arcade Learning Environment for Atari game simulation 5 | * Tiger Problem 6 | 7 | Required functions: 8 | __init__, get_screenshot, act, reward, game_over, reset_game 9 | 10 | Require attributes: 11 | n_actions, 12 | 13 | ''' 14 | -------------------------------------------------------------------------------- /chimp/simulators/atari/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | ''' 5 | 6 | from atari import AtariSimulator -------------------------------------------------------------------------------- /chimp/simulators/atari/atari.py: -------------------------------------------------------------------------------- 1 | from ale_python_interface import ALEInterface 2 | import pygame 3 | 4 | import numpy as np 5 | import scipy.misc as spm 6 | 7 | 8 | class AtariSimulator(object): 9 | 10 | def __init__(self, settings): 11 | 12 | '''Initiate Arcade Learning Environment (ALE) using Python interface 13 | https://github.com/bbitmaster/ale_python_interface/wiki 14 | 15 | - Set number of frames to be skipped, random seed, ROM and title for display. 16 | - Retrieve a set of legal actions and their number. 17 | - Retrieve dimensions of the original screen (width/height), and set the dimensions 18 | of the cropped screen, together with the padding used to crop the screen rectangle. 19 | - Set dimensions of the pygame display that will show visualization of the simulation. 20 | (May be cropped --- showing what the learner sees, or not --- showing full Atari screen) 21 | - Allocate memory for generated grayscale screenshots. 
Accepts dims in (height/width) format 22 | ''' 23 | 24 | self.ale = ALEInterface() 25 | self.ale.setInt("frame_skip",settings["frame_skip"]) 26 | self.ale.setInt("random_seed",settings["seed_simulator"]) 27 | self.ale.loadROM(settings["rom_dir"] + '/' + settings["rom"]) 28 | 29 | self.title = "ALE Simulator: " + str(settings["rom"]) 30 | self.actions = self.ale.getLegalActionSet() 31 | self.n_actions = self.actions.size 32 | 33 | self.screen_dims = self.ale.getScreenDims() 34 | self.model_dims = settings['model_dims'] 35 | self.pad = settings['pad'] 36 | 37 | print("Original screen width/height: " + str(self.screen_dims[0]) + "/" + str(self.screen_dims[1])) 38 | print("Cropped screen width/height: " + str(self.model_dims[0]) + "/" + str(self.model_dims[1])) 39 | 40 | self.viz_cropped = settings['viz_cropped'] 41 | if self.viz_cropped: 42 | self.display_dims = (int(self.model_dims[0]*2), int(self.model_dims[1]*2)) 43 | else: 44 | self.display_dims = (int(self.screen_dims[0]*2), int(self.screen_dims[1]*2)) 45 | 46 | # preallocate an array to accept ALE screen data (height/width) ! 47 | self.screen_data = np.empty((self.screen_dims[1],self.screen_dims[0]),dtype=np.uint8) 48 | 49 | 50 | def get_screenshot(self): 51 | '''returns a cropped snapshot of the simulator 52 | - store grayscale values in a preallocated array 53 | - cut out a square from the rectangle, using provided padding value 54 | - downsample to the desired size and transpose from (height/width) to (width/height) 55 | ''' 56 | 57 | self.ale.getScreenGrayscale(self.screen_data) 58 | self.tmp = self.screen_data[(self.screen_dims[1]-self.screen_dims[0]-self.pad):(self.screen_dims[1]-self.pad),:] 59 | self.frame = spm.imresize(self.tmp,self.model_dims[::-1],interp='nearest').T #, interp='nearest' 60 | 61 | return self.frame 62 | 63 | 64 | def act(self,action_index): 65 | '''function to transition the simulator from s to s' using provided action 66 | the action that is provided is in form of an index 67 | simulator deals with translating the index into an actual action''' 68 | 69 | self.last_reward = self.ale.act(self.actions[action_index]) 70 | 71 | 72 | def reward(self): 73 | '''return reward - has to be called after the "act" function''' 74 | 75 | return self.last_reward 76 | 77 | 78 | def episode_over(self): 79 | '''return a boolean indicator on whether the game is still running''' 80 | 81 | return self.ale.game_over() 82 | 83 | 84 | def reset_episode(self): 85 | '''reset the game that ended''' 86 | 87 | self.ale.reset_game() 88 | 89 | 90 | def init_viz_display(self): 91 | '''initialize display that will show visualization''' 92 | 93 | pygame.init() 94 | self.screen = pygame.display.set_mode(self.display_dims) 95 | if self.title: 96 | pygame.display.set_caption(self.title) 97 | 98 | 99 | def refresh_viz_display(self): 100 | '''if display is shut down, shut the game down 101 | else move the current simulator's frame (cropped or not cropped) into the pygame display, 102 | after expanding it 2x along x and y dimensions''' 103 | 104 | for event in pygame.event.get(): 105 | if event.type == pygame.QUIT: 106 | exit 107 | 108 | if self.viz_cropped: 109 | self.surface = pygame.surfarray.make_surface(self.frame) # has already been transposed 110 | else: 111 | self.surface = pygame.surfarray.make_surface(self.screen_data.T) 112 | 113 | self.screen.blit(pygame.transform.scale2x(self.surface),(0,0)) 114 | pygame.display.flip() 115 | 116 | -------------------------------------------------------------------------------- 
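All of the simulators in this package expose the same small interface (get_screenshot / act / reward / episode_over / reset_episode plus an n_actions attribute), so a simulator can be exercised on its own before a learner is attached. The snippet below is an illustrative sketch only: random_rollout is a hypothetical helper (not part of the package), and it assumes sim is any object exposing the interface above, for example an AtariSimulator built from a user-supplied settings dictionary.

import numpy as np

def random_rollout(sim, n_steps, seed=0):
    """Drive a Chimp-style simulator with uniformly random actions (sketch)."""
    rng = np.random.RandomState(seed)
    sim.reset_episode()
    total_reward = 0.0
    for _ in range(n_steps):
        frame = sim.get_screenshot()         # current (possibly cropped) observation; would normally be fed to the agent
        action = rng.randint(sim.n_actions)  # random action index
        sim.act(action)                      # the simulator maps the index to an actual action
        total_reward += sim.reward()         # reward() is only valid after act()
        if sim.episode_over():
            sim.reset_episode()
    return total_reward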
/chimp/simulators/gym/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import OpenAI Gym Wrapper 3 | """ 4 | 5 | -------------------------------------------------------------------------------- /chimp/simulators/gym/gym_wrapper.py: -------------------------------------------------------------------------------- 1 | class GymWrapper(): 2 | 3 | def __init__(self, env): 4 | 5 | self.env = env 6 | self.last_reward = 0.0 7 | self.current_state = None 8 | self.terminal_flag = False 9 | self.n_actions = env.action_space.n 10 | self.model_dims = env.observation_space.shape 11 | 12 | def act(self, action): 13 | """ 14 | Transitions to the next state and computes the reward 15 | """ 16 | state, reward, done, info = self.env.step(action) 17 | self.last_reward = reward 18 | self.current_state = state 19 | self.terminal_flag = done 20 | def reward(self): 21 | return self.last_reward 22 | 23 | def get_screenshot(self): 24 | return self.current_state 25 | 26 | def episode_over(self): 27 | """ 28 | Checks if the car reached the top of the mountain 29 | """ 30 | return self.terminal_flag 31 | 32 | def reset_episode(self): 33 | self.current_state = self.env.reset() 34 | 35 | def simulate(self, nsteps): 36 | """ 37 | Runs a simulation using the provided DQN policy for nsteps 38 | """ 39 | 40 | self.reset_episode() 41 | 42 | rtot = 0.0 43 | # run the simulation 44 | for i in xrange(nsteps): 45 | self.env.render() 46 | state = self.get_screenshot() 47 | a = self.env.action_space.sample() 48 | self.act(a) 49 | r = self.reward() 50 | rtot += r 51 | if self.episode_over(): 52 | break 53 | return rtot 54 | 55 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Import MDP models and simulator 3 | """ 4 | 5 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/cart_pole.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the simulator class for pole cart MDP 5 | ################################################################# 6 | 7 | class CartPole(): 8 | 9 | def __init__(self): 10 | self.actions = np.array([-1,1]) 11 | self.n_actions = 2 12 | 13 | self.state_shape = (1,4) # x, xdot, theta, thetadot 14 | 15 | self.gravity = 9.8 16 | self.mass_cart = 1.0 17 | self.mass_pole = 0.3 18 | self.total_mass = self.mass_cart + self.mass_pole 19 | self.length = 0.7 20 | self.polemass_length = self.mass_pole * self.length 21 | self.force_mag = 10.0 22 | self.tau = 0.02 23 | 24 | self.term_deg = 0.2094384 25 | 26 | 27 | def transition(self, s, a): 28 | if self.isterminal(s): 29 | return s.copy() 30 | x, xdot, theta, thetadot = s[0], s[1], s[2], s[3] 31 | 32 | sint = np.sin(theta) 33 | cost = np.cos(theta) 34 | 35 | force = self.actions[a] * self.force_mag 36 | 37 | temp = (force + self.polemass_length * thetadot**2 * sint) / self.total_mass 38 | thetaacc = (self.gravity * sint - cost * temp) / (self.length * (4.0/3.0 - self.mass_pole * cost**2 / 39 | self.total_mass)) 40 | xacc = temp - self.polemass_length * thetaacc * cost / self.total_mass 41 | 42 | sp = np.zeros(4, dtype=np.float32) 43 | sp[0] = x + self.tau * xdot 44 | sp[1] = xdot + self.tau * xacc 45 | sp[2] = theta + self.tau * thetadot 46 | sp[3] = thetadot + 
self.tau * thetaacc 47 | 48 | return sp 49 | 50 | def reward(self, s, a): 51 | r = 0.0 52 | if self.isterminal(s): 53 | r = -1.0 54 | return r 55 | 56 | 57 | def isterminal(self, s): 58 | if (s[0] < -2.4 or s[0] > 2.4 or s[2] < -self.term_deg or s[2] > self.term_deg): 59 | return True 60 | return False 61 | 62 | 63 | def initial_state(self): 64 | s = np.zeros(4, dtype=np.float32) 65 | s[0] = 2.2 * np.random.rand() - 1.1 66 | s[1], s[2], s[3] = 0.0, 0.0, 0.0 67 | return s 68 | 69 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/mdp_simulator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the simulator class for MDPs 5 | ################################################################# 6 | 7 | class MDPSimulator(): 8 | 9 | def __init__(self, model): 10 | """ 11 | Implements the multi-agent simulator: 12 | This serves as a wrapper for MDP problem types 13 | """ 14 | 15 | self.model = model # problem instance 16 | 17 | # initalize 18 | self.current_state = model.initial_state() 19 | self.last_action = 0 20 | self.last_reward = 0.0 21 | 22 | self.model_dims = model.state_shape 23 | 24 | self.n_actions = model.n_actions 25 | 26 | def act(self, action): 27 | """ 28 | Transitions the model forward by moving 29 | """ 30 | mdp = self.model 31 | 32 | self.last_reward = mdp.reward(self.current_state, action) 33 | self.current_state = mdp.transition(self.current_state, action) 34 | if self.episode_over(): 35 | self.last_reward += mdp.reward(self.current_state, action) 36 | 37 | def reward(self): 38 | return self.last_reward 39 | 40 | def get_screenshot(self): 41 | return self.current_state 42 | 43 | def episode_over(self): 44 | return self.model.isterminal(self.current_state) 45 | 46 | def reset_episode(self): 47 | self.current_state = self.model.initial_state() 48 | self.last_reward = 0.0 49 | 50 | def n_actions(self): 51 | return self.model.n_actions 52 | -------------------------------------------------------------------------------- /chimp/simulators/mdp/mountain_car.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements the mountain car MDP 5 | ################################################################# 6 | 7 | class MountainCar(): 8 | 9 | def __init__(self, 10 | term_r = 10.0, 11 | nonterm_r = -1.0, 12 | height_reward = True, 13 | discrete = False, 14 | discount = 0.95): 15 | 16 | self.actions = np.array([-1.0, 0.0, 1.0]) 17 | self.n_actions = 3 18 | 19 | self.state_shape = (1,2) # x and v 20 | 21 | self.term_r = term_r 22 | self.nonterm_r = nonterm_r 23 | 24 | self.vmin, self.vmax = (-0.07, 0.07) 25 | self.xmin, self.xmax = (-1.2, 0.6) 26 | 27 | self.height_reward = height_reward 28 | 29 | self.discrete = discrete 30 | self.xgrid = 10 31 | self.vgrid = 10 32 | self.discrete_x = np.linspace(self.xmin, self.xmax, self.xgrid) 33 | self.discrete_v = np.linspace(self.vmin, self.vmax, self.vgrid) 34 | 35 | 36 | def transition(self, s, a): 37 | """ 38 | Returns a next state, given a state and an action 39 | """ 40 | sp = np.zeros(2, dtype=np.float32) 41 | #sp = np.zeros(2, dtype=np.float32) 42 | sp[1] = s[1] + 0.001 * self.actions[a] - 0.0025 * np.cos(3 * s[0]) 43 | sp[1] = self.vclip(sp[1]) 44 | sp[0] = self.xclip(s[0] + sp[1]) 45 | 46 | return sp 47 
| 48 | 49 | def reward(self, s, a): 50 | """ 51 | Rewarded for reaching the goal state, penalized for all other states 52 | """ 53 | r = s[0] if (self.height_reward and s[0] > 0.0) else 0 54 | if s[0] >= self.xmax: 55 | r += self.term_r 56 | else: 57 | r += self.nonterm_r 58 | return r 59 | 60 | 61 | def isterminal(self, s): 62 | if s[0] >= self.xmax: 63 | return True 64 | return False 65 | 66 | def initial_state(self): 67 | xi = np.random.uniform(self.xmin, self.xmax*0.9) 68 | vi = 0.0 69 | return np.array([xi, vi], dtype=np.float32) 70 | 71 | 72 | 73 | ################################################################# 74 | ########################## UTILITIES ############################ 75 | ################################################################# 76 | 77 | def clip(self, val, lo, hi): 78 | return min(hi, max(val, lo)) 79 | 80 | def vclip(self, val): 81 | return self.clip(val, self.vmin, self.vmax) 82 | 83 | def xclip(self, val): 84 | return self.clip(val, self.xmin, self.xmax) 85 | 86 | def find_nearest(self, vals, target): 87 | idx = (np.abs(vals - target)).argmin() 88 | return vals[idx] 89 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Environment simulators. 3 | 4 | ''' 5 | 6 | from models.simulator import POMDPSimulator 7 | from models.simulator_momdp import MOMDPSimulator 8 | from models.tiger import TigerPOMDP 9 | from models.rock_sample import RockSamplePOMDP 10 | 11 | from models.tools.belief import DiscreteBelief 12 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/__init__.py: -------------------------------------------------------------------------------- 1 | # dummy file 2 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/rock_sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | from tools.belief_momdp import MOMDPBelief 4 | import math 5 | import itertools 6 | 7 | ################################################################# 8 | # Implements the Rock Sample POMDP problem 9 | ################################################################# 10 | 11 | class RockSamplePOMDP(): 12 | 13 | # constructor 14 | def __init__(self, 15 | xs=7, # size of the grid in the x dim 16 | ys=7, # size of the grid in the y dim 17 | 18 | rocks={(2,4):False, (3,4):True, (5,5):False, # (2,0):False, (0,1):True, (3,1):False, (6,3):True, 19 | (1,6):True}, 20 | 21 | seed=1, # random seed 22 | rbad=-10.0, rgood=10.0, rexit=10.0, rbump=-100.0, # reward values 23 | d0=20, # quality of rover observation 24 | h_conf=0.5, # confidence level before moving in heuristic policy 25 | discount=0.99): 26 | 27 | self.random_state = np.random.RandomState(seed) # used for sampling 28 | self.discount = discount 29 | 30 | self.xs = xs - 1 # x-size of the grid 31 | self.ys = ys - 1 # y-size of the grid 32 | 33 | self.rocks = rocks # dictionary mapping rock positions (x,y) to their types (good or bad) 34 | self.rock_pos = [k for k in sorted(rocks.keys())] 35 | self.rock_types = [rocks[k] for k in sorted(rocks.keys())] 36 | self.rock_map = {(k):i for (i, k) in enumerate(sorted(rocks.keys()))} 37 | k = len(rocks) 38 | self.k = k # number of rocks 39 | 40 | self.rbad = rbad 41 | self.rgood = rgood 42 | self.rbump = rbump 43 | self.rexit = rexit 44 | 
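        # the partially observed component enumerated below covers every joint
        # assignment of rock types, so k rocks give 2**k rock_states
        # (the default 4-rock layout yields a 16-dimensional belief)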
45 | # states: state is represented by the rover position and the rock types 46 | self.rover_states = [(j,i) for i in range(xs) for j in range(ys)] # fully observable vars 47 | rs = itertools.product(*(xrange(2) for i in xrange(k))) 48 | self.rock_states = [[bool(j) for j in i] for i in rs] 49 | self.n_rock_states = len(self.rock_states) 50 | self.n_rover_states = len(self.rover_states) 51 | 52 | # actions: total of 5+k 53 | self.ractions = [0, # move left 54 | 1, # move right 55 | 2, # move up 56 | 3, # move down 57 | 4] # sample 58 | for i in range(k): 59 | self.ractions.append(5+i) # sample rock i 60 | 61 | # observations 62 | self.robs = [0, # none 63 | 1, # good 64 | 2] # bad 65 | 66 | # pre-allocate state variables 67 | self.rover_state = np.zeros(2) # rover (x,y) position 68 | self.rock_state = np.zeros(k, dtype=np.bool) # (good, bad) type for each rock 69 | 70 | self.d0 = d0 71 | self.h_conf = h_conf 72 | 73 | self.action_vectors = [[-1, 0], [1, 0], [0, 1], [0, -1]] 74 | 75 | # belief and observation dimensions 76 | self.xdims = 2 77 | self.odims = 1 78 | 79 | ################################################################# 80 | # Setters 81 | ################################################################# 82 | def set_discount(self, d): 83 | self.discount = d 84 | 85 | def set_rewards(self, rs, rg, rb, re, rm): 86 | self.rsample = rs 87 | self.rgood = rg 88 | self.rbad = rb 89 | self.rexit = re 90 | 91 | ################################################################# 92 | # S, A, O Spaces 93 | ################################################################# 94 | def fully_obs_states(self): 95 | return self.rover_states 96 | 97 | def partially_obs_states(self): 98 | return self.rock_states 99 | 100 | def actions(self): 101 | return self.ractions 102 | 103 | def observations(self): 104 | return self.robs 105 | 106 | ################################################################# 107 | # Reward Function 108 | ################################################################# 109 | def reward(self, x, y, a): 110 | # Rewarded: 111 | # sampling good or bad rocks 112 | # exiting the map 113 | # trying to move off the grid 114 | rocks = self.rocks 115 | xpos, ypos = x 116 | 117 | # if in terminal state, no reward 118 | if self.isterminal(x, y): 119 | return 0.0 120 | # if exit get exit reward 121 | if a == 1 and xpos == self.xs: 122 | return self.rexit 123 | # if trying to move off the grid 124 | if (a == 0 and xpos == 0) or (a == 2 and ypos == self.ys) or (a == 3 and ypos == 0): 125 | return self.rbump 126 | # if trying to sample 127 | if a == 4: 128 | # if in a space with a rock 129 | if x in rocks: 130 | # if rock is good 131 | if rocks[x]: 132 | return self.rgood 133 | # if rock is bad 134 | else: 135 | return self.rbad 136 | return 0.0 137 | 138 | ################################################################# 139 | # Distribution Functions 140 | ################################################################# 141 | # rover moves determinisitcally: distribution is just the position of rover 142 | def fully_obs_transition(self, x, y, a, dist): 143 | xpos = x[0] 144 | ypos = x[1] 145 | # going left 146 | if a == 0 and xpos > 0: 147 | xpos -= 1 148 | # going right 149 | elif a == 1 and xpos < (self.xs+1): 150 | xpos += 1 151 | # going up 152 | elif a == 2 and ypos < self.ys: 153 | ypos += 1 154 | # going down 155 | elif a == 3 and ypos > 0: 156 | ypos -= 1 157 | dist[0] = xpos 158 | dist[1] = ypos 159 | return dist 160 | 161 | # the positions of rocks don't change, good rocks 
turn bad after sampling 162 | def partially_obs_transition(self, x, y, a, dist): 163 | # fill the distribution with our y var 164 | for i in xrange(len(y)): 165 | dist[i] = y[i] 166 | # if a rock is sampled it becomes bad 167 | if a == 4: 168 | rocks = self.rocks 169 | # if we are on a rock state change type to bad 170 | if x in rocks: 171 | ri = self.rock_map[x] 172 | self.rock_types[ri] = False 173 | rocks[x] = False 174 | dist[ri] = False 175 | return dist 176 | 177 | # sample the transtion distribution 178 | def sample_fully_obs_state(self, d): 179 | # deterministic transition 180 | return (d[0], d[1]) 181 | 182 | def sample_partially_obs_state(self, d): 183 | # rock states do not change 184 | return d 185 | 186 | # returns the observation dsitribution of o from the (x,y,a) 187 | def observation(self, x, y, a, dist): 188 | prob = 0.0 189 | # if the action checks a rock 190 | if self.is_check_action(a): 191 | xpos = x[0] 192 | ypos = x[1] 193 | 194 | ri = self.act2rock(a) # rock index 195 | rock_pos = self.rock_pos[ri] # rock position 196 | rock_type = y[ri] # rock type 197 | 198 | r = math.sqrt((xpos - rock_pos[0])**2 + (ypos - rock_pos[1])**2) 199 | eta = math.exp(-r/self.d0) 200 | p_correct = 0.5 + 0.5 * eta # probability of correct measure 201 | 202 | dist.fill(0.0) 203 | # if rock is good 204 | if rock_type == True: 205 | dist[1] = p_correct 206 | dist[2] = 1.0 - p_correct 207 | # rock is bad 208 | else: 209 | dist[1] = 1 - p_correct 210 | dist[2] = p_correct 211 | else: 212 | dist.fill(0.0) 213 | dist[0] = 1.0 214 | return dist 215 | 216 | 217 | # sample the observation distirbution 218 | def sample_observation(self, d): 219 | oidx = self.categorical(d) 220 | return self.robs[oidx] 221 | 222 | def fully_obs_transition_pdf(self, d, x): 223 | if d[0] == x[0] and d[1] == x[1]: 224 | return 1.0 225 | else: 226 | return 0.0 227 | 228 | # only single rock configuration, always return 1 229 | def partially_obs_transition_pdf(self, d, y): 230 | if y == d: 231 | return 1.0 232 | else: 233 | return 0.0 234 | 235 | # pdf for observation prob 236 | def observation_pdf(self, d, dval): 237 | assert dval < 3, "Attempting to retrive pdf value larger than observation size" 238 | return d[dval] 239 | 240 | # numpy categorical sampling hack 241 | def categorical(self, d): 242 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0] 243 | 244 | 245 | ################################################################# 246 | # Create functions 247 | ################################################################# 248 | def create_fully_obs_transition_distribution(self): 249 | td = np.array([0,0]) # position of rover 250 | return td 251 | 252 | def create_partially_obs_transition_distribution(self): 253 | return deepcopy(self.rock_types) 254 | 255 | def create_observation_distribution(self): 256 | od = np.zeros(3) + 1.0/3 # none, good, bad 257 | return od 258 | 259 | def create_belief(self): 260 | return MOMDPBelief(self.n_rock_states) 261 | 262 | def initial_belief(self): 263 | return MOMDPBelief(self.n_rock_states) 264 | 265 | def initial_fully_obs_state(self): 266 | # returns a (0, y) tuple 267 | return (0, self.random_state.randint(self.xs+1)) 268 | 269 | def initial_partially_obs_state(self): 270 | for (i, k) in enumerate(sorted(self.rocks.keys())): 271 | t = bool(self.random_state.randint(2)) 272 | self.rock_types[i] = t 273 | self.rocks[k] = t 274 | return deepcopy(self.rock_types) 275 | 276 | 277 | ################################################################# 278 | # Misc Functions 
279 | ################################################################# 280 | def isterminal(self, x, y): 281 | xpos, ypos = x 282 | if xpos > self.xs: 283 | return True 284 | return False 285 | 286 | def index2action(self, ai): 287 | return ai 288 | 289 | def is_check_action(self, a): 290 | return True if a > 4 else False 291 | 292 | def act2rock(self, a): 293 | return a - 5 294 | 295 | def n_xstates(self): 296 | return len(self.rover_states) 297 | 298 | def n_ystates(self): 299 | return len(self.rock_states) 300 | 301 | def n_actions(self): 302 | return len(self.ractions) 303 | 304 | def n_obsevations(self): 305 | return 2 306 | 307 | 308 | ################################################################# 309 | # Policies 310 | ################################################################# 311 | 312 | def heuristic_policy(self, sc): 313 | # takes in a screen shot, [x, b] array 314 | x = (sc[0], sc[1]) # x and y pos 315 | b = np.array(sc[2:]) # belief 316 | return self.heuristic(x, b) 317 | 318 | def heuristic(self, x, b): 319 | # if we are not confident, keep checking randomly 320 | if b.max() < self.h_conf: 321 | return self.random_state.randint(5, 5+self.k) 322 | else: 323 | ri = b.argmax() # index of highest confidence rock state 324 | y = self.rock_states[ri] # rock state 325 | # find closest good rock 326 | c = float('inf') 327 | ci = -1 328 | for (i, t) in enumerate(y): 329 | # if rock is good 330 | if t: 331 | # if on the rock sample 332 | if x == self.rock_pos[i]: 333 | return 4 334 | xrover = x[0] 335 | yrover = x[1] 336 | xrock, yrock = self.rock_pos[i] 337 | dist = math.sqrt((xrock-xrover)**2 + (yrock-yrover)**2) 338 | if dist < c: 339 | c = dist 340 | ci = i 341 | if ci > -1: 342 | return self.move_to(x, self.rock_pos[ci]) 343 | # if no good rocks left move right 344 | return 1 345 | 346 | # action to move rover from origin o to target t 347 | def move_to(self, o, t): 348 | # vector components 349 | v = [t[0] - o[0], t[1] - o[1]] 350 | sa = float('inf') 351 | ai = 1 352 | # move in the direction that minimizes angle between action and target 353 | for (i, a) in enumerate(self.action_vectors): 354 | ang = angle(v, a) 355 | if ang < sa: 356 | sa = ang 357 | ai = i 358 | return ai 359 | 360 | def dotproduct(v1, v2): 361 | return sum((a*b) for a, b in zip(v1, v2)) 362 | 363 | def length(v): 364 | return math.sqrt(dotproduct(v, v)) 365 | 366 | def angle(v1, v2): 367 | return math.acos(dotproduct(v1, v2) / (length(v1) * length(v2))) 368 | 369 | 370 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/rock_test.py: -------------------------------------------------------------------------------- 1 | from rock_sample import RockSamplePOMDP 2 | 3 | pomdp = RockSamplePOMDP() 4 | 5 | x = pomdp.initial_fully_obs_state() 6 | y = pomdp.initial_partially_obs_state() 7 | 8 | tdx = pomdp.create_fully_obs_transition_distribution() 9 | tdy = pomdp.create_partially_obs_transition_distribution() 10 | od = pomdp.create_observation_distribution() 11 | 12 | for a in range(pomdp.n_actions()): 13 | print "Action ", x, y, a 14 | tdx = pomdp.fully_obs_transition(x, y, a, tdx) 15 | tdy = pomdp.partially_obs_transition(x, y, a, tdy) 16 | od = pomdp.observation(x, y, a, od) 17 | x = pomdp.sample_fully_obs_state(tdx) 18 | y = pomdp.sample_partially_obs_state(tdy) 19 | o = pomdp.sample_observation(od) 20 | print "Observation ", x, y, o 21 | 22 | b = pomdp.initial_belief() 23 | 24 | x = (1,1) 25 | a = 6 26 | 27 | od = pomdp.observation(x, y, a, 
od) 28 | o = pomdp.sample_observation(od) 29 | 30 | b.update(pomdp, x, a, o) 31 | 32 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/simulator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # This file implements a pomdp simulator using the interface 5 | # defined in the README 6 | ################################################################# 7 | 8 | class POMDPSimulator(): 9 | 10 | # constructor 11 | def __init__(self, pomdp, robs=False): 12 | self.pomdp = pomdp 13 | self.current_state = pomdp.initial_state() 14 | self.current_action = None 15 | self.current_observation = np.array([-1]) 16 | self.current_belief = pomdp.initial_belief() 17 | self.current_reward = 0.0 18 | 19 | self.robs = robs # returns observation or belief 20 | 21 | self.tdist = pomdp.create_transition_distribution() 22 | self.odist = pomdp.create_observation_distribution() 23 | 24 | self.n_actions = self.pomdp.n_actions() 25 | self.n_states = self.pomdp.n_states() 26 | 27 | if not robs: 28 | self.model_dims = self.pomdp.belief_shape 29 | else: 30 | self.model_dims = self.pomdp.observation_shape 31 | 32 | #@profile 33 | # progress single step in simulation 34 | def act(self, ai): 35 | pomdp = self.pomdp 36 | s = self.current_state 37 | b = self.current_belief 38 | tdist = self.tdist 39 | odist = self.odist 40 | 41 | a = pomdp.index2action(ai) 42 | 43 | r = pomdp.reward(s, a) 44 | 45 | tdist = pomdp.transition(s, a, tdist) 46 | s = pomdp.sample_state(tdist) 47 | 48 | odist = pomdp.observation(s, a, odist) 49 | o = pomdp.sample_observation(odist) 50 | 51 | b.update(pomdp, a, o) 52 | 53 | self.current_reward = r 54 | self.current_state = s 55 | self.current_observation = o 56 | 57 | # returns the current simulator belief 58 | def get_screenshot(self): 59 | if self.robs: 60 | return np.array([self.current_observation]) 61 | else: 62 | return self.current_belief.new_belief() 63 | 64 | # returns the current reward 65 | def reward(self): 66 | return self.current_reward 67 | 68 | # check if reached terminal states 69 | def episode_over(self): 70 | return self.pomdp.isterminal(self.current_state) 71 | 72 | def reset_episode(self): 73 | pomdp = self.pomdp 74 | self.current_state = pomdp.initial_state() 75 | self.current_belief = pomdp.initial_belief() 76 | 77 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tiger.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | from tools.belief import DiscreteBelief 4 | 5 | ################################################################# 6 | # Implements the Tiger POMDP problem 7 | ################################################################# 8 | 9 | class TigerPOMDP(): 10 | 11 | # constructor 12 | def __init__(self, 13 | seed=999, # random seed 14 | rlisten=-1.0, rtiger=-100.0, rescape=10.0, # reward values 15 | pcorrect=0.85, # correct observation prob 16 | discount=0.95): # discount 17 | 18 | self.random_state = np.random.RandomState(seed) 19 | self.rlisten = rlisten 20 | self.rtiger = rtiger 21 | self.rescape = rescape 22 | self.pcorrect = pcorrect 23 | self.discount = discount 24 | 25 | # transition arrs 26 | self.tstates = [0, 1] # left, right 27 | 28 | # actions 29 | self.tactions = [0, 1, 2] # open left, open right, listen 
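        # convention used throughout this model: state 0 means the tiger is behind
        # the left door and state 1 behind the right door; actions 0/1 open the
        # corresponding door, action 2 listens for one time step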
30 | 31 | # observations arrs 32 | self.tobs = [0, 1] # observed on the left, observed on the right 33 | 34 | # belief and observation shape 35 | self.belief_shape = (2,1) 36 | self.observation_shape = (1,1) 37 | 38 | ################################################################# 39 | # Setters 40 | ################################################################# 41 | def set_discount(self, d): 42 | self.discount = d 43 | 44 | def set_rewards(self, rl, rt, re): 45 | self.rlisten = rl 46 | self.rtiger = rt 47 | self.rescape = re 48 | 49 | def set_listen_prob(self, pc): 50 | self.pcorrect = pc 51 | 52 | ################################################################# 53 | # S, A, O Spaces 54 | ################################################################# 55 | def states(self): 56 | return self.tstates 57 | 58 | def actions(self): 59 | return self.tactions 60 | 61 | def observations(self): 62 | return self.tobs 63 | 64 | ################################################################# 65 | # Reward Function 66 | ################################################################# 67 | def reward(self, s, a): 68 | r = 0.0 69 | rt = self.rtiger 70 | re = self.rescape 71 | if a == 2: 72 | r += self.rlisten 73 | elif a == 1: 74 | r = (r + rt) if s == 1 else (r + re) 75 | else: 76 | r = (r + rt) if s == 0 else (r + re) 77 | return r 78 | 79 | ################################################################# 80 | # Distribution Functions 81 | ################################################################# 82 | # returns the transtion distriubtion of s' from the (s,a) pair 83 | def transition(self, s, a, dist): 84 | if a == 0 or a == 1: 85 | dist[0] = 0.5 86 | dist[1] = 0.5 87 | elif s == 0: 88 | dist[0] = 1.0 89 | dist[1] = 0.0 90 | else: 91 | dist[0] = 0.0 92 | dist[1] = 1.0 93 | return dist 94 | 95 | # sample the transtion distribution 96 | def sample_state(self, d): 97 | sidx = self.categorical(d) 98 | return self.tstates[sidx] 99 | 100 | # returns the observation dsitribution of o from the (s,a) pair 101 | def observation(self, s, a, dist): 102 | p = self.pcorrect 103 | if a == 2: 104 | if s == 0: 105 | dist[0] = p 106 | dist[1] = 1.0 - p 107 | else: 108 | dist[0] = 1.0 - p 109 | dist[1] = p 110 | else: 111 | dist[0] = 0.5 112 | dist[1] = 0.5 113 | return dist 114 | 115 | # sample the observation distirbution 116 | def sample_observation(self, d): 117 | oidx = self.categorical(d) 118 | return self.tobs[oidx] 119 | 120 | # pdf should be in a distributions module 121 | def transition_pdf(self, d, dval): 122 | assert dval < 2, "Attempting to retrive pdf value larger than state size" 123 | return d[dval] 124 | 125 | def observation_pdf(self, d, dval): 126 | assert dval < 2, "Attempting to retrive pdf value larger than state size" 127 | return d[dval] 128 | 129 | # numpy categorical sampling hack 130 | def categorical(self, d): 131 | return np.flatnonzero( self.random_state.multinomial(1,d,1) )[0] 132 | 133 | ################################################################# 134 | # Create functions 135 | ################################################################# 136 | def create_transition_distribution(self): 137 | td = np.array([0.5, 0.5]) 138 | return td 139 | 140 | def create_observation_distribution(self): 141 | od = np.array([0.5, 0.5]) 142 | return od 143 | 144 | def create_belief(self): 145 | return DiscreteBelief(self.n_states()) 146 | 147 | def initial_belief(self): 148 | return DiscreteBelief(self.n_states()) 149 | 150 | def initial_state(self): 151 | return 
self.random_state.randint(2) 152 | 153 | ################################################################# 154 | # Misc Functions 155 | ################################################################# 156 | 157 | def isterminal(self, s): 158 | # no terminal state in model 159 | return False 160 | 161 | def index2action(self, ai): 162 | return ai 163 | 164 | def n_states(self): 165 | return 2 166 | 167 | def n_actions(self): 168 | return 3 169 | 170 | def n_obsevations(self): 171 | return 2 172 | 173 | ################################################################# 174 | # Policies 175 | ################################################################# 176 | 177 | def optimal_policy(self): 178 | def pol(b): 179 | if b[0] < 0.04: 180 | return 0 181 | elif b[0] > 0.96: 182 | return 1 183 | else: 184 | return 2 185 | return pol 186 | 187 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # init file 2 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/belief.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | 4 | ################################################################# 5 | # Implements Belief and Belief Updater 6 | ################################################################# 7 | 8 | class DiscreteBelief(): 9 | 10 | def __init__(self, n): 11 | self.bold = np.zeros(n) + 1.0/n 12 | self.bnew = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return self.bnew[idx] 17 | 18 | def __setitem__(self, idx, val): 19 | self.bold[idx] = val 20 | self.bnew[idx] = val 21 | 22 | def update(self, pomdp, a, o): 23 | 24 | # swap pointers 25 | (bnew, bold) = (self.bold, self.bnew) 26 | 27 | sspace = pomdp.states() 28 | 29 | td = pomdp.create_transition_distribution() 30 | od = pomdp.create_observation_distribution() 31 | 32 | # old belief is now new, new is fresh 33 | bnew.fill(0.0) 34 | 35 | for (i, sp) in enumerate(sspace): 36 | # get the distributions 37 | od = pomdp.observation(sp, a, od) 38 | # get the prob of o from the current distribution 39 | probo = pomdp.observation_pdf(od, o) 40 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero 41 | if probo == 0.0: 42 | continue 43 | b_sum = 0.0 # belef for state sp 44 | for (j, s) in enumerate(sspace): 45 | td = pomdp.transition(s, a, td) 46 | pp = pomdp.transition_pdf(td, sp) 47 | b_sum += pp * bold[j] 48 | bnew[i] = probo * b_sum 49 | norm = sum(bnew) 50 | for i in range(self.length()): 51 | bnew[i] /= norm 52 | (self.bnew, self.bold) = (bnew, bold) 53 | return self 54 | 55 | def length(self): 56 | return self.n 57 | 58 | def empty(self): 59 | self.bold.fill(0.0) 60 | self.bnew.fill(0.0) 61 | 62 | def empty_old(self): 63 | self.bold.fill(0.0) 64 | 65 | def empty_new(self): 66 | self.bnew.fill(0.0) 67 | 68 | def old_belief(self): 69 | return self.bold 70 | 71 | def new_belief(self): 72 | return self.bnew 73 | 74 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/belief_momdp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from copy import deepcopy 3 | 4 | ################################################################# 5 | # Implements 
Belief and Belief Updater 6 | ################################################################# 7 | 8 | class MOMDPBelief(): 9 | 10 | def __init__(self, n): 11 | self.bold = np.zeros(n) + 1.0/n 12 | self.bnew = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return self.bnew[idx] 17 | 18 | def __setitem__(self, idx, val): 19 | self.bold[idx] = val 20 | self.bnew[idx] = val 21 | 22 | def update(self, pomdp, x, a, o): 23 | 24 | # swap pointers 25 | (bnew, bold) = (self.bold, self.bnew) 26 | 27 | yspace = pomdp.partially_obs_states() 28 | 29 | tdp = pomdp.create_partially_obs_transition_distribution() 30 | od = pomdp.create_observation_distribution() 31 | 32 | # old belief is now new, new is fresh 33 | bnew.fill(0.0) 34 | 35 | # iterate 36 | for (i, yp) in enumerate(yspace): 37 | # get the distributions 38 | od = pomdp.observation(x, yp, a, od) 39 | # get the prob of o from the current distribution 40 | probo = pomdp.observation_pdf(od, o) 41 | # if observation prob is 0.0, then skip rest of update b/c bnew[i] is zero 42 | if probo == 0.0: 43 | continue 44 | b_sum = 0.0 # belief for state yp 45 | for (j, y) in enumerate(yspace): 46 | tdp = pomdp.partially_obs_transition(x, y, a, tdp) 47 | pp = pomdp.partially_obs_transition_pdf(tdp, yp) 48 | b_sum += pp * bold[j] 49 | bnew[i] = probo * b_sum 50 | norm = sum(bnew) 51 | for i in xrange(self.length()): 52 | bnew[i] /= norm 53 | (self.bnew, self.bold) = (bnew, bold) 54 | return self 55 | 56 | def length(self): 57 | return self.n 58 | 59 | def empty(self): 60 | self.bold.fill(0.0) 61 | self.bnew.fill(0.0) 62 | 63 | def empty_old(self): 64 | self.bold.fill(0.0) 65 | 66 | def empty_new(self): 67 | self.bnew.fill(0.0) 68 | 69 | def old_belief(self): 70 | return self.bold 71 | 72 | def new_belief(self): 73 | return self.bnew 74 | 75 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/models/tools/distributions.py: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | # Implements distributions for POMDP models 3 | ################################################################# 4 | 5 | import numpy as np 6 | from copy import deepcopy 7 | 8 | class Categorical(): 9 | 10 | def __init__(self, n): 11 | self.indices = np.zeros(n, dtype=np.int64) 12 | self.weights = np.zeros(n) + 1.0/n 13 | self.n = n 14 | 15 | def __getitem__(self, idx): 16 | return (self.indices[idx], self.weights[idx]) 17 | 18 | def __setitem__(self, idx, val): 19 | self.indices[idx], self.weights[idx] = val # set the (index, weight) pair
20 | 21 | def sample(self): 22 | idx = self.quantile(np.random.rand()) 23 | return self.indices[idx] 24 | 25 | 26 | def quantile(self, p): 27 | k = self.n 28 | pv = self.weights 29 | i = 0 30 | v = pv[0] 31 | while v < p and i < k - 1: 32 | i += 1 33 | v += pv[i] 34 | return i 35 | 36 | -------------------------------------------------------------------------------- /chimp/simulators/pomdp/sim_loop.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | from models.tiger import TigerPOMDP 4 | from models.simulator import POMDPSimulator 5 | 6 | ##################################################################### 7 | # This is a sample simulation loop for the DRL framework using POMDPs 8 | ##################################################################### 9 | 10 | # initialize pomdp 11 | pomdp = TigerPOMDP(seed=1) 12 | 13 | # initialize and pass the pomdp into simulator 14 | sim = POMDPSimulator(pomdp) # state and initial belief automatically initialized 15 | 16 | sim.n_states # number of states - input layer size 17 | 18 | opt = pomdp.optimal_policy() 19 | 20 | steps = 50000 21 | 22 | rtot = 0.0 23 | 24 | for i in xrange(steps): 25 | # get the initial state 26 | s = sim.get_screenshot() 27 | # pick random action 28 | #ai = np.random.randint(sim.n_actions) 29 | # pick optimal action 30 | ai = opt(s) 31 | 32 | # progress simulation 33 | sim.act(ai) 34 | 35 | # get reward and next states 36 | r = sim.reward() # real valued reward 37 | sp = sim.get_screenshot() # pomdp state, this is a belief 38 | 39 | print "Step: ", i 40 | #print "Action ", ai, " Reward: ", r, " Screen Shot: ", sp 41 | #print "Current State: ", sim.current_state, " Current Belief: ", sim.current_belief.bnew, "\n" 42 | 43 | rtot += r 44 | 45 | # check if reached terminal state 46 | if sim.episode_over(): 47 | sim.reset_episode() 48 | 49 | print "Total reward: ", rtot 50 | -------------------------------------------------------------------------------- /chimp/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dummy File 3 | """ 4 | -------------------------------------------------------------------------------- /chimp/utils/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ################################################################# 4 | # Implements helper functions 5 | ################################################################# 6 | 7 | def categorical(p, rng): 8 | """ 9 | Draws multinomial samples from distribution p 10 | """ 11 | return np.argmax(rng.multinomial(1,p)) 12 | 13 | def softmax(z): 14 | """ 15 | Computes softmax values for each Q-value in z 16 | """ 17 | # TODO: extend to multi-dimensional input?
18 | ex = np.exp(z - np.max(z)) 19 | return ex / np.sum(ex) 20 | -------------------------------------------------------------------------------- /chimp/utils/policies.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from chimp.utils.distributions import * 4 | 5 | ################################################################# 6 | # Implements DQN controllers 7 | ################################################################# 8 | 9 | 10 | class DQNPolicy(): 11 | """ 12 | Class that handles policies generated by the DQN 13 | """ 14 | 15 | def __init__(self, learner): 16 | self.learner = learner 17 | 18 | def action(self, obs): 19 | """ 20 | Returns the action with the highest Q value given observation obs 21 | """ 22 | q_vals = self.learner.forward(obs) 23 | return np.argmax(q_vals) 24 | 25 | 26 | class StochasticDQNPolicy(): 27 | """ 28 | Generates actions stochastically according to Q-vals 29 | Network output is turned into probs using softmax 30 | """ 31 | 32 | def __init__(self, learner, seed=None): 33 | self.learner = learner 34 | self.rng = np.random.RandomState(seed) 35 | 36 | def action(self, obs): 37 | """ 38 | Returns the action according to probs generated by taking softmax over Qs 39 | """ 40 | q_vals = self.learner.forward(obs) 41 | q_probs = softmax(q_vals) 42 | return categorical(q_probs[0], self.rng) 43 | 44 | 45 | class EpsGreedyPolicy(): 46 | """ 47 | Epsilon greedy policy 48 | """ 49 | 50 | def __init__(self, policy, n_actions, eps, seed=None): 51 | self.policy = policy 52 | self.n_actions = n_actions 53 | self.eps = eps 54 | self.rng = np.random.RandomState(seed) 55 | 56 | def action(self, obs): 57 | if self.rng.rand() < self.eps: 58 | return self.rng.randint(self.n_actions) 59 | else: 60 | return self.policy.action(obs) 61 | 62 | 63 | class RandomPolicy(): 64 | 65 | # constructor 66 | def __init__(self, n_actions, rng = np.random.RandomState()): 67 | self.rng = rng 68 | self.n_actions = n_actions 69 | 70 | def action(self, obs): 71 | return self.rng.randint(self.n_actions) 72 | 73 | 74 | class OneStepLookAhead(): 75 | 76 | # constructor 77 | def __init__(self, simulator, n_rollouts=100): 78 | self.simulator = simulator 79 | 80 | def action(self, obs): 81 | # run each action n_rollouts times, take the highest average 82 | pass 83 | 84 | 85 | class SingleAction(): 86 | """ 87 | Dummy single action policy 88 | """ 89 | 90 | def __init__(self, a): 91 | self.a = a 92 | 93 | def action(self, obs): 94 | return self.a 95 | -------------------------------------------------------------------------------- /examples/atari_tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# be sure to have run ' python setup.py ' from chimp directory" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Training DeepMind's Atari DQN with Chimp" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "Load Chimp modules" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "from chimp.memories import ReplayMemoryHDF5\n", 37 | "\n", 38 | "from chimp.learners.dqn_learner import DQNLearner\n", 39 | "from 
chimp.learners.chainer_backend import ChainerBackend\n", 40 | "\n", 41 | "from chimp.simulators.atari import AtariSimulator\n", 42 | "\n", 43 | "from chimp.agents import DQNAgent" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "Load Python packages" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "%matplotlib inline \n", 62 | "import matplotlib.pyplot as plt\n", 63 | "\n", 64 | "import numpy as np\n", 65 | "import random\n", 66 | "import chainer\n", 67 | "import chainer.functions as F\n", 68 | "import chainer.links as L\n", 69 | "from chainer import Chain\n", 70 | "import os\n", 71 | "\n", 72 | "import pandas as ps" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Set training parameters" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 4, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "settings = {\n", 91 | "\n", 92 | " # agent settings\n", 93 | " 'batch_size' : 32,\n", 94 | " 'print_every' : 5000,\n", 95 | " 'save_dir' : './results_atari',\n", 96 | " 'iterations' : 5000000,\n", 97 | " 'eval_iterations' : 5000,\n", 98 | " 'eval_every' : 50000,\n", 99 | " 'save_every' : 50000,\n", 100 | " 'initial_exploration' : 50000,\n", 101 | " 'epsilon_decay' : 0.000005, # subtract from epsilon every step\n", 102 | " 'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions\n", 103 | " 'epsilon' : 1.0, # Initial exploratoin rate\n", 104 | " 'learn_freq' : 4,\n", 105 | " 'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)\n", 106 | " 'model_dims' : (84,84),\n", 107 | " \n", 108 | " # Atari settings\n", 109 | " 'rom' : \"Breakout.bin\",\n", 110 | " 'rom_dir' : './roms',\n", 111 | " 'pad' : 15, # padding parameter - for image cropping - only along the length of the image, to obtain a square\n", 112 | " 'action_history' : True,\n", 113 | "\n", 114 | " # simulator settings\n", 115 | " 'viz' : True,\n", 116 | " 'viz_cropped' : False,\n", 117 | "\n", 118 | " # replay memory settings\n", 119 | " 'memory_size' : 1000000, # size of replay memory\n", 120 | " 'frame_skip' : 4, # number of frames to skip\n", 121 | "\n", 122 | " # learner settings\n", 123 | " 'learning_rate' : 0.00025, \n", 124 | " 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used\n", 125 | " 'discount' : 0.99, # discount rate for RL\n", 126 | " 'clip_err' : False, # value to clip loss gradients to\n", 127 | " 'clip_reward' : 1, # value to clip reward values to\n", 128 | " 'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations\n", 129 | " 'optim_name' : 'RMSprop', # currently supports \"RMSprop\", \"ADADELTA\", \"ADAM\" and \"SGD\"'\n", 130 | " 'gpu' : True,\n", 131 | " 'reward_rescale': False,\n", 132 | "\n", 133 | " # general\n", 134 | " 'seed_general' : 1723,\n", 135 | " 'seed_simulator' : 5632,\n", 136 | " 'seed_agent' : 9826,\n", 137 | " 'seed_memory' : 7563\n", 138 | "\n", 139 | " }" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "You may want to set a smaller number of iterations (like 100000) - for illustration purposes. We set the GPU option to True, turn it off if your machine does not support it. Be sure to have the requested rom in the indicated directory." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# set random seed\n", 158 | "np.random.seed(settings[\"seed_general\"])\n", 159 | "random.seed(settings[\"seed_general\"])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Now we initialize the simulator first, as we need to use some information it provides - e.g., number of actions." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 6, 172 | "metadata": { 173 | "collapsed": false 174 | }, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Original screen width/height: 160/210\n", 181 | "Cropped screen width/height: 84/84\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "simulator = AtariSimulator(settings)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "Here we define the convolutional network, in a format required by Chainer - the deep learning library we use." 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 7, 199 | "metadata": { 200 | "collapsed": false 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "#Define the network\n", 205 | "class Convolution(Chain):\n", 206 | "\n", 207 | " def __init__(self):\n", 208 | " super(Convolution, self).__init__(\n", 209 | " l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),\n", 210 | " l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),\n", 211 | " l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),\n", 212 | " l4=F.Linear(3136, 512, wscale = np.sqrt(2)),\n", 213 | " l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)),\n", 214 | " )\n", 215 | "\n", 216 | " def __call__(self, ohist, ahist):\n", 217 | " if len(ohist.data.shape) < 4:\n", 218 | " ohist = F.reshape(ohist,(1,4,84,84))\n", 219 | " h1 = F.relu(self.l1(ohist/255.0))\n", 220 | " h2 = F.relu(self.l2(h1))\n", 221 | " h3 = F.relu(self.l3(h2))\n", 222 | " h4 = F.relu(self.l4(h3))\n", 223 | " output = self.l5(h4)\n", 224 | " return output\n", 225 | "\n", 226 | "net = Convolution()" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "We then initialize the learner + chainer backend, replay memory, and agent modules." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 8, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "backend = ChainerBackend(settings)\n", 245 | "backend.set_net(net)\n", 246 | "learner = DQNLearner(settings, backend)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 9, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "memory = ReplayMemoryHDF5(settings)" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 10, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "agent = DQNAgent(learner, memory, simulator, settings)" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "Now let the agent train." 
276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "agent.train()" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "# Visualizing results" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "First, let's visualize the training and evaluation results." 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 11, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n", 312 | "train_stats.columns = ['Iteration','MSE Loss','Average Q-Value']" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 12, 318 | "metadata": { 319 | "collapsed": true 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None)\n", 324 | "eval_stats.columns = ['Iteration','Total Reward','Reward per Episode']" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 13, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEPCAYAAACHuClZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnXmcHGWd/z/fyTGTyTAJ5CIhCZNwJJAAAUSOoGk8EFbQ\nCCJEFMKiyyrnqiisyrGrrC7+WGEFFEUCIuoKIoqC4UiLIGdCIDeEMBNCJoeZkExmckwy398f33pS\n1dVV1dV19DH9fb9e/Zqu6u6qp5+pfj71vZ6HmBmKoiiKEoe6cjdAURRFqX5UTBRFUZTYqJgoiqIo\nsVExURRFUWKjYqIoiqLERsVEURRFiU2qYkJEY4noaSJaQkSLiOhya//1RLSGiBZYj9PSbIeiKIqS\nLpRmnQkR7Q9gf2ZeSERNAOYD+CSAcwF0MvMtqZ1cURRFKRn90zw4M68DsM56vo2IlgE4wHqZ0jy3\noiiKUjpKFjMhohYA0wC8aO26jIgWEtHPiGhIqdqhKIqiJE9JxMRycT0I4Epm3gbgDgATmXkaxHJR\nd5eiKEoVk2rMBACIqD+ARwE8xsy3erx+IIA/MvORHq/pxGGKoigRYOaShhJKYZn8HMBSp5BYgXnD\nWQAW+32YmfXBjOuvv77sbaiUh/aF9oX2RfCjHKQagCei6QDOB7CIiF4FwAD+HcBniWgagF4ArQAu\nSbMdfYHW1tZyN6Fi0L6w0b6w0b4oL2lncz0HoJ/HS4+neV5FURSltGgFfJUwe/bscjehYtC+sNG+\nsNG+KC+pB+DjQERcye1TFEWpRIgI3AcD8EoCZLPZcjehYtC+sNG+sNG+KC8qJoqiKEps1M2lKIrS\nx1A3l6IoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+\nsNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGai\nKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoP\nttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEps\nNGaiKIrSx9CYiaIoilKVqJhUCeoPttG+sNG+sNG+KC8qJoqiKEpsNGaiKIrSx9CYiaIoilKVqJhU\nCeoPttG+sNG+sNG+KC8qJoqiKAmwdi1w883lbkX5SDVmQkRjAdwHYBSAXgA/ZebbiGhfAL8BcCCA\nVgCfYeYtHp/XmImiKFVBNgt8/evASy+VuyV9M2ayG8BXmHkKgBMBXEpEkwFcA+BJZp4E4GkA16bc\nDkVRlFTp6QF27ix3K8pHqmLCzOuYeaH1fBuAZQDGAvgkgHutt90LYGaa7egLqD/YRvvCRvvCptx9\nsWuXiklJIKIWANMAvABgFDOvB0RwAIwsVTsURVHSoKcH2LGj3K0oH/1LcRIiagLwIIArmXkbEbkD\nIb6BkdmzZ6OlpQUAMHToUEybNg2ZTAaAfSdSC9uZTKai2qPblbNtqJT2lGvb7CvX+V99NYutWwGg\n9OfPZrOYM2cOAOwdL0tN6kWLRNQfwKMAHmPmW619ywBkmHk9Ee0PYB4zH+bxWQ3AK4pSFfzyl8Dl\nlwMdHeVuSd8MwAPAzwEsNUJi8QcAs63nFwJ4pATtqGrcd6G1jPaFjfaFTbF9sXw58JOfJHd+jZmk\nCBFNB3A+gA8R0atEtICITgPwfQAfJaIVAD4M4HtptkNRFMXN/PnAHXckd7xaz+bSubkURalJfvxj\n4MorgW3bgAED4h/vRz8SN9fu3UC/fvGPF4e+6uZSFEWpODo7xTX15pvJHK+nR/7WqnVSUExI+BwR\nXWdtjyei96ffNMWJ+sZttC9stC9sTF/Mmwf8138Vfn9np/xdtCiZ8+/aJX9VTPy5A1K9Psva7gRw\ne2otUhSlaFpbgd/9rtytqAzmzwcWLiz8vs5OoLk5OTFRy6QwxzPzpQB2AAAzbwYwMNVWKXk4c+lr\nHe0LG9MXf/0rcPfd5W1LuTF90d5uWwlBbNsGnH
BC8pZJrRYuhhGTHiLqB6uwkIhGQCZtVBSlQti0\nyXbb1Drt7eGsg85O4MQTgcWLkzmvWiaFuQ3AwwBGEtF3ATwL4KZUW6Xkob5xG+0LG9MXHR1yp13L\nmL4oRkyOPlren0Tf1XrMpOB0Ksz8SyKaD6kHIQAzmXlZ6i1TFCU0apnYtLcDI0YUfl9nJzB0KDB5\nMrBkCXD88fHOW+uWia+YENF+js0NAH7lfI2ZK2DSgNpB4wQ22hc2pi82bVLLxBkzaW4u/P7OTmCf\nfYCpUyVuEldM1DLxZz4kTkIAxgPYbD0fCmA1gAmpt05RlFB0dKhlAgDd3cDWreEG9G3bREyOOCKZ\nIHytWya+MRNmnsDMEwE8CeBMZh7OzMMAnAFgbqkaqAgaJ7DRvrAxfbFpE9DVBfTWcGpMNpvFunXy\nPEw2l7FMjjgimSB8rVsmYQLwJzDzn80GMz8G4KT0mqQo0fjgB2vX1bNpk/zt6ipvO8pNezswalT4\nALxaJskRRkzWEtG3iKjFenwTwNq0G6bkonECG7++ePllGUxqCdMXHR1AU1Ntu7oymQza24EDDyw8\noO/ZI/UgjY3AmDEyn9b69fHOv2sX0L+/ikkQswCMgKQHPwxZFXFW4CcUpcQwy+CwcWO5W1J6du6U\ngWz06NoWEwB7xaSQm2vbNmDwYIBIHklYJz09Yulo0aIPzNzBzFcC+CCADzDzlZrJVXo0TmDj1Rdm\n8Kg1Mclms9i0CdhvPxnIatXNB0hftLcDLS2FrQMTfDdMnRo/brJrl1iHapn4QERHENGrABYDWEJE\n84loavpNU5TwmLvBDRvK245y0NEBDBumbi4Aod1cJl5iSNIyUTHx5ycAvsLMBzLzgQC+CuCudJul\nuNGYiY1XXxgxSdIyufji+H70tMlkMti0ScRkn31qW0ycMZNdu8T16UcaYrJrl4pJIQYz8zyzwcxZ\nAINTa5GiRCANMfnLX4DXXkvueGmhbi6b9nbggAMkEG6yq7zo7BRLzjB1KrB0abzUarVMCrOKiL7t\nyOb6FoBVaTdMyUVjJjZefZGGmHR2AitXJne8NMhms+rmsjAxk9Gjgfr64EHdbZkMGSJ/46RWa8yk\nMP8Myeb6nfUYbu1TFADA3LnAXWV2fCYtJsxyl//WW8kcL03UzSXs3g28957MyzVwYHBGl1tMAEkT\n7u6Ofn61TArAzJuZ+QpmPgbAcQCus9Y0UUpIJcdM/v534LnnSnc+v5jJgAHJiUl3t7g8Kt0yMTGT\nqG6uww+XAH5f4LDDMhg+XNZfL2SZuLO5AGDQIGD79ujnV8ukAET0ABE1E9FgAIsALCWiq9NvmlIt\nrF1b/jviHTuAsWOTy+Yyg3I1WCZR3VzbtwPLlgFvvJFe20qJcXEBxbu5ALVM4hLGzXU4M28FMBPA\nY5AJHj+faquUPCo5ZtLeXlox8YuZjBsnlklQFk9YOjuBkSOBVasqe74rU2cSxc1lZguoBsEMw9y5\nWey/vzwP4+ZyBuCB5CwTLVr0ZwARDYCIyR+YuQfWqouKAlSOZTJ0qAwiSbSls1PucpubK3+Klqhu\nrrXWpEh9RUw2bYpnmQwapJZJHMLWmbRC0oGfIaIDAWxNs1FKPpUcMym1mPjFTBoaJPiaRNzEDDYH\nHVTZg20mk4ns5mpvl6lEKj0uFJbm5kxsN5fGTKITJgB/GzMfwMz/xEIbgFNK0DYlQc46C3un506S\n3bslTlEJlkktigkQPZtr7Vop1qv07xcWZ8ykkJvLLwCvlkl0fMWEiD5n/f2K+wHgipK1UAEQP2by\nt78Ba9Yk0xYnGzZIFlUlxEzSEJODDy79nfvddwNtbeHeO29e9Lm52tuBD3yg74jJokXZslsmKibe\nmCr3fXweSpXADGzeDGzZkvyx29tlwO3sTCbwHRUjJiNHJpPRZe5cS22Z9PYC114LPPtsuPeblOiG\nhmhurmOOkZUJy21ZJkESMZOoYsIsVnotu7l8l+1l5p9Yf28sXXNqF3MxDhjg/XqcmElnp6zfkIaY\nrF0rs7S++aYMbIMGxT9mlL4wYtLQUN2Wyfz50v6wVuThh2cwbJg8j+LmOuAAYOJEyVo76qji21tJ\ndHVlQru5vLK54qQG9/TI9VpIxPoyYepMJhLRH4loIxFtIKJHiGhiKRpXSzz5JHDOOekc2xSlpSUm\nY8YkW339pz8Bny8y+Xz79uTdXE1NpbdMHn9cMsjCiolxcQHR3FyjR1dHXKgQvb0yKadJDS61ZaJi\nEi6b6wEA/wdgNIAxAH4L4FdpNqoWWb48eACJEzOJKiZ/+ANwzz3B7zEDUpJismEDsGSJ/+uljJkM\nGyYDVamqxB97DPjsZ8OLybx52b2WyeDBIiZh3Y3t7XIjUI64UNJ0dAD19VnU18t2lAr4OJbJrl1i\nDamYBNPIzL9g5t3W434ADWk3rNZobU1vwIoqJi+/LIISRBqWSWenuF2KicGkJSZEpRtsOzpkgaZZ\ns8KLyZYt2Csm/fvLYBZmQNyxQwbUYcP6hmXS3m73AxBtbq6kLBMtWvTnMSK6xpox+EAi+jqAPxPR\nfkS0X9oNrBVaW8Vl4UecmElUMdmyJdhCAERMkrZMOjtlQPRbS6QUdSbOO9dSDbZPPAF88IMiXmHF\nZP/9M3vdXEB4V9e6deISIpLvl7ZYMqfbh5IIktm7HWQhONd/d1KtlsnOnUAlTJDhG4B38Bnr7yWu\n/edBKuE1fpIAbW2SVWPucJKko0OOGUVM3nrLHqi9MK6SpMUEEOvE+MALkXQ2l/POtVRi8thjwOmn\nA6NGyY2FGaCCMAWLBpPRNWpU8OecNRkHH5z+93vzTeDYY0XEBqewGpLz+wDBg7pz/Xcn1Roz2bgR\n+Nzn0kn9L4YwRYsTAh4qJAnR2ioXo5+rK27MZPz4aGLS2wusWOH/nrTcXID/ABcmZhI3TdmZ7VMK\nN1dvrwTfTz9dZr3df/9w07gsXJjNEZOw/wdjUQJybbS3pzsIbtsmjwcfLPzeKDcD7e1AT09273aQ\nm8vLxQXEK1o0wt/QUHox2b49mSzKuAQVLX7d8fwc12s3pdmoWmPbNrmIJ04MdnVFZfNmYMKEaGIy\nbJi/q2v3bmnvyJHJi8n48WKZhMWISWMjUFcXb5Ej04ZSWiavvSZZXBOt27OxY8PdaW7dikhuLmNR\nAnITM3as3NAYVq0Cvv/90M0vSHe3iOTPfx78vrfflmu12PihO2YSZCH4iUmcosVyWiYVLyYQN5bh\nWtdrp4U5OBHdTUTrieh1x77riWgNES2wHqGO1Zdpa5N1q4cP9xeTuDGTqGJy0kmynKkXGzbID7h/\n/+TF5Kij/AfwoJgJkEzcxDnglMIyMS4uQ1gxGTgw4+nmKoTTMgHyXV133imV+EmxfTswfbpkLb75\npv/7VqwQ4Sn23FLNn9m7XcjNlZZl0r+/WJl79kQ7ThSqQUzI57nXth/3APiYx/5bmPkY6/F4yGP1\nWVpbp
fBv2LB0LJOODjl+sWLy3nsiJn6WiXNAam5OTky2bRMxiWKZAMmLyZgx0hdxrZ0gHn8cOM1x\nWxVWTMy8XIawou6OMTitr927gfvvB955J7lZDbq7ZVbnz30OmDPH/31vvgkcdxxw++3FDcitrWLN\nGqK4uZKwTIhKb51Ug5iwz3Ovbe8DMD8LwGtVxrBiVBMYyyRITOLGTNKwTJyukqQtk2nTosVMgGTE\nxHn3Wlcn/VeMuBVDdzewYAHgNLjCisk772Rju7mA3IyuuXPl5qOxEfjHP/I/+9OfBguCF93dMuBd\ndBFw773+QrFyJXDeeSJ0f/xjuGMzi0WzcWN2775Cbi539TsQLwDvTJZQMcnnKCLaSkSdAI60npvt\nI2Ke9zIiWkhEPyOiITGPVfVUomXCLP74970PWL3aO3feaZkkLSaTJok1ENbt4BSTuBldZv13Z9ZR\nmq6ud9+V7CvngFBMzCRpN9ecOcDs2XKnv3p1/mefegq47jq5Gw/L9u0iTlOniog98YT3+958Ezjk\nEOCKK4Dbbgt37HXrZCAf4hhJosZM4k6nUujcaVDxYsLM/Zi5mZn3Yeb+1nOzHSd59Q4AE5l5GoB1\nAG6Jcaw+QWtrYcskbsxk7Fi54IMKuZx0d8uPo7FRgsJeGV0mkwtIXkyam0UAvayBtGMm3d0yIPR3\nJM6nWYuxfn1+Kq+XmPT2AjfdZK/82NsLdHdnsO++9nuKcXN5WSYdHWKZnHuuiMk77+R/9u23pb9/\n85tw3w+QPjV1Hf/8z/6B+JUrRdjOPlviK4sWFT72ihXA5Mm510XUbK6kLJNSFi4aq6/chKkzSRRm\ndv7Mfwog0JidPXs2WlpaAABDhw7FtGnT9l40xt1R7dttbRm0tADz52exbBkAJHv8jg4J0jY2ZvHn\nPwMzZxb+/JYtQENDFtmsTCa4dCmweXPu++fPz+LQQ6W9++wDvP22vD9uezs75XhDh2bxyCPA1KmF\nP79jh6TJrl8PjBiRwcaN0c8/ebKc3/n6pEnAH/+YxXHHJf//37Qpg1Gjcl8fOxZYuTK3P++/P4tv\nfhOYPj2DGTOARx/NoqEB6N/fPt6GDcC++waf76ST5P+7eHEWdXXyukz2mMUNNwCnn57B0KFAXV0W\nTz2Vf728/XYG3/secP31WRxwAHDKKfbrK1cCF1+cAVHu+bu7xQ2VzQLnnZfBNdcAjzySxZAhdvue\neiqL1lZg4sQMBg4EPvaxLK69Fnj00eDvs3y5/H+cr9fXA6tXe1+P27bl/38BuX42bwai/P56eoAt\nW+R89fUZq5Aw/OfjbG/fnsF772Uxe/YcANg7XpYcZk71AaAFwCLH9v6O5/8G4IGAz3ItMHIk89q1\nzA89xDxzpvd75s2bF+nY3d3M9fXMvb3MEyYwv/lmuM8tXco8aZI8v/565m9+M/89H/848yOPyPOn\nnmKeMSNSE3Po7WWuq2PetYv58suZb7kl/z1efTFqlPQhM/PPf8584YXR2/DGG8wHHZS775lnmE84\nofhj7dlT+D233858ySW5+3btYh4wgLmnx953//2yb/Zsu52jR8/L+dyPfsT8pS8Fn6+tjfmAA/L3\njxnDPG4c8+OPy/b3v8/81a/mvqezk7mhQb7XlCnMc+farz30EDMR8+LF+ce+4Qbm666zt08/nfnh\nh3Pfs3Il84EH2tvr1jEPHcq8aVPw97nqKuabb869Ln77W+azzvJ+/7e/zXzjjfn7OzqYhwwJPpcf\nDz5on2/KFObXXot2nCjceivzZZfl7rPGztTHd+cjzHQqkSGiBwD8HcChRLSaiC4C8N9E9DoRLQQw\nwxKUmmX7dolljBqVTsxk82apQyASn3LYuMmWLZJ9AwCHH+4dhE8jAG/W5xgwQFwvYYPeSbq5vFJH\nTR8Uk920ZAkwZUrhz5ipTZwMGCCp4s7VMV95BbjsMuDhh6WNHR3iDnQS5v/gdnEZDjpIAuMf+Yhs\ne7m5jEu2rg742teAm2+W/c8/D1xyiXwPrwQAp5sLkGr4+fNz32PiJYZRo4AjjwQWLgz+PsuXS4zN\nSRQ3V1Ixk1IXLlZ8zAQAiKgfEc2LenBm/iwzj2HmemYez8z3MPMFzHwkM09j5pnM7DMDU23Q1iY/\n2rq6dGImHR3Y61MvRkzee88OaE6Z4p0enEYA3vlDnzjRO6MrTMwkTgDea7AZNkx+sO++G/44P/uZ\nDHSFBNErZgLkx01eeQU44wxZHfGhh+RaaWnJ5HymqalwNpc7LdgwaRJwwQVSXAgA48blB+BbWyWz\nDZDZjZcular2s84C7rtPjuE1ILv9+n5icvDBufsGDy4cx/CKmUTJ5ho4UNKid+8OPp8Xms1VQEyY\neQ+AXs24Sg9zpwekY5l0dNgV0sVaJkZMDjlEBhXnD2T3bkkbNYNgGmLiZZl43eUzS9vM9ONxLRO/\nwcZPVL3YuVNqNd73PuCFF4LfG0ZM9uyRO/RjjpFMqzlz8mtMgHD/B3cml+EHPwCuv97e9srmMhXq\ngAyeV1wh6/DceKMUXTY2etfjmGwugxET5/9z5cpcywQoHBTfvl2+j2mTIUo2F1H0WhPN5go3a/A2\nAIusavbbzCPthtUKbW2StQTIwNDR4T1gmmBbsSQhJgMHyo/VmdG1YYO4YUzGUxpiMmGCiK2zJuEz\nnwFuvDGb85mdO6WNddbVPHJkfDHxGmyKEZPf/14KL887LxkxWbFCXEhDhwJnnilT1c+fD2zfns35\nTBw315AhuRN6jh4tNwzOFGCnmADApZeK2+1f/kW2Bw/2FhO3m2vsWPnrtLy8LJNBg4Izo1auFAu2\nf//c30iQm8uvAt6cL4qYqGUSTkx+B+DbAJ4BMN/xUBLA1JgA9hTWSa7HnYSYAPkDqfvutqlJBgyT\nthoV50A+aJAI7Nq1sr1qlbh33PM2uWc1HjzYpM3Gb4MTv9iRF3ffDXzhC8AJJyQjJq+8IlYOINfJ\nrFmSXuuOmYRxc/lZJm7695d2OV17bjEZPBiYOTN3209MnAMeUb6ry8syaWgIHty94iVANMsEiD6l\nilom4WYNvhey0uILzHyveaTftNrA6eYC/F1dcWImSYmJcyB11pgAYhU0Nha3bKwX7h+6M25y++3y\ngx0zJpPzGbeYEMVzdcW1TN5+WyraZ84Ut9TSpf4DInPucrNO3GJy7LH2a7NnSzuPPTaT85k4lokX\nbleXW0zcDB7sPRi73VxArpjs3i3ncR+7kKVg4iVAcTETPzGJ6uYqt2Xi7ttyEGYN+DMBLATwuLU9\njYgKrL+nhMXp5gJk4E8ybmKyuYB4YnL44bkDqVcQNwlXl5eYrFolIjVnjszt5BYsr/VWgsTkhhuC\n7z793CBhM7ruuQc4/3xp06BB8rkFC/zPBXjHaJxiMn++bZkAwNFHA0ccIa5GJ2HFJIxlAkgQ3mR0\nMRcWE7+YidvNBeSKSVubCKr7/1jIzeVnmRTK5vLqb3O+JGImpS
xarBrLBMANAN4P4D0AYOaF0AWx\nEsPp5gL8LZM4MZMo2VxuMZk6FXjxRdvl4bZMgGTExD2QmwkI778fmDFDrINly7I5n/ETE6+Mrp4e\n4LvfBd54w78NfoNNmIyuPXtETC6+2N4X5Oryc3EBtpjs3i1T1B9zjP0aEfDII1LY6cS4uYIEL6yb\nC8i1TDZbs+w5K+7dhI2ZACImr7wibXWnBRsKubmclonzNxLHMoni5iq3ZVItYtLDzO4hKKZnXAHk\ngtu0KfeHnXRGl9vNtXVruM8560wA4LDDgC99SQa0Bx/0dpUkZZk4B3Lj5rrtNuDyy+U19+BSjGWy\nerUMzkHzXgUNNoVcXXPnyv/zyCPtfVHFZMwYGfiXLAEOOCA/PjJhQv4gYhIR/Aaz3btFFEaO9P8O\nTpxiYqwS9wqFTvzExGvAGztWjrVmjT2NipsgS8FM8Jh0zESzuaIRRkyWENFnAfQjokOI6H8hhYhK\nTFavlh+UyesHKidm4qwzAeRH/61vyUyu114LPPBAadxcBx0EPPqo9FEmI2LS3JzJ+YyXmHjVSAD2\n/FpRxcTt7nMzd67UXDiJKiYNDSLojz2W6+Jy4nVdBP0f1q8X15jzmgvC6eYq5OIC/GMmXpaJMwjv\nZ5kEubna2+V1Yyk5+8LPzbVnjwz0fjGGJCwTLVr053IAUwDsBPArAFsBXJVmo/oSu3b5++7dwXcg\nfcskqpvL8P73A6++CnzlK8Dxx+e+5jWIrV9fXBGYV8ykq0vqGYi8s5W8xGTyZPGnuzFi4jWBoV8b\nnLgTEdy88oqsx+Fk4kRpo5eABYkJIDcbv/+9v5h4EZTRtXp1eBcX4G2ZBFFMzATIFRMvyyTIzeUX\nLwH8rYNt26R//KwrtUyiEyabq5uZvwngwwBOYeZvMnMJw0vVze9+B3zxi96vuYPvQDoxkyTFBJAf\n43/8R/4g6CUm558P/OUv4dvrHshHjpTMpfPPt8+9Zk025zPFisnhh6fj5nIWFjohEuvkxRfzP+M1\nlYqTsWPlc85MLide14X7/9DVJZbkpz4FfOxjwCc+4X8+N04xcVa/+1GMmwuwxcQrLRgIHtyd8RIg\nXMwkKPgOaMwkDmGyuY4jokUAXocUL75GRD6XtuLmnXfkjs4Ld/AdqHzLJAgvMWlrE3dEWNwDOZEE\ntM1dbdiYyaRJEmR3B6JXrgROOSVYTIKK2oIyut54Q8TPK0Dt5+oKY5kQSfZWWNz/h3/9V+AnP5FU\n5dWrc6vcC7HvvnLXvXVreDeXW0yY/adJP/ZY4KWXvNOCgWA3V5Bl0q+fnNe9CFfQjYI5n1om0Qjj\n5robwJeZuYWZWwBcClmOt2r5xz/kx10K2tu9ffeAv5vLXZQHRIuZ7N4tP2wTuA0rJmZhLHfAtxDu\nQYxZMp+KmSer0I+9qQmoq8vk7PMSkyFD5L3uzKuVKyX2Usgy8bt7Dcromj/f34KIIyaTJ/v3idd1\n4XZzPfsscNddwIUX5iZVhIHInvDx7bfzb37ceMVMdu2SAkjn+jAGEzP0SgsGgt1cbsvE2Rd+y+cW\nur7UMolOGDHZw8x/MxssS/FGmAqtcli/Xu6GSvEPX7tWgtleWVReboMkLRMTRDfTjAweLN+50Ap5\nXV3ygzB3WmFxi8nmzXKhry9iKs8gqwAIHzMB8l1de/bIgPjBDwavb15owPFzdQWJyXHHSazJ3feF\nxOSoo2TOq2Jw/h82bpSbEy8XUljGjxcL08uSduMVMwka7EwQ3iteAgRbCkGWCeA9qBe6vtQyiU4Y\nMfkrEf2EiDJENIOI7gCQJaJjiOiYgp+uQLZulYGkrS39cxkXj9+KdWnGTJwuLkB+uM3NhdOD3WnB\nYXGLibl7L9YyCfJpNzXZi3QZworJmjWSyTRypNxFmroJrzYEDTh+GV3OKU/cNDfLjcPrr+fuLyQm\nH/848P/+n//rhWImRuDqwvzSfRg3Dnj5ZTlu0P8G8HZz+QXfDccd5y8Kfm6uHTsk3uT8/bj7wiuj\nK4ybKwns5HRkAAAgAElEQVTLpFRFi8yVIyZhVlo8yvrr9rQeDYABfCjRFpUA80N7+21YKwWmR3u7\nuLJWr5Y7WsPOneJuO+CA3PcnaZm4xQSwXV3u2WadRImXAPlismaN/MCSdHN5TUnuJyaTJuVOTums\nZRg3Ttrn7h9mGQyDBs0pU0Q4nPgF350cd1z+tCiFAvBRcFpvL7+cn11WLOPHA9ls4XgJEE1Mrr7a\nP+PPz81lrG4v15khqpurGEvaUC7LZOdOOW+cm4WkCJPNdUrAo+qEBLAHvNbW9M/V3i4ptO64SVtb\nfo0JID+Q7u58d0iUmEmQmAThrjEJi5dlcsQRxf04C/3YBw4EiDI5d5xhLROnmHitsQ5I3zc0BNdh\nTJkis/Y6CQq+G445JndiQ1OpXuhuP4hCdSZJiMm4cbL4VVgxcd/ZF7pzbmryt4T93E5eAuXuCz8x\nCervSpo1+L33JAU/iEqxSoBwbq4+h9MySfs8vb0y+AQtMuSESAYkryB8sUQVk6Qsk3fflQE0ScvE\nq9Zkxw7vH1QhMfFyPRYabACxLN56K/f6CYqXOD/nFBPj4gqqKI+C8/8Q5HoLy/jx0sdhxMQrZlLI\nMgnCz80V5phR3FxRA/BprLS4ejXw298Gv0fFpMxs3SqDbNpiYibUM24uJ0GZMV6urigxE+ckj4ZS\nismaNcC0adIOd4qmF8x2UVkQ/ftn88TEyzIZP1760bw3jGVSaLABZMC56CKZxdgQRkyOOgpYtswe\naArFS8LgdV0YsX33XRnk3BmDxTJ+vPwNIyYNDXJO5/87jpj4ubm6usQKcuLui1IG4NOwTLq6CrdF\nxaTMdHbKD7tUYuK1Yl1QAVhQ3KS3N7zV4pzk0VBqy+TAA8WFESYO1N0tP8QgPzggP54wYlJXJ1lM\nZlLHpMQEAL78ZZnF2LQjjAXQ2CjTw5jgfRJi4oX5PxgXV1zLxyxkFUZMzGqFTuskzoBXjJvLTSlT\ng9OImfQZMSGis4IepWxk0nR2ykR8pRCTMWP8lz8txjIx/uBnn5W7/TAXa7ndXGvWSILByJHh4iaF\n7hoNo0ZlcgYrPzEBbFcXs7imDjpI9psAvJuwYtLSImux339/uOC7wRk3SSL4HhQz8ZraJQqDBsl1\n7Je+68YdN4nr5ooaM4mazVVJlkl3d/AM0FUhJgDOtB4XQwoXz7cePwPwz+k3LT06O2VQ6eqKv5hT\nEGaq77Fj5bnT9A+qJg6yTNatE1//PSHKRsOIybp1wCWX5L4nydTgAw6Qu+8wcZMw8QrAO2ZSSEza\n2yU91wwkQTGTMGICyHxht90mGWOFgu8GZ9wkLcvE9M/LL8ePlxhefz2cZQLkWyZxxKS+XgZq9wqe\nXm4ur88WG4CvNMsECD5WVYgJM1/EzBcBGADgcGY+m5nPhkz6WGQ5W2WxdasMqi0t6Vonxs1VXy+D\n+rp19mtBBWBBMZN//
EPubm+6qfAFG0ZMnnsOuPfeXKFLwjLZvl1+DKauI6yYhBnId+wIFzMBbDFx\nT3Fu3Fzuu74wMRtDJiNZXzffXDheYkhaTPzqTLZuTc4yAYJTyd2404PjDHhE8r91B+G9BCpMzKTa\nLBMguD1VISYOxjGzc3al9QDGp9SekmAuqAkTSiMmQK6rq6tLfux+Lo4gy2TTJuC002SxKqd10tsL\nfP3r4sc3hBGTBQvkwncWcEZNDR40SOoFenrEKhkzRgaDsG6usGISNmYC2LUmbjFpbpaYitvlV4xl\nQiRrrMyZE15Mpk2TmMmuXenGTJYskcE26RqWMLjFJI5lAngP8JWczZVU0WJfFJOniOgvRDSbiGYD\n+BOAJ9NtVrqYC6oUlolZQMopJm1tEpj2KzTyWrrX+IM3bRKxuf562zrp7QUuuwx44gkRFBOgDysm\nAwfmptBGtUxM2m5np4iJCdwW4+YKM5BPnJgJLSaHHirTm69Yke/z94qbFCMmgCwjvO++4S2AwYPl\nJmbJkmTExG9urs7O5KySYkkyZgKEt0zC1pkEzTmnlkl0whQtXgbgx5BK+KMA3MXMl6fdsDQplWXi\nXB7VvS5E0BxHhSyTYcOkEHLqVODuu0VIFi4E/vpX4Oyzgf/8T3nv5s3B2VzM4nL5+MdzK8Wjiglg\nu7pM8B1I3s1VTMykqUlcbU8/nS8mXnGTYsWksVHiCaecEv4zJgifRvU7YLc/qXhJsSSZzQV4D/Bx\nYiZpTKeSRszEXON9QkyIqB8RzWPmh5n536zHw6VqXFqYGXEnTEi3Ct7PzVVoXYhCMZPhw2Xf9dcD\nV14pQvL44/KdbrwR+MUvJB22kJisXSt/P/zhZCwTwBYTp2WStJtr06bwMRNA4iavvOItJnEtE3Oc\nYtJvjz1WLMI0YyZAeS2Tcri5kpibq7ExvmWSVNFin7JMmHkPgF4iiji0VCalsEy2b5dBzgzmSVsm\ngFgnc+bYQgLIwH311bJee0ODfYEbnGKyYIHcJbsrxZO2TMK6ucKmBhcTMwHsSQRNWrDBS0zCtiEO\nxx4LPPOMJD2kcS6T8FEuyyRpMQnr5nITxTKpr88vugxDmtlchcQkTt8mSZiYyTbIolh3E9Ft5pF2\nw9LELSbOjJ7f/ha49db45zBWibljjWuZuGMmhvPPz/cBX3klsGpVfrwE8BcTt5srSmow4G+ZJOnm\nOvLI8DETQL7fsGH5VppzfXNnG+LMlRWGadNkga0kplLxipkQyfcKk6qcBu6YSancXIViJrt3yyPo\nWiGKFjdxWib9+0scs1hBctOnLBOL3wH4NoBnAMx3PKqS3l57Vth995UguLOi/M47ZZAthp07gY9+\nNHfmU2e8BCjeMuno8C5WcouJFw0Nkq7qNY2Gl5iMGSN9snmznLNQkDIIp5g4Yybr1wcXXwHF1ZmE\nLVoEpED18MPz9yfl5iqWffaRxIA0MrkM5bxbTbLOBEgum8tcX4UEPIqYOC0Tv4W5iqWrS8anoBhO\nVYkJM9/r9ShF49Kgq0s638wK63R1rVsnQWz30rOFWL0aePLJ3JlknfESQOIc3d3iRilkmdTXy8O5\n7kg2m0VPj3w+jNXw6U8DTz2Vv7+pSQbf3bttMSGyU2i3bcvtn2LxcnOZH7DX2uBOwg7kbW3FxUxO\nPhmYOzd/f7nEBBBXVxLB9yhztqVNudxchepMtm4N978tNj3YWCDO30xSYjJsWB+yTIjoECJ6kIiW\nEtEq8yhF49LAPVg4xeShh+RusdDiUW7M5597zt7nFhOz/OmSJXKRmSC6H6NG5QetzVxbYdcu8Fop\nkUi+/1tvSV8YUTNiErXGxLDPPnKMDRtyv38YV1cadSaAXfjmppxictJJ8SdgrFSSLFoEksvmKub6\nKsYycVolfueOgin6DWpLd3cViQlkvfc7IUv1ngLgPgD3p9moNAkSk//7P5kNtljLpLVVLmynmKxd\na9eYGMaPF8unpaWwqT16tL1KIyD+4DAurjAMGQLMm2dbJYAdhI8TfAekb1eulHY6f2BJislJJxUX\nM/FjyBBxezrrbkolJl/6EnDLLfGPE2Wdm7RJus4krJur0NxcYf+3xVomzniJIYnCxTBiUlWWCYBB\nzPwUAGLmNma+AcDH021WeviJydq1wKJF4h4q1jJpbQXOOivYMgFETJ55pvA62kC+mADh4iVhcIqJ\nIUkxWb7cDr4bvCwtN1HqTJjlx1xfX3xbifILF4uZTiUOdXWFZ0euVpKOmSSVzdUXLZNqE5OdRFQH\n4E0iuoyIPgWgBD+3dDA1JgYjJg8+CHziE/LPi2KZfPSjcoGbgclPTJ59NtyEeW4xyWaziYpJNpsr\nJsbNlYSYLFuWvxxxGMskbFru4sV2zGTnTrP6YrT2ul1dpbJMkqIaYiZpuLm6u4tfz6TUlklYMVm7\n1js5pasLGDGib4nJlQAaAVwB4FgAnwNwYZqNShM/y+Q3vwHOPTd/5tswmBmATzoJ+PvfZZ+fmGzZ\nEt0ycRYsxmHIEBnYnWJyyCGSTrxpU3wxWbs23zJJK2YS1cVlcFbBh1n/XSlMKYoWu7qiZXOVyjIJ\nW7jILPVAzjovQ18Ukw5m3sbMa6yZhM9m5hdSb1lKuC+olhYZRJcvl0pw40IplMbqxGRnTZ9uu7r8\nYiZANMsk6ZjJ4MEiIAazZsXChdFrTAC7b92WSVg3V5iB/NRTM4mJyWmnAd/9rvwPu7oKr/9eadRC\nzCSpubmKEZNSWSZtbfI7dy9419srQrHffn1LTH5ORG8R0a+J6FIiOiLswa1Cx/VE9Lpj375ENJeI\nVlgTSJa0ut59QTU2ygA9c6ZcEP37y8VbKI3VsH271GeMHm2Lyc6dch73wG/EpNwxk+ZmKZxzD5qT\nJgEvvhjfMgGiubmKdUP09sYXk3PPBb7yFeBDH5JMu2pycVUqpZibK2ydSVQ3V6liJs8/L3/ds1eb\nLK1CLreqEhNmngHgMAD/C2AogD8RUciFY3EPgI+59l0D4ElmngTgaQDXhm9ufNwxEwA44gjg85+3\nt816EGFoa5Mgbl2d1A4sWyZpt6NG5afwmjmcwlgmY8akFzPZd1/g6KPz90+eLBMQJiEmxbq5inEx\n/e1v2b13j3HFBJBp5K+8Evinf6o+F1elx0z27ImeIGFwi0lPj9xIuK0Br5hJKdxccSyTFywfj3u8\nManPhdpSSWJSMJ+EiE4G8AHrMRTAowD+FubgzPwsEbmz6T8JYIb1/F4AWYjAlASvC2ru3NwAbnNz\n+LiJswCxoUHWlv/97/PjJYD80+fPDzfNxejR9kSMhqRiJpdd5r1/0iQZnNOwTAq5uYp1MZkq+CTE\nBBAxYZZ5zpR4OMXEzB0VZ9oYt5vLWCWFjunl5nJfl14UG4CPa5lMnpxvmVSjmIRxc2UBzARwF4AM\nM3+ZmX8V45wjmXk9ADDzOgAjYxyraLzExH1RFhOEd6+YOH26ZIa54yUGL4vAi/
32kwvaXEhJxkxG\nj/YWu8mT5W853FzFZFFlMpm9sa2kxAQArroKeOyxZI5VKio9ZpLEYOceUP1cXF51JqVIDY5qmWzf\nLq7VD32ob1gmYcRkOID/AHAigMeJ6Eki+s8E21BEqDs+YS6oYtxc7nm2pk8HXn3Ve7AuBiKZbsO5\n1G9SYuJHEmIybJjMg+V2Fw0bJpXxzvnLnBPhFTtbbxpiAsSfeFHJjZnEDb4D+QNqmOp3ILqbKynL\npFDR4oIFwGGHye+8L1gmBd1czPyeNX3KOABjAZyEeGvAryeiUcy8noj2BxAYlp09ezZarNF66NCh\nmDZt2t47EOMjLWb7rbeA008Pfn9zcwadneGO99JLwL/8i70tWWAZjB4drX3O7cGDs/jTn4DLLssg\nm82ivV1qQaZMif79g7aXLcuisREYMiTe8ZYsyX+9Xz+gqSmLP/wBOOusDJYvB048MYv77gPOPDNj\nWYJZZLOFjw8ATU0ZPPNMFtu3Aw0N6fRHNWwvXLgQV111VcW0BwBmzMigtxd48sks3n0XaGyMd7yG\nhgx27LC399svg8bG/Pf/8Ic/zBkfXn89a82+LdttbVmsWmVv+51v0KAMNmwI376engwGDsx9vb5e\nzh90Pd9/fxbjxsl4s3Jl7utdXcCuXVksXw5s3+5/fgnUy/gwx1qzuyVMhk8aMHPgA8AqAH8G8O8A\nTgYwsNBnXJ9vAbDIsf19AN+wnn8DwPcCPstJc+qpzI89Fvyez36W+b77wh3v/e9n/vvfc/cdcgjz\nXXdFa5+TmTOZH3xQnj/11Dzu14955874xw3iqquY165N59hTpzK/9po8P+ss5qYm5rvvlu1slvkD\nHwh3nHnz5vGppzI//jjzww8zf/KT6bS3Gpg3b165m+BJczPz5s3M8+czH310vGM9/TTzjBn29vPP\ny+/Ojbsvli5lPvRQe/v44+Wzhfjxj5m/+MXw7fvTn5hPPz1336WXMt96a/DnPv1p5vvvZ77nHuYL\nLsh97ZFHmM84g3nBAuajjvL+/K5dzP36eb9mjZ2hx+kkHmHcXAcz8z8x803M/Cwz7yr8EYGIHgDw\ndwCHEtFqIroIwPcAfJSIVgD4sLVdMsKYusUE4L2mk7/6apmpNi7O9OCjj85g8OB832zS/M//xHfR\n+WHiJi++CLz0EnD77cCvrOhbsTGTwYPTcXNVG5UYMwHsuEkabi6v6nfAu87E6eYKO2twEjGTMEWL\nzz8PnHBC7rIQhjBurkpycQEh3FwADiaiOwGMYuapRHQkgE8w83cKfZCZP+vz0keKaWSShI2ZhBGT\nri55n3sq8S9+MXr7nDjFJO14SSkwGV3f+Y4sOXzOOcAVV0hcqNhpTJzFpbUsJpWKiZskJSbO+EOY\n6negdNOpRMnmWrNGXp84UZJ4vALwTU3VJSZhLJOfQmpBegCAmV8HcF6ajUqTsJZJmAB8W5tMI55W\n0NYpJnPnZqteTEaOBO67TwRl9mz5IZx5pmS/FSMm2Ww2tQB8teGMI1USJj04iQGvoSFcNpe7LwZW\ncDbX888DJ54oY0chy8RP2KpRTBqZ+SXXvt2e76wCvIoW3YS1TAqtmBgXp5hs3Vr9lsnIkVLT893v\n2jPmzpolrq6olkmti0mlYsSklG4uN043F3P4jMFSVMC/8IK4uAAZj9xism1b9bm5wojJP4joIFgp\nvET0aQDtwR+pTMyStEmlBhdaMTEuTjEZMyaTSMFiOWlpkbuxT33K3veRj0iGWjFTmZg6kySLFquV\nWomZhHFzecVMzIC+fbsM+GGm/S/F3FzGMgHEMilUZ+I1V2A1ismlAH4CYDIRvQvgKgBfSrVVKbFz\np0xxUiiIHTYA7y5YTJq+FjM57zzg6adz3YIDBwJnny2zNqtl0ncwMZNSurncDBhgT71SjOWb9nom\nO3cCr70mswUDwW6u/v1lVohdHmlPlbTKIhBubq5VzPwRACMATGbmk5m5NfWWpUDYC6pS3FwjR8ps\norJee/XHTOrqvAf+WbPkh6Exk+Kp9JhJkm4uc3ceNmZSV2cLSjFikvZKiytWSKzVtKe+Xr6bU3yc\nhZl+bjczVU2lEGYN+BFERMzcxcydRDSLiBaXonFJEyZeAoQPwKft5urXT+biWr9e7lyqXUz8+MAH\nZPqZYiZZVDGpbJIUk/79RRjM7AlhK+AB20KoJMtk8+bcOfa8gvDO7+jXnqpxcxHRWUT0DwCvA3iH\niD5BRAsAfAbABaVqYJIkbZmk7eYCbFdXfX31x0z86NcPuOMOEZUwmJiJikllx0yScnMBua6usHNz\nAXZGVzksEz8x2bo1f8oidxC+GsUkKBx1A4ATmHklER0D4EUAn2LmR0vSshQoRkwKWSadnfLPHDEi\nmbb5YcSkL8RMgvjkJ4t7v4pJZWMG5O7u/OUIomAG1Obm4qwdk9FVasskqGhxy5Z8D4k7CF+NYhLk\n5trNzCsBgJkXAFhRzUIChL+gwgTgjVWS9sSARkza2qo/ZpIU2WxWK+AtaiFmAuRmdPm5ubz6Ioqb\nq6FBBMg5EWkQUSwTLzHpy5bJSCL6imN7qHObmW9Jr1npEDZmYtxczP5iUQoXF2CLSV+oM0kStUwq\nm8GDZeqcUrq5vHC6ucL89gH5zZs1VMLEZoqNmXiNQ+44rVtMvNxulSYmQZbJTwHs43i4t6uOsHcn\n/fvLxRDkN21rs5fhTRMjJp2dGRUTC42Z2FR6zCRJyyRKzCSKmwsoLm6SRMzEyzIxCSlVb5kw842l\nbEgpKOaCMtaJ353Ju++GW7UtLqNHAw8/LHdLlZQGWG5UTCobZ8wkaTdX2Ap4wB7Uw07y6Dxf2LhJ\nsZbJli35i+cVskyqQUzCFC32GYoVk6Ag/Nq1pROTxYtlLRBduEkwdSZaAV/5MZM03Fx+FfBefREl\nmwsobkqVNGImZjqVoLaomJSRsDEToHAQvpSWybp14dtdKwwcKJXNW7fWtphUKuVwc3kR1c1VzJQq\nxa60WEhMentzCxLVMqlAori5/CiVmJjp7Q88MJP+yaqETCYDInF1/eMftS0mtRQzKeTm8ouZRLFM\ninFzJVVnYjwhsnqoFGoGtaVPiIlVd1J1FHNBFaqCL5WY1NcD++2HPluwGIemJqm/qWUxqVRMzKSU\nbi4v4ri5jGVyxx2AtTKyJ1FiJkGWiTv1uU+LCap0osekLJPOTslBd99dpMXo0cCOHdnSnKwKML7x\npiZxCdSymFR6zCRpNxdz+Lm5gHhurq4u4JprgJtukiQYP4pdabFQanC1ikmYlRbzYOaE1hIsLcXE\nTIIC8MYqKVVAfMyY0glXNWF+cLUsJpVKWm6unh753bktAT+iurkaG4GvfU1muFiwQObg8ztGEnUm\nhSyTbdvyj1N1YuLj0toCoI2Zq2qRrGLdXH6WSalcXIbRo2UNeEUwvnGTh1/LYlLpMZOk3VxBkzwm\nNTcXIEtMT50K/PKXIiyTJwNLlwLHH
5//Xi/LpH9/sZr37JG555wUqjPxEpONG/PPW3ViAuAOAMdA\nJnwkAFMBLAEwhIi+xMxzU2xfoiSVGlxqMbnuOrVMvDBiUl9f3nYo+TQ2yu9t4MD8wTQKxtVTrKUT\n1c116612ABwApkzxFxMvy4TItk6c7d25U0TGfc32BTdXmJjJWgBHM/P7mPlYAEcDWAXgowD+O83G\nJU21WiYHHQQsXpwt3QkrHGfMpL6+dO7GSqRSYyYDBsgjqUJb4+YKEpOk5uYCcoUEAA4/XFYD9cLL\nMnGe24lxcbmv2SDLxK/OpOoWxwJwKDPv7UZmXgpZJGtVes1Kh2JjJpUiJoo3TU217eKqdMyys0lg\n3FzFVL8DMshv3x5+ni0/pkzxFxMvywQIFhM3++wjcZHe3typVIC+ZZksIaI7iWiG9bgDwFIiqgfQ\nk3L7EmP3bvmnh+38oNTgcohJpfrGy4EzZlLrYlLJ18XgwclaJiZm4ndMvzqTTZukLXEsWOPm8sLP\nMjFLFzvxipcA4gpsbBRB6cturtkAVkLWfr8K4uKaDRGSU9JqWNJ0dsrgE/aCUsuk8lExqWwaG5MX\nkygxk02binNxedHSIgWyXmOCn2UyfLic24lXjYnBuLqcU6kAfUtMTgfwI2b+lPX4ATN3M3MvM3sk\nrFUmxfpMKykAD1Sub7wcOGMmtS4mlXxdJO3mMjETP3eV39xcGzfGn46ors7O6HLjZ5kMH56fhRXk\najfekL5smZwJ4A0i+gURnUFEkWpTyk0x8RLAPwC/e7dcIGaaE6V8qJhUNqV2c3lRXy8WRVzLBPB3\ndflZJiNGyLmdBI1DxjIJIyZ79shYVEmZjAXFhJkvAnAwgN8CmAXgLSL6WdoNS5oolomXmKxfL4tU\nhS2aSopK9o2XGo2Z2FTydZGGmAS5uYJiJkmIiV9GV5Bl4iUmfmn+QZaJe9JJY5VUUiZjqOlUmLkH\nwGMAfg1gPoCZaTYqDYoVE78A/Lvv5q9FoJSHwYNVTCqZxsbSurm8GDgwWcvES0yCYiZuMQkTMwlj\nmVSaiwsIISZEdDoRzQHwJoCzAfwMQNU5eaJaJsy5+8sVfK9k33ipMX0xfrzU4NQylXxdlNrN5Vdn\nUuzCWH74ubmSipkMGeJtmXjVmVSimISJf1wA4DcALmFmn9lmKp9iYyYDBsiUCM51BQDN5Kokjj/e\nuyJZqQzipuM6iZPNBSQjJs6MLufxio2Z+I0fzc193DJh5lnM/HsjJER0MhHdnn7TkqVYywTwDsKX\nS0wq2TdearQvbCq5L9LK5iomZmIshiTExCuji7k4N1dQzKSQm8vpJalKMQEAIjqaiG4molYA/wlg\neaqtSoGNG2VdkGLwCsKrZaIo4UgrAF9MzCRJywTId3Xt3i0eDC8LrNiYiV8Avn9/EbIeR4l4VYkJ\nER1KRNcT0TIAPwTQBoCY+RRm/t+StTAh3noLOPjg4j7jFYQv1drvbirZN15qtC9sKrkvLr4Y+MIX\nkjlWnJgJkJyYuDO6/OIlQLSYibFMnNOpAPmurqoSE4j1cSyAU5l5BjP/CMCe0jQred56q/hgrVom\nihKdlhZZByQJwri5vEjSzQXkZ3T5ubgA8YRs2SLWiyFKAB6ofjE5C0A3gGeI6MdE9CHIFPRVycqV\nxVsmXlXwGjMpP9oXNrXSFw0NYgW4pxpx4ldnAqTn5gqyTPr1A4YOBTZvtvcVqjPxmk4FyK81qSox\nsYLu50HWL3kGwL8BGGlN+nhqqRqYBJs3yz99xIjiPucOwJd6uV5FUQSzPkhHR/myuYD8ObqCLBMg\nP24Spc4EqH7LBADAzF3M/AAznwlgLIBXAXwj7omJqJWIXiOiV4nopbjHC8LES4pNU3S7uUq9XK+T\nSvaNlxrtC5ta6ouGBqlmLyZmkrSbq64OOPRQYMUK2Q6yTAC5gXXGTQrNzbVli7crz11rUpVi4oSZ\nNzPzXcz84QTO3Qsgw8xHM/P7EzieL1HiJUB+AF7jJYpSPgYNsqeTD0vSlgkATJpki0kxlsnOnZLe\n6zef1pAhMl1TfX3+6pR9wjJJESrV+aPESwB/y6Qc1IpvPAzaFza11BeDBgHvvVf83FxAemJSyDJx\niomJl/h5NoYMkfHGSyzdYrJ+PTByZLT2p0U5xYQBPEFELxPRF9M8UZS0YEAtE0WpJMw8bFGyueJO\nQe9k8mRguVVpV4xlEhQvAeT79e8fTkxaWyV+U0mUczr56czcTkQjIKKyjJmfdb9p9uzZaLF6bejQ\noZg2bdreOxDjIy20vXJlBp//fPj3m+21a7N44w0AkO2XXspi7Fh7u9jjxdl2+oNLcb5K3jb7KqU9\n5dxeuHAhrrrqqoppT5rbu3fL9uDB3q//8Ic/zBsfduwAgAz22Se59kyalMGKFbK9dCkwcKD/+7ds\nAXbskO2nn85aVon3+//61ywaG72/36BBwPz5WQwZItutrcDmzVlks/ZvYc6cOQCwd7wsOcxc9geA\n6wF8xWM/J8GYMcyrVxf/uV//mvmcc+ztM85gfvDBRJpUNPPmzSvPiSsQ7QubWuqLk09mBph7erxf\n95lTZWIAAAzeSURBVOqLPXuYv/CFZNvR2ck8aJAc+29/Y54+3f+9997L/LnPmfYxz5gRfOwJE5jf\n9778/RdeyPzzn9vbo0czv/OO/3GssbOk43hZ3FxE1EhETdbzwQBOBbDY671eK4wVQ3e3pBNGcU85\n3VxPPQXMnw/MmBGvPVExdy+K9oWTWuqLhgZxW/X38ad49UVdHfDTnybbjqYmWdNo9eriYyaF3G1D\nhhR2c+3YIYkIo0dHa39alMvNNQrAw0TEVht+ycxzvd64Zg1wyCHRT7RqlVTh1kWQTROA37gRuPBC\n4N575eJQFKX0DBqU3FxfcXEG4QvFTExqcKGYCRAsJqZocfVqYNy4/IyvclMWy4SZ32bmaSxpwUcw\n8/f83vvOO/HOtXJl9DUvTN73RRcB558PfPSj8doSB2e8oNbRvrCppb4YNCg4LbiUfTFpkgThw9SZ\nFGOZNDfnz8sF5FomlRh8B8qbzRWKuGISNZMLEMtkyRK5s/jOd+K1Q1GUeDQ0VI5lMnmyWCbFZHMF\nTaVi8LNMnEWLKiYRScIyiSomw4aJi+yBB0q/5rubWvKNF0L7wqaW+qKQm6uUfWHcXIUsk6YmEZzt\n28NbJoViJiomESm3mytq9byiKMlSyM1VSoybq5BlQmS7uuLGTFRMYlJONxdQnnm4vKgl33ghtC9s\naqkvCrm5StkX48ZJNX5HR7BlAtiurjCWyXnnycNNNYhJOYsWQxFHTHbtkqr1Aw9Mrj2KopSHSsrm\nqquTLNPFiwu7wJ1iUihmcuSR3vurQUz6tGXS1gaMHVv+eEcS1JJvvBDaFza11BeF3Fyl7otJk4BF\ni5K1TPwwYlKpNSZAFYjJrl35qx2GJU68RFGUyqKSLBNAMrrCWCZmGvowMRM/jJhUao0JUAViMm
5c\ndOskbrykkqgl33ghtC9saqkvzjkHuPpq/9dL3ReTJslCVqWyTLq7K9fFBfRxMVHLRFH6DqNGyQBe\nKZi2JBkz8cPUmaiYxEAtE6GWfOOF0L6w0b6wKUfMBChtzETFJAZRxaSrC3jpJf/sCEVRlDg0NckE\nsmFiJmvWyHO/VRYLoWKSAFHF5Mc/Bj74Qalg7wvUkm+8ENoXNtoXNuXoi0mTwlkmb70Vb4EuFZME\niCIm3d3AD34AXHddOm1SFEUBgOnTC6fpDh8ObNgQPV4CVIeYkKyjUpkQES9ezDj7bHuZzDDccgvw\n3HPAQw+l1zZFUZQw7Nol7q2jjwYWLIh2jJ4eOcaAAXKzXCg1mIjAzCWdv6PiK+CNZcLsPbXJr34l\nud7XXSed3d0N3Hwz8PjjpW+roiiKm4EDxcUVx801YIBU3VdqjQlQBW6u5mZZWW3zZu/Xv/c94Mkn\ngeOPl+ni77oLOPFE4KijStvOtFHfuI32hY32hU0l98Xw4fHEBBBXV6W6uIAqsEwA2zrZb7/c/YsX\ny0RrbW3APfcAmQywZw/w9NNlaaaiKIonw4fHi5kAUmuiYhITIyZua+OBB4BZs8T8u/hiyd56+mlg\n2rTytDNNtJ7ARvvCRvvCppL7YsQItUwqAq+Mrt5eEZNHHrH3HXJIvPXiFUVR0qAW3FwVHzMBvMXk\n+edlBtFaKUqsZH9wqdG+sNG+sKnkvhg3TqaDiUNzc2VPD1U1lslTT+Xu++UvgfPPr5zFqxRFUfy4\n/vr4Y9Uf/yjuskql4utMmBlPPw189avACy9I+m9PDzBmjEyX0lcq3BVFUZJC60x8OPFE8RUecwww\nZ45Ukx56qAqJoihKpVAVMZNBg4Df/Q741reAM84ALr9cXFy1RCX7g0uN9oWN9oWN9kV5qQoxAcTf\nOGsW8NprwMyZwHnnlbtFiqIoiqEqYiaKoihKeMoRM6kay0RRFEWpXFRMqgT1B9toX9hoX9hoX5QX\nFRNFURQlNhozURRF6WNozERRFEWpSlRMqgT1B9toX9hoX9hoX5QXFRNFURQlNhozURRF6WNozERR\nFEWpSsomJkR0GhEtJ6I3iOgb5WpHtaD+YBvtCxvtCxvti/JSFjEhojoAPwLwMQBTAMwiosnlaEu1\nsHDhwnI3oWLQvrDRvrDRvigv5bJM3g/gTWZuY+YeAL8G8MkytaUqeO+998rdhIpB+8JG+8JG+6K8\nlEtMDgDgXIh3jbVPURRFqUI0AF8ltLa2lrsJFYP2hY32hY32RXkpS2owEZ0A4AZmPs3avgYAM/P3\nXe/TvGBFUZQIlDo1uFxi0g/ACgAfBtAO4CUAs5h5WckboyiKosSmLGvAM/MeIroMwFyIq+1uFRJF\nUZTqpaIr4BVFUZTqoCID8NVe0EhEdxPReiJ63bFvXyKaS0QriOgvRDTE8dq1RPQmES0jolMd+48h\notetfvihY/9AIvq19ZnniWi847ULrfevIKILHPtbiOgF67VfEVHqVikRjSWip4loCREtIqIrargv\n6onoRSJ61eqPm2q1LxznriOiBUT0B2u7JvuCiFqJ6DXr2njJ2ld9fcHMFfWACNxKAAcCGABgIYDJ\n5W5Xkd/hZADTALzu2Pd9AF+3nn8DwPes54cDeBXicmyxvruxGF8EcJz1/M8APmY9/xKAO6zn5wL4\ntfV8XwBvARgCYKh5br32GwDnWM/vBHBJCfphfwDTrOdNkDjZ5FrsC+tcjdbffgBeADC9VvvCOt+/\nAbgfwB9q9TdinWsVgH1d+6quL0py0RTZsScAeMyxfQ2Ab5S7XRG+x4HIFZPlAEZZz/cHsNzr+wF4\nDMDx1nuWOvafB+BO6/njAI63nvcDsMH9HsdFcK71fCOAOkcfP16GPvk9gI/Uel8AaIQknRxeq30B\nYCyAJwBkYItJrfbF2wCGufZVXV9UopurrxY0jmTm9QDAzOsAjLT2u7/vu9a+AyDf3eDsh72fYeY9\nALYQ0X5+xyKiYQA2M3Ov41hjEvpeoSCiFoi19gLkR1JzfWG5dV4FsA5AlpmXokb7AsD/ALgagDNo\nW6t9wQCeIKKXiegL1r6q64uyZHMpAHJ/RHEJk09e0pzznBMTNQF4EMCVzLyN8uuHaqIvrB/n0UTU\nDOAvRJRB/nfv831BRB8HsJ6ZF1p94Eef7wuL6czcTkQjAMwlohWowuuiEi2TdwGMd2yPtfZVO+uJ\naBQAENH+ADZY+98FMM7xPvN9/fbnfIakZqeZmTvg03fMvAnAEJIJNt3HShUrcPcggF8w8yPW7prs\nCwMzb4X4tN+H2uyL6QA+QUSrAPwKwIeI6BcA1tVgX4CZ262/GyGu4PejGq+LUvgEi/Qf9oMdgB8I\nCcAfVu52RfgeLQAWOba/D8vXCe+A2kAAE5AbUHvBurAIMvicZu3/MuyA2nnwDqiZ50Ot134D2x96\nJ4B/LVE/3AfgFte+musLAMNhBzcHAXgGUrRbc33h6pcZsGMm/11rfQGJnzVZzwcDeA7AqdV4XZTs\noimyg0+DZP68CeCacrcnQvsfALAWwE4AqwFcZP2znrS+11zzT7Pef611USwDcKpj/7EAFln9cKtj\nfz2A/7P2vwCgxfHabGv/GwAucOyfAMn2eMO6UAaUoB+mA9gDuSF4FcAC63+7Xw32xRHW938VwGsA\nvmbtr7m+cPWLU0xqri+sc5rfxyJY41019oUWLSqKoiixqcSYiaIoilJlqJgoiqIosVExURRFUWKj\nYqIoiqLERsVEURRFiY2KiaIoihIbFROlJiCiTuvvgUQ0K+FjX+vafjbJ4ytKNaBiotQKpqBqAoDP\nFvNBawqKIP4950TMJxdzfEXpC6iYKLXGfwE42VqU6UprJt//Jlm4aiERfREAiGgGET1DRI8AWGLt\ne9ia2XWRmd2ViP4LwCDreL+w9nWakxHRzdb7XyOizziOPY+IfmstcPSLEveBoiSOzhqs1BrXAPgq\nM38CACzxeI+ZjyeigQCeI6K51nuPBjCFmVdb2xcx83tE1ADgZSJ6iJmvJaJLmfkYxznYOvbZAI5k\n5iOIaKT1mb9a75kGmWdpnXXOk5j572l+cUVJE7VMlFrnVAAXWOuMvAiZE+kQ67WXHEICAFcR0ULI\n/EZjHe/zYzpkVlww8wYAWQDHOY7dzjKf0ULIxKCKUrWoZaLUOgTgcmZ+Imcn0QwAXa7tD0FWrNtJ\nRPMANDiOEfZchp2O53ugv0WlylHLRKkVzEDeCWAfx/6/APiyte4KiOgQImr0+PwQyOpzO4loMmQp\nU8Mu83nXuf4G4FwrLjMCwAcgy/UqSp9D74aUWsFkc70OoNdya81h5lutJYUXEBFBFiGa6fH5xwH8\nKxEtgUwL/rzjtbsAvE5E85n58+ZczPwwEZ0AmXK+F8DVzLyBiA7zaZuiVC06Bb2iKIoSG3VzKYqi\nKLFRMVEURVFio2KiKIqixEbFRFEURYmNiomiKIoSG
xUTRVEUJTYqJoqiKEpsVEwURVGU2Px/UXYY\nagMKQHAAAAAASUVORK5CYII=\n", 337 | "text/plain": [ 338 | "" 339 | ] 340 | }, 341 | "metadata": {}, 342 | "output_type": "display_data" 343 | } 344 | ], 345 | "source": [ 346 | "plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode'])\n", 347 | "plt.xlabel(\"Iteration\")\n", 348 | "plt.ylabel(\"Avg. Reward per Episode\")\n", 349 | "plt.grid(True)\n", 350 | "#plt.savefig(settings['save_dir'] + '_' + \"evaluation_reward.svg\", bbox_inches='tight')\n", 351 | "plt.show()\n", 352 | "plt.close()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 14, 358 | "metadata": { 359 | "collapsed": false 360 | }, 361 | "outputs": [ 362 | { 363 | "data": { 364 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEPCAYAAACKplkeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu8HfO9//HXR1zqmjQOSoNQd0qooi7NdneK0ouqKuJX\ndagq2iI4qtVDizrIT+q4p3Vrm5NWtYpQWW5NhCRbIiRBxDUkEknkInL5nD++szJr773W3mvvPWvN\nzF7v5+Mxj71m1qyZz/7stddnzfc78x1zd0RERJKwWtoBiIhIz6GiIiIiiVFRERGRxKioiIhIYlRU\nREQkMSoqIiKSmLoUFTO73czeN7OJJcs+bWYjzWyqmT1iZr3rEYuIiNROvY5U7gQOb7VsMPCYu28P\nPA5cVKdYRESkRqxeFz+a2ZbA39x912h+CjDQ3d83s88ABXffoS7BiIhITaTZp7Kxu78P4O7vARun\nGIuIiCQgSx31Gi9GRCTnVk9x3++b2SYlzV+zKq1oZio4IiJd4O5Wz/3V80jFoqnoAWBQ9PgU4K/t\nvdjdNblz2WWXpR5DViblQrlQLhxw5s+v9Fz91euU4nuBfwHbmdmbZnYq8GvgUDObChwczUsHZsyY\nkXYImaFcxJSLmHKRrro0f7n7dyo8dUg99i8iIvWRpY56qcKgQYPSDiEzlIuYchFTLtJVt+tUusPM\nPA9xiojUmxnMnw8bbFDuOcN7cEe9JKBQKKQdQmYoFzHlIqZcpEtFRUREEqPmLxGRDJs1CxYuhK23\nLv+8mr9ERKRqRx0Fn/tc2lFUT0UlZ9ReHFMuYspFrKflYsGCtCPoHBUVEZEaW7o07QjqR30qIiI1\nNG0abL89dPUjbIcdYOrUyq9Xn4qISAOZOzftCOpLRSVnelp7cXcoFzHlIqZcpEtFRUREEqM+FRGR\nGhozBr70JfWpiIhIghrle7GKSs6ovTimXMSUi1hWc3HPPWlHUB8qKiIidfDqq2lHUB8qKjnT1NSU\ndgiZoVzElItY1nNhBpMnV7/+1Km1i6UWVFREROosb4WiM1RUciar7cVpUC5iykVMuUiXioqIiCRG\nRSVnst5eXE/KRUy5iCkX6VJRERGRxKio5Izai2PKRUy5iDVyLubOhdGj041BRUVEpIcYPBj23Tfd\nGFRUckbtxTHlIqZcxPKci+9/H5Ys6frrV65MLpauUlEREcmI226D6dPTjqJ7VFRyppHbi1tTLmLK\nRSyrufjgg+S3mcXbFKuoiIjUwdChyW9zs82S32Z3pV5UzOwiM5tsZhPN7B4zWzPtmLIsz+3FSVMu\nYspFrJFykcVbFadaVMxsS+D7wO7uviuwOvDtNGMSEZGuS/tIZQHwCbCuma0OrAO8m25I2ZbV9uI0\nKBcx5SKW91xYXe/TmLzV09y5u39oZtcCbwKLgZHu/liaMYmIJOGtt2DRos6/Lu93iEy7+Wtr4Dxg\nS2AzYD0z+06aMWVdI7UXd0S5iCkXsVrn4h//gK99reP1DjwQdtyxpqFkUqpHKsCewDPuPhfAzP4M\n7Avc23rFQYMG0b9/fwD69OnDgAEDVr15ioe7mte85jVf6/nrry/w6KPw1FNNHHBA5fWXLg3z48eH\neWiKfhZ48UX4+tfLb3/ffQvstBOMHh2vX/r61utDgaeegiOPbGLmzAIwjEGDWPV5WXfuntoE7AZM\nAj4FGDAMOKvMei7BqFGj0g4hM5SLmHIRq3UuTjrJHdxPOaX99fr1C+uNHh1+Fj/GwH3EiPKvKa63\n1lptl7W3/vz5Yf5732u5bvTZWdfP9VSbv9z9BeD3wDjgBUJhuSXNmEREpOvSbv7C3a8Brkk7jryI\nD3lFuYgpFzHlIl1pn1IsIpI5CxbAihXJb3fmzOS3mTUqKjlT7KQT5aKUchFLIhe9e8OVV3Y/ltZO\nPjn5bWaNioqISBmvv578Nmtx9JM1Kio5o/bimHIRUy5iykW6VFRERMrI2pXtp50Gy5enHUXHVFRy\nRm3nMeUiplzEekIuyhW022+HOXPqH0tnqaiIiHRBtUcyQ4bUNo6sUVHJGbUXx5SLmHIRSyoXXWn+\ncocbbmi57L77EgkHgNGjYcSIys8//XRy++oqFRURkYR88gmce27l50eN6t72Tz0VvvnNys9Pndq9\n7SdBRSVnekJ7cVKUi5hyEVMu0qWiIiJSRlebvxqdikrOqO08plzElItYI+ciC3eNVFEREamzIUPg\nnXfSjqI2VFRyRu3FMeUiplzEkspFLZu/nngCttwS3n675zWZqaiIiNTA/PntP79iBWy+OTz+eJh/\n4IHax1QPKio508jtxa0pF7FGysX558MVV1R+vl65KHeEUbrso4+q286CBeHnMce0v+28UFERkVz5\nzW/g6qtrv580P9iXLcvvlfgqKjmjtvOYchFTLmK1yMW0abDXXuFxe8WmK2dfVXrNww8nt616UlER\nEenA00/Dc8/Bgw/C3XeHZe01f734YvXbzkIhSJKKSs40Utt5R5SLmHIRq+XYX0cdFT8uFpdynngi\nkRByafW0AxARScO994YzsE46KZntffnLcOSR4fGYMdW/Tkcqkiq1nceUi1ij5aK9D+Jqc3HSScne\nM/6pp+DPfw6Phw/v/vYeeqjtsjwUIBUVEZEyunP219Kl1a+bh0LRGSoqOaO285hyEVMuYmnmIu3r\nS7JQoFRURCRzVqyAlSvTjqI+slAIkqSikjN5aDtfuhQee6z2+8lDLuqlp+Vil13guOMqP59En0ot\nPsy7ep
1Kc3P39jt3bvdenyQVFUncvffCoYemHYXk2ZQp4da5STr0UJg1q/r1O9OU9dZbnX9N0VFH\nwe67d/51pbbaquX87NnpHQGpqORMHtrO69VskYdc1ItyESuXi7vuCkfPEybEy1asiB+ffHIYU6zU\nu+/Gj6dNa3+fW2zR+Thr6eij09u3iookrppvSJ05O0Ya1+c/Dx98UP3606bBlVe2Xd7RqcN33QV3\n3NFyWekFjFddVX0MWfDss+ntO/WiYma9zWy4mb1sZpPNbO+0Y8qyrLSdf/ABnHhi11//qU/Fo7N2\nVVZykQU9NRcvvtjxUUKpm26CSy4pVHy+mi88771X/f5a09lfGSgqwA3AP9x9R2A34OWU45EqjB0b\n+k7KqfaNraMVqUa5D+qufniWe93HH7ec33TTrm0bki8qrTvgP/ww2e3XQqpFxcw2AA5w9zsB3H25\nu3fz+2vPloe283p9W8pDLupFuQjCe6+pw/VKP5x32qn0tS2tWNG593PSX5RGjEh2e/WQ9pHKVsAH\nZnanmY03s1vMbO2UY5JuysIhuPQcnfn239F7r/j8tdfGy15/PfycM6ftSSalnfnV6MzoxLWQhf+9\ntAeUXB3YAzjL3Z83s+uBwcBlrVccNGgQ/fv3B6BPnz4MGDBg1bezYntyI8yXtp2nGc/EiVD8Rtj6\n+ZdfLsbY/vY6er6j+eKyLP190ppvbm7m3HPPzUw8ScwX3x8TJhRYvrz0aKzAsmXx86WvDx+q11Mo\ntP18KF3/jTfieYifHzas5Xzr58vNt95+Z1/flflCoWU+AFaubGLZsgIwLFren1S4e2oTsAkwvWR+\nf+BvZdZzCUaNGpV2CO7u/uCD7pX+LL/7XeXnisB91qzuxZCVXGRBT8sFuG+6afj55JNtn+vbt+Wy\nxx93X7jQ/dhj3WFU2e2B+2OPuS9Z4n7JJfGy0uk//qPl/NKl5dcrTq23n/R0663V7/eqq8qti3ud\nP9dTbf5y9/eBt8xsu2jRwcBLKYaUeXloOw/fAjvW3UP1POSiXvKWi//6L/j975Pb3kEHwXXXwf33\nQ/Eb/UUXwW67tVzvjjtg7bUrv/duvjm5mOrttdfSjiBIu08F4EfAPWbWTDj7q8xZ5pK2xYthk02q\nW/f222sbi+TfpZfCz34WHrvDRx9VXrdcn8qSJeHaklKt+z8eeYSomTaWlQ/eak2eXP26Tz5Zuzg6\no8OiYmZXm9kGZraGmf3TzGab2XeTCsDdX3D3L7r7AHf/urvPT2rbPVFpf0I9zZoVpu2373jdao9A\nunv6ZVq5yKI85+Kee2CDDTr3miVLwgWNr7wCb7/d+tkCUP79FfpistGhXY35nfg0nDKldnF0RjVH\nKod5OM33KGAGsA1wfruvkB6rMxeiiVSjOG5WJcXicPDBbZtWt9su3HGxtUceiR//5Cfx4/HjuxZj\nWjozVllWVHP2V3GdI4Hh7j7f8lLme6CstJ3/539Wfk7XqdRfnnNR7fvl8cfhmWfaLi+eEhxr4uyz\nw1EMwH//d9f32ZFly5K7HXE5Dz5Yu23XSjVF5e9mNgVYApxpZhsBH3fwGunhSgfmE+mO0g94s7YX\n/JU2Y7XX91KqWFAqWby4uu10dJ3KmmtWt51G0mHzl7sPBvYF9nT3ZcBi4JhaBybl5aHtvNpvgd39\ntpiHXNRLnnMxeHDL+fY6p884o/Jzr75afFTocJ+/+U2HqwDhgsgsmj0bFi1KO4ryqumoXwf4AXBT\ntGgzYM9aBiU9yy9+kb9RXiU9rb9sVHtCx913Jx/LOeckv80kbLwxnHBC2lGUV01H/Z3AJ4SjFYB3\ngP+qWUTSrjy2nf/853BZmzESWlq8GL7znc5tN4+5qJWekIvWTU3Fo4Tbb4ezz+7MlpoSigj+/OfE\nNpW4tme9ZUM1ReVz7n41sAzA3RcD6qlvYE8/3f7z5Zq1Ovq2+dprcN99XY9J8u+UU1rOf/JJ+HnP\nPXDjjfWPR7qmmqLySTTIowOY2ecADVqekiy0nR9wQNoRBFnIRVakmYuPPur6qa+HHRY/Hjs2/Oz+\nmVmF7m5AuqGaonIZ8DCwuZndA/wTuKCmUUmP090LHSW7vvGN6kdbaO3RR9su0xUL+VbN2V+PAl8H\nBgH3Ec4CK9Q2LKkka23n3/pWy/mPP67fUBhZycWQIdCrV3LbO+IIeP/9zr0mjVy4w/Ll8M475Z//\n5JPqT90tbi8ZTUltSLqgmrO/vgzsDHwELAB2ipZJD3PvvW3vJ9GR4cNbznfnVqx59fzznc9bex55\nBMaNS257tTJ4MKy7bjxfvD30T38abjf9rW/BFluEZW+80XHRKD4/enTysUr9VNP8dX7JdCnwN+Dn\nNYxJ2lHLtvMTT4QZM2qz7Vo0f7WXi5Ur430uXx7OHurJTXBp9KlMmBCORl6KxhXv3Tv8vPZa2Gij\ncMOqOXNg4EDo3x8eeqjl61sPuVI84vn737sbWaG7G8iFji7wTEs1zV9Hl0yHArsAObhTsjSqW24J\nzVHFYcwnTgxnD11+ebpx5c348e0X4vhiw1jpUUaxGbQ4eu6RR4YhTcKNsNpqfa94ad/ChWlHUJ55\nJ7++WRj4a7K771SbkMru0zsbp3SeGZx1VjiT509/avncjBmw1VblX1f6p3n9ddh665bLzcKH/PLl\nLfd1xhlh+PNNN4VJk2DXXbt3NLFyZRjVtW/feNknn7QcSmPmzPDPuM02Xd9PayefHIZhT+otahbG\nfPrKV5LZXrXGjg2F4uij244a3Pp3GzcO9tQl0DlguHtdT33ocOwvM/v/RKcTE45sBgA5G+tTqjVs\nWPeGfzj22Pjx7NmhGaS1b34z/Pyf/wlTUh/GvXvDcce1XNZ6bKYdd4R580IB0llGsXnzYO+9w+OX\nytwm74wzwt/qpJNCs1Zzc33jk/yopk/leWBcNI0GLnT3xO6nIp1T67bzSh/w3/te5de8/npoupgz\np+VNkbbZJtz3AlpeLd16wMBSf/tby1jefTc+zF91g9RI61wsXAh33ll52xA+PAF237399ert3Xe7\nV+S6+7647rr48cCBbZ+/+eYQ391356GgFNIOoKF1eKTi7r+rRyCSnuXLYY01wuNyp4Duumtonqqk\n2NzV2oIFsM468XxTEzzxRNv1zOL7X3z1q/GpqsWYSm2ySTJnmL3wQhhGfcYMeOAB+OMfu7/N7rj4\n4vDzF78IP4tXk9fLaiVfL2fPru++pYepdPN6YBIwscw0CZjY3o3vk55CmJK0I45wf/JJ9/XXLx4D\nxJO7+8qV7n/8Y9vnaj1df337z//jH+V/n+7ssztOOqnr2xg8uLqYHnkkLLvppnjZkCHuc+d2Pe6i\nZ56p/99YU70m3L1+n9Uedlvxg3zL9qa6BtnV/1hpY/ly9xdfDAWjvTeju3uvXmn/Q7QfX9GwYcls\n81e/cl+xovM5raaozJvnPmZMy2Wvv95xTIsXuy9Z4t63b9vfHdx
vvbXz8Za65Zb0/5aaajnh7hkp\nKlmaVFRio0aN6tbrq30zFgpp/zO0P111lfuIEaM6PKrp7DR/fuXcfelL7suWxfNz54bXrLNO+Llg\ngfsee7ifemrb155zTljn2Wfdp01zHzu2+ph22aXlfOnfcujQ7r0v0v471mYalYEYsjLh7hkrKsA+\nwHPAQsIQ+CuABXUNsvQ/qcFV++Fx/fXuV14Zzy9f3vHRSf6mUYlvc+hQ90WLyue0uI67+wknuD/1\nVOXtVHptUlPpNj/6yP3vfx/ls2aF5ffeG5aVWrHC/aWXWi57/PG0/375eV/kd8Lds1dUnge2ASYA\nvYBTgV/VNcjif5FUrfTDZ+jQtN/Y+Zquu65lLleudB83rnPbKO3rqEWMX/lKy/nNNw8/S/vAHnww\nxH3ZZfGyK690nzq1uqY3TT1hwt3rW1Q6vPjRzJ539z3NbKK77xotm+DudTspUxc/ds7cubDhhuHx\nO+/AZz+bbjx5c8EF4eyz6dPDTZrOPTcMGtlZO+4YznbbeOPkYxSpTv0vfqymqDwJHALcBrwHzAQG\nuftutQ9vVQwqKpFCoVBxRNp77oFp08JFbEceWd+40lGg1iPSjhoFBx5Y010kpIBG5y0qoFwUZfCK\neuAkwkWSPwTOAzYHvlHLoKRrvqtLUhNXOkKAiHSs4pGKmZ0P3Ofuqd8JWUcqlZ1xBuyzD5x6atqR\niOTfjTfCD3+YdhRJqv+RSnvDtGwGjDazp8zsB2ZWZhQnSZN7GD5DBUUa1T//mez2zjqr/KgPSbri\nitpuP20Vi4q7nwdsAfwn8Hlgopk9bGanmNn69QpQWiod4+nMM9OLIxsKaQeQIYW0A0jFTiVjpR91\nVPFRAQi3QFiyBO6/P16nOJjpwQfDttuG++w891wYnn/atPDcl7/ccqiaq6/uWmy33ho/3mcfOO20\n8Hj//ePlxxzTtW1nWrWniRFOJz6ccGrx4iRPQSMUt/HAAxWer3DibOMpvU5lt93SPl0x7WlUBmLI\nytSYuXAP12CVuvXWkIvitTpvvx3WHTHC/dVXw+MpU9r/Pyte0/Xuu2G+vRgqXcj64Yfx47fecn/t\ntTACwuTJcexFP/xhrXKEu2fslGIAM/s88G3geOADQl/LDUkVNjM7D/gCsIG7f7XM815NnI1k4UJY\nX8eL0uAqfSysWBHu4ZOU4gjSX/taOFX8D38Ip5xDuBnZ4MHhTMEPPohfs3Il/Pa3YZDUzTdvub2P\nP4ZPfar8Pvr0iUfTTiByPCt9Kma2rZldamaTgXuARcBh7r5PwgWlH/AVwinLUqXnn087ApF0uMPS\npfDRR5XXSbKglDrooNAn8tprocAAbLlluKndP/8ZrmfaccewvHjTu9YFBdoWlKIhQ+DDD2GttWoT\nfz2011H/MLAWcLy77+ruV7r79BrEcB1wPqBDkSpcdFGBiy/Oy7UTtVZIO4AMKaQdQEWt7yLaHdtt\nF36uuSast175dWpxz6HdoqvySu95M3x4uEVBsYDtumvoo6l0K4hqFLfV+mZzeVLxOhV3/1zrZWZ2\nlLv/Pamdm9mRwPvu3mxmTUDFw7RBgwbRv39/APr06cOAAQNWXQRYfBP1xPmhQ2HEiAI/+1mY//Wv\nIf4AaYp+Nuo8HTzfSPPNqez//vvh2GPbX3/ddcP8QQc18fjjXd/fgQeG13f0/9Mc3UUsyf/Ha6+F\n+fObOPzwls/36tV2/f32K0TFoXP7e/rpJvbcM8xvv33H+Sg/XwCGRfP9SUVnOmCA8Ul26ABXAm8C\n0wlX6i8Efl9mvfZ71Xqot96KO9zGj69VR17yk3v8+JRT0onhS18KQ81/8kmIZ9Ei93790s9N3qYx\nY9p/furU8LNXL3ezts9vv33I/5NPhvHQ5s2rvK3Zs91HjXK/4YZ42S23uJ9+ehjHrNEUO/S7N+Hu\n9e2o79zKMKFmgcBAdPZXC2l/oLSeRo50/+532y4//vj48frrh9jnz3f/zGfi3+XGG8PZaldcEb3r\n3P2ii9zXWKP9fR5wgPuf/tT5WO+6q3xOFyxIP495mqZNCz9vu63tc8XbBJx0Uiguy5a5/+Uv7jNm\nhOVf+1rId2sff+z+ve+5T58etjNhQth+qUceCWdLSfs3civ+L7nHN3Jzd7/wwnATvjwUlb1qFoiK\nShvl30SjUvlwGTiwZWwjR8bPXXttGFZ98ODwTbM906eH9UqNHx+/rnSfP/6x+/33h+UvvBD201Eu\nDjnE/fnn249hu+3qn7/aT8m+L446yv2VV+K/9ccfh8dNTeHLQpb/Jbt7z6GsmTXLffhw93XXdd92\n25Z/p1LLl7s/+mjLZZkrKsBGwM+B4cD9wC+BLeoeZJbfwTVSrw+P4nTBBe0/f9xxLeMrDgW/cGHX\n7pZYyZlnhu3ecEM1eWmZi0qvaW3OnPCPumiR+6WXhoKWdlH4t3/r7ja6977YYQf3DTcM13ZAaHqp\n5Je/dBWVFHz8cTgarFRUykmjqLR3SvF+hHupGHAXcAewEnjCzPY1s+u62Z0jZSxbBre1e3J1U032\naxbOu6805Msaa7Sc32OPcErnuuvCau2dQ9hJQ4fCiy+Gs2jKefjhcJZN0ATAmDFhLl7evr59YaON\nYJ114PLLocKgz1Xp3Tv8HDgw/Nxvv85v4+23wxXcK1e2v17xiu/ymjq/48guu8D48TBjRuUzqkpd\nckl4n2ZVpVG8826ttWD11eGaa8L/3pIlaUdUQaVqA4wBdi+zfAAwH/hdvSofWf5alLB6fCt+/PHQ\nnPTFL7rffLN7nz6hXdu9ZbPWxRfHj2fPTjcv5YD7JZeEe7hPmRKugu6qp55yP/TQ9vN2+unuAwaE\n/prSb4pLlsTxdHQ3xWJH9XnnhZ+bblo+ntY3VjvnnHgfSU/z5rXc949/HDd3Sb6RwpFKex/kL7Xz\n3CvAanULsocXlfvuC3fiO+aYaj4ERnX7Q6QjK1e6P/NMePzWW2FoiyyqRTPHmmuWz9nOO7dc79ln\n2752/Pi4sJXbRrGPwj2s16dP5TiWLw8drcVmsaLS7Q0Z0v33RVNT13OVVT21+asr0igq7TVcmJl9\nuszCvsByd+/gYF3KMYNhw1ouO+EE2Gor+Otfk9vPHXdAv37Q3BwGzCsaOrS6GPfdNzzu1w8+1+aK\npZ5r6dLyy6NLH1bZa6+26+y+e3xxXLFJrsgdttkmnjcLV05X0qsXPPRQuHPnM8/Ey+NBE0MT4aGH\nhmarUiNHhiaSanSluU6kXZWqDXA68BzhrKz1o6kJeBY4vZ6Vj5weqUyb5v6vf4UB53beOZynX/yG\nuOGGYZ1aNGfssEPbWMD97LPr+/vn2fDh7gceWP3RXb0UBzosFxOEU7dL5yE0ZZ14Ystlxenyy+sT\nt6SDLDV/hXg4CngSmEMYSPJJ4Oi6B5ml/+oqNTd3/OH/k58kX1C+8IXy8SxaFM4ckeqVXqiXJeC+\nzz5tl594ovvMmfH8v/
[... remainder of the base64-encoded PNG plot output omitted ...]",
365 |     "text/plain": [
366 |       ""
367 |      ]
368 |     },
369 |     "metadata": {},
370 |     "output_type": "display_data"
371 |    }
372 |   ],
373 |   "source": [
374 |     "plt.plot(train_stats['Iteration'], train_stats['Average Q-Value'])\n",
375 |     "plt.xlabel(\"Iteration\")\n",
376 |     "plt.ylabel(\"Avg. 
Q-Values\")\n", 377 | "plt.grid(True)\n", 378 | "#plt.savefig(settings['save_dir'] + '_' + \"training_q_values.svg\", bbox_inches='tight')\n", 379 | "plt.show()\n", 380 | "plt.close()" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "# Evaluating the best policy" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "Let's load the network that collected the highest reward per game episode" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 15, 400 | "metadata": { 401 | "collapsed": true 402 | }, 403 | "outputs": [], 404 | "source": [ 405 | "best_iteration_index = np.argmax(eval_stats['Reward per Episode'])\n", 406 | "best_iteration = str(int(eval_stats['Iteration'][best_iteration_index]))" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 16, 412 | "metadata": { 413 | "collapsed": false 414 | }, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "'4500000'" 420 | ] 421 | }, 422 | "execution_count": 16, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "best_iteration " 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 17, 434 | "metadata": { 435 | "collapsed": true 436 | }, 437 | "outputs": [], 438 | "source": [ 439 | "agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p')" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 18, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [], 449 | "source": [ 450 | "r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True)" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": 19, 456 | "metadata": { 457 | "collapsed": false 458 | }, 459 | "outputs": [ 460 | { 461 | "data": { 462 | "text/plain": [ 463 | "17.6" 464 | ] 465 | }, 466 | "execution_count": 19, 467 | "metadata": {}, 468 | "output_type": "execute_result" 469 | } 470 | ], 471 | "source": [ 472 | "r_per_episode" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": null, 478 | "metadata": { 479 | "collapsed": true 480 | }, 481 | "outputs": [], 482 | "source": [] 483 | } 484 | ], 485 | "metadata": { 486 | "kernelspec": { 487 | "display_name": "Python 2", 488 | "language": "python", 489 | "name": "python2" 490 | }, 491 | "language_info": { 492 | "codemirror_mode": { 493 | "name": "ipython", 494 | "version": 2 495 | }, 496 | "file_extension": ".py", 497 | "mimetype": "text/x-python", 498 | "name": "python", 499 | "nbconvert_exporter": "python", 500 | "pygments_lexer": "ipython2", 501 | "version": "2.7.11" 502 | } 503 | }, 504 | "nbformat": 4, 505 | "nbformat_minor": 0 506 | } 507 | -------------------------------------------------------------------------------- /examples/mountain_car_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | from chimp.learners.chainer_backend import ChainerBackend 10 | from chimp.learners.dqn_learner import DQNLearner 11 | from chimp.utils.policies import DQNPolicy 12 | 13 | from chimp.simulators.mdp.mountain_car import MountainCar 14 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 15 | 16 | import numpy as np 17 | import pickle 18 | import pylab as p 19 | 20 | import chainer 21 | import chainer.functions as F 22 | import chainer.links as L 23 | from chainer import Chain 24 | 25 | settings = { 26 | 27 | # agent settings 28 | 'batch_size' : 32, 29 | 'print_every' : 1000, 30 | 'save_dir' : 'results', 31 | 'iterations' : 2000000, 32 | 'eval_iterations' : 100, 33 | 'eval_every' : 1000, 34 | 'save_every' : 20000, 35 | 'initial_exploration' : 50000, 36 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step 37 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 38 | 'epsilon' : 1.0, # Initial exploratoin rate 39 | 'learn_freq' : 1, 40 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 41 | 'model_dims' : (1,2), 42 | 43 | # simulator settings 44 | 'viz' : False, 45 | 46 | # replay memory settings 47 | 'memory_size' : 100000, # size of replay memory 48 | 'n_frames' : 1, # number of frames 49 | 50 | # learner settings 51 | 'learning_rate' : 0.00001, 52 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 53 | 'discount' : 0.95, # discount rate for RL 54 | 'clip_err' : False, # value to clip loss gradients to 55 | 'clip_reward' : False, # value to clip reward values to 56 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations 57 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 58 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 59 | 'gpu' : False, 60 | 'reward_rescale': False, 61 | 62 | # general 63 | 'seed_general' : 1723, 64 | 'seed_simulator' : 5632, 65 | 'seed_agent' : 9826, 66 | 'seed_memory' : 7563 67 | 68 | } 69 | 70 | class TestNet(Chain): 71 | 72 | def __init__(self): 73 | super(TestNet, self).__init__( 74 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 75 | l2=F.Linear(20, 10, bias=0.0), 76 | bn1=L.BatchNormalization(10), 77 | l3=F.Linear(10, 10), 78 | l4=F.Linear(10, 10), 79 | bn2=L.BatchNormalization(10), 80 | lout=F.Linear(10, simulator.n_actions) 81 | ) 82 | self.train = True 83 | # initialize avg_var to prevent divide by zero 84 | self.bn1.avg_var.fill(0.1), 85 | self.bn2.avg_var.fill(0.1), 86 | 87 | def __call__(self, ohist, ahist): 88 | h = F.relu(self.l1(ohist)) 89 | h = F.relu(self.l2(h)) 90 | h = self.bn1(h, test=not self.train) 91 | h = F.relu(self.l3(h)) 92 | h = F.relu(self.l4(h)) 93 | h = self.bn2(h, test=not self.train) 94 | output = self.lout(h) 95 | return output 96 | 97 | 98 | def car_sim(nsteps, simulator, policy, verbose=False): 99 | mdp = simulator.model 100 | 101 | # re-initialize the model 102 | simulator.reset_episode() 103 | 104 | rtot = 0.0 105 | xpos = np.zeros(nsteps) 106 | vel = np.zeros(nsteps) 107 | # run the simulation 108 | input_state = np.zeros((1,2), dtype=np.float32) 109 | for i in xrange(nsteps): 110 | state = simulator.get_screenshot() 111 | input_state[0] = state 112 | a = 
policy.action((input_state,None))
113 |         simulator.act(a)
114 |         r = simulator.reward()
115 |         rtot += r
116 |         xpos[i], vel[i] = state
117 |         if simulator.episode_over():
118 |             break
119 |     return rtot, xpos, vel
120 | 
121 | 
122 | mdp = MountainCar()
123 | simulator = MDPSimulator(mdp)
124 | 
125 | net = pickle.load(open("../chimp/pre_trained_nets/mountain_car.net", "rb"))
126 | backend = ChainerBackend(settings)
127 | backend.set_net(net)
128 | learner = DQNLearner(settings, backend)
129 | 
130 | policy = DQNPolicy(learner)
131 | 
132 | r, xtrace, vtrace = car_sim(300, simulator, policy, verbose=True)
133 | 
134 | p.plot(xtrace); p.plot(10.0*vtrace)
135 | p.show()
136 | 
--------------------------------------------------------------------------------
/examples/run_atari.py:
--------------------------------------------------------------------------------
1 | # be sure to have run 'python setup.py' from the chimp directory
2 | 
3 | 
4 | # # Training DeepMind's Atari DQN with Chimp
5 | 
6 | # First, we load all the Chimp modules.
7 | 
8 | from chimp.memories import ReplayMemoryHDF5
9 | 
10 | from chimp.learners.dqn_learner import DQNLearner
11 | from chimp.learners.chainer_backend import ChainerBackend
12 | 
13 | from chimp.simulators.atari import AtariSimulator
14 | 
15 | from chimp.agents import DQNAgent
16 | 
17 | 
18 | # Then we load Python packages.
19 | 
20 | import matplotlib.pyplot as plt
21 | 
22 | import numpy as np
23 | import chainer
24 | import chainer.functions as F
25 | import chainer.links as L
26 | from chainer import Chain
27 | import os
28 | 
29 | import pandas as ps
30 | 
31 | 
32 | # Finally, we set training parameters in a settings dictionary that will be passed to the modules.
33 | 
34 | # Define training settings
35 | 
36 | settings = {
37 | 
38 |     # agent settings
39 |     'batch_size' : 32,
40 |     'print_every' : 10000,
41 |     'save_dir' : './results_atari',
42 |     'iterations' : 5000000,
43 |     'eval_iterations' : 5000,
44 |     'eval_every' : 50000,
45 |     'save_every' : 50000,
46 |     'initial_exploration' : 50000,
47 |     'epsilon_decay' : 0.000005, # subtract from epsilon every step
48 |     'eval_epsilon' : 0.05, # epsilon used in evaluation, 0 means no random actions
49 |     'epsilon' : 1.0, # Initial exploration rate
50 |     'learn_freq' : 4,
51 |     'history_sizes' : (4, 0, 0), # sizes of histories to use as nn inputs (o, a, r)
52 |     'model_dims' : (84,84),
53 | 
54 |     # Atari settings
55 |     'rom' : "Breakout.bin",
56 |     'rom_dir' : './roms',
57 |     'pad' : 15, # padding parameter - for image cropping - only along the length of the image, to obtain a square
58 |     'action_history' : True,
59 | 
60 |     # simulator settings
61 |     'viz' : True,
62 |     'viz_cropped' : False,
63 | 
64 |     # replay memory settings
65 |     'memory_size' : 500000, # size of replay memory
66 |     'frame_skip' : 4, # number of frames to skip
67 | 
68 |     # learner settings
69 |     'learning_rate' : 0.00025,
70 |     'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used
71 |     'discount' : 0.99, # discount rate for RL
72 |     'clip_err' : False, # value to clip loss gradients to
73 |     'clip_reward' : 1, # value to clip reward values to
74 |     'target_net_update' : 10000, # update the update-generating target net every fixed number of iterations
75 |     'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"
76 |     'gpu' : True, # set to False to run this example on the CPU
77 |     'reward_rescale': False,
78 | 
79 |     # general
80 |     'seed_general' : 1723,
81 |     'seed_simulator' : 5632,
82 |     'seed_agent' : 9826,
83 |     'seed_memory' : 7563
84 | 
85 | }
86 | 
87 | 
88 | # set random seed
89 | np.random.seed(settings["seed_general"]) 90 | 91 | 92 | # initialize the simulator 93 | 94 | simulator = AtariSimulator(settings) 95 | 96 | # Define the network 97 | class Convolution(Chain): 98 | 99 | def __init__(self): 100 | super(Convolution, self).__init__( 101 | l1=F.Convolution2D(settings['history_sizes'][0], 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)), 102 | l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)), 103 | l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)), 104 | l4=F.Linear(3136, 512, wscale = np.sqrt(2)), 105 | l5=F.Linear(512, simulator.n_actions, wscale = np.sqrt(2)), 106 | ) 107 | 108 | def __call__(self, ohist, ahist): 109 | if len(ohist.data.shape) < 4: 110 | ohist = F.reshape(ohist,(1,4,84,84)) 111 | h1 = F.relu(self.l1(ohist/255.0)) 112 | h2 = F.relu(self.l2(h1)) 113 | h3 = F.relu(self.l3(h2)) 114 | h4 = F.relu(self.l4(h3)) 115 | output = self.l5(h4) 116 | return output 117 | 118 | net = Convolution() 119 | 120 | 121 | # initialize the learner + chainer backend, replay memory, and agent modules 122 | 123 | backend = ChainerBackend(settings) 124 | backend.set_net(net) 125 | learner = DQNLearner(settings, backend) 126 | 127 | memory = ReplayMemoryHDF5(settings) 128 | 129 | agent = DQNAgent(learner, memory, simulator, settings) 130 | 131 | # launch training 132 | 133 | agent.train() 134 | 135 | 136 | # Visualizing results 137 | 138 | train_stats = ps.read_csv('%s/training_history.csv' % settings['save_dir'],delimiter=' ',header=None) 139 | train_stats.columns = ['Iteration','MSE Loss','Average Q-Value'] 140 | 141 | eval_stats = ps.read_csv('%s/evaluation_history.csv' % settings['save_dir'],delimiter=' ',header=None) 142 | eval_stats.columns = ['Iteration','Total Reward','Reward per Episode'] 143 | 144 | 145 | plt.plot(eval_stats['Iteration'], eval_stats['Reward per Episode']) 146 | plt.xlabel("Iteration") 147 | plt.ylabel("Avg. Reward per Episode") 148 | plt.grid(True) 149 | plt.savefig(settings['save_dir'] + '_' + "evaluation_reward.svg", bbox_inches='tight') 150 | #plt.show() 151 | plt.close() 152 | 153 | 154 | plt.plot(train_stats['Iteration'], train_stats['Average Q-Value']) 155 | plt.xlabel("Iteration") 156 | plt.ylabel("Avg. Q-Values") 157 | plt.grid(True) 158 | plt.savefig(settings['save_dir'] + '_' + "training_q_values.svg", bbox_inches='tight') 159 | #plt.show() 160 | plt.close() 161 | 162 | 163 | # Evaluating the best policy 164 | 165 | # load the network that collected the highest reward per game episode 166 | 167 | best_iteration_index = np.argmax(eval_stats['Reward per Episode']) 168 | best_iteration = str(int(eval_stats['Iteration'][best_iteration_index])) 169 | 170 | agent.learner.load_net(settings['save_dir']+'/net_' + best_iteration + '.p') 171 | 172 | 173 | # evaluate policy performance 174 | 175 | r_tot, r_per_episode, runtime = agent.simulate(10000, epsilon=0.05, viz=True) 176 | 177 | r_per_episode 178 | 179 | 180 | -------------------------------------------------------------------------------- /examples/run_cartpole.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a place holder for real unit testing. 
3 | Right now we just overfit a simple control problem: 4 | - the agent tries to get to the top right corner (1,1) of a 2D map 5 | - action 0 takes it towards (0,0), action 1 takes it toward (1,1) 6 | - action 1 is optimal for all states 7 | """ 8 | 9 | # Memory 10 | from chimp.memories import ReplayMemoryHDF5 11 | 12 | # Learner (Brain) 13 | from chimp.learners.dqn_learner import DQNLearner 14 | from chimp.learners.chainer_backend import ChainerBackend 15 | 16 | # Agent Framework 17 | from chimp.agents import DQNAgent 18 | 19 | # Simulator 20 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 21 | from chimp.simulators.mdp.cart_pole import CartPole 22 | 23 | # Rollout Policy 24 | from chimp.utils.policies import RandomPolicy 25 | 26 | import numpy as np 27 | import pickle 28 | import pylab as p 29 | 30 | import chainer 31 | import chainer.functions as F 32 | import chainer.links as L 33 | from chainer import Chain 34 | 35 | settings = { 36 | 37 | # agent settings 38 | 'batch_size' : 32, 39 | 'print_every' : 1000, 40 | 'save_dir' : 'results/cartpole-1', 41 | 'iterations' : 10000, 42 | 'eval_iterations' : 200, 43 | 'eval_every' : 1000, 44 | 'save_every' : 1000, 45 | 'initial_exploration' : 10000, 46 | 'epsilon_decay' : 0.000005, # subtract from epsilon every step 47 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 48 | 'epsilon' : 1.0, # Initial exploratoin rate 49 | 'learn_freq' : 1, 50 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 51 | 'model_dims' : (1,4), 52 | 53 | # simulator settings 54 | 'viz' : False, 55 | 56 | # replay memory settings 57 | 'memory_size' : 10000, # size of replay memory 58 | 'n_frames' : 1, # number of frames 59 | 60 | # learner settings 61 | 'learning_rate' : 0.00001, 62 | 'decay_rate' : 0.95, # decay rate for RMSprop, otherwise not used 63 | 'discount' : 0.99, # discount rate for RL 64 | 'clip_err' : False, # value to clip loss gradients to 65 | 'clip_reward' : False, # value to clip reward values to 66 | 'target_net_update' : 2000, # update the update-generating target net every fixed number of iterations 67 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 68 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 69 | 'gpu' : False, 70 | 'reward_rescale': False, 71 | 72 | # general 73 | 'seed_general' : 1723, 74 | 'seed_simulator' : 5632, 75 | 'seed_agent' : 9826, 76 | 'seed_memory' : 7563 77 | 78 | } 79 | 80 | mdp = CartPole() 81 | simulator = MDPSimulator(mdp) 82 | 83 | class CartNet(Chain): 84 | 85 | def __init__(self): 86 | super(CartNet, self).__init__( 87 | l1=F.Linear(4, 20, bias=0.0), 88 | l2=F.Linear(20, 10, bias=0.0), 89 | bn1=L.BatchNormalization(10), 90 | l3=F.Linear(10, 10), 91 | l4=F.Linear(10, 10), 92 | bn2=L.BatchNormalization(10), 93 | lout=F.Linear(10, simulator.n_actions) 94 | ) 95 | self.train = True 96 | # initialize avg_var to prevent divide by zero 97 | self.bn1.avg_var.fill(0.1), 98 | self.bn2.avg_var.fill(0.1), 99 | 100 | def __call__(self, ohist, ahist): 101 | h = F.relu(self.l1(ohist)) 102 | h = F.relu(self.l2(h)) 103 | h = self.bn1(h, test=not self.train) 104 | h = F.relu(self.l3(h)) 105 | h = F.relu(self.l4(h)) 106 | h = self.bn2(h, test=not self.train) 107 | output = self.lout(h) 108 | return output 109 | 110 | 111 | def pole_sim(nsteps, simulator, policy, verbose=False): 112 | mdp = simulator.model 113 | 114 | # re-initialize the model 115 | simulator.reset_episode() 116 | 117 | rtot = 0.0 118 | xpos = 
np.zeros(nsteps)
119 |     thetas = np.zeros(nsteps)
120 |     # run the simulation
121 |     input_state = np.zeros((1,4), dtype=np.float32)
122 |     for i in xrange(nsteps):
123 |         state = simulator.get_screenshot()
124 |         input_state[0] = state
125 |         #a = policy.action((input_state,None))
126 |         a = policy.action(state)
127 |         simulator.act(a)
128 |         r = simulator.reward()
129 |         rtot += r
130 |         xpos[i], thetas[i] = state[0], state[2]
131 |         print state, r
132 |         if simulator.episode_over():
133 |             break
134 |     return rtot, xpos, thetas
135 | 
136 | 
137 | class PoleCartHeuristic():
138 | 
139 |     def __init__(self):
140 |         self.a = 0
141 | 
142 |     def action(self, state):
143 |         if state[2] > 0:
144 |             return 1
145 |         else:
146 |             return 0
147 | 
148 | 
149 | net = CartNet()
150 | 
151 | # Initialize Learner with a Chainer backend
152 | backend = ChainerBackend(settings)
153 | backend.set_net(net)
154 | learner = DQNLearner(settings, backend)
155 | 
156 | # Initialize memory
157 | memory = ReplayMemoryHDF5(settings)
158 | 
159 | # Initialize Agent Framework
160 | agent = DQNAgent(learner, memory, simulator, settings)
161 | 
162 | # Start training
163 | agent.train(verbose=True)
164 | 
165 | #policy = RandomPolicy(simulator.n_actions)
166 | #policy = PoleCartHeuristic()
167 | 
168 | #r, xs, ts = pole_sim(100, simulator, policy, verbose=True)
169 | 
170 | #p.plot(xs); p.plot(10.0*ts)
171 | #p.show()
172 | 
--------------------------------------------------------------------------------
/examples/run_mountain_car.py:
--------------------------------------------------------------------------------
1 | """
2 | File to initialize training.
3 | Contains settings, network definition for Chainer.
4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training.
5 | """ 6 | 7 | import numpy as np 8 | 9 | import chainer 10 | import chainer.functions as F 11 | import chainer.links as L 12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils 13 | from chainer import Link, Chain, ChainList 14 | 15 | # Memory 16 | from chimp.memories import ReplayMemoryHDF5 17 | 18 | # Learner (Brain) 19 | from chimp.learners.dqn_learner import DQNLearner 20 | from chimp.learners.chainer_backend import ChainerBackend 21 | 22 | # Agent Framework 23 | from chimp.agents import DQNAgent 24 | 25 | # Simulator 26 | from chimp.simulators.mdp.mdp_simulator import MDPSimulator 27 | from chimp.simulators.mdp.mountain_car import MountainCar 28 | 29 | # Rollout Policy 30 | from chimp.utils.policies import RandomPolicy 31 | 32 | 33 | settings = { 34 | 35 | # agent settings 36 | 'batch_size' : 32, 37 | 'print_every' : 1000, 38 | 'save_dir' : 'results/mountain_car', 39 | 'iterations' : 200000, 40 | 'eval_iterations' : 100, 41 | 'eval_every' : 1000, 42 | 'save_every' : 20000, 43 | 'initial_exploration' : 50000, 44 | 'epsilon_decay' : 0.000001, # subtract from epsilon every step 45 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 46 | 'epsilon' : 1.0, # Initial exploratoin rate 47 | 'learn_freq' : 1, 48 | 'history_sizes' : (1, 0, 0), # sizes of histories to use as nn inputs (o, a, r) 49 | 'model_dims' : (1,2), 50 | 51 | # simulator settings 52 | 'viz' : False, 53 | 54 | # replay memory settings 55 | 'memory_size' : 100000, # size of replay memory 56 | 'n_frames' : 1, # number of frames 57 | 58 | # learner settings 59 | 'learning_rate' : 0.00001, 60 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 61 | 'discount' : 0.95, # discount rate for RL 62 | 'clip_err' : False, # value to clip loss gradients to 63 | 'clip_reward' : False, # value to clip reward values to 64 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 65 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 66 | 'optim_name' : 'ADAM', # currently supports "RMSprop", "ADADELTA", "ADAM" and "SGD"' 67 | 'gpu' : False, 68 | 'reward_rescale': False, 69 | 70 | # general 71 | 'seed_general' : 1723, 72 | 'seed_simulator' : 5632, 73 | 'seed_agent' : 9826, 74 | 'seed_memory' : 7563 75 | 76 | } 77 | 78 | mdp = MountainCar() 79 | simulator = MDPSimulator(mdp) 80 | 81 | class CarNet(Chain): 82 | 83 | def __init__(self): 84 | super(CarNet, self).__init__( 85 | l1=F.Linear(settings['model_dims'][1], 20, bias=0.0), 86 | l2=F.Linear(20, 10, bias=0.0), 87 | bn1=L.BatchNormalization(10), 88 | l3=F.Linear(10, 10), 89 | l4=F.Linear(10, 10), 90 | bn2=L.BatchNormalization(10), 91 | lout=F.Linear(10, simulator.n_actions) 92 | ) 93 | self.train = True 94 | # initialize avg_var to prevent divide by zero 95 | self.bn1.avg_var.fill(0.1), 96 | self.bn2.avg_var.fill(0.1), 97 | 98 | 99 | def __call__(self, ohist, ahist): 100 | h = F.relu(self.l1(ohist)) 101 | h = F.relu(self.l2(h)) 102 | h = self.bn1(h, test=not self.train) 103 | h = F.relu(self.l3(h)) 104 | h = F.relu(self.l4(h)) 105 | h = self.bn2(h, test=not self.train) 106 | output = self.lout(h) 107 | return output 108 | 109 | 110 | net = CarNet() 111 | 112 | # Initialize Learner with a Chainer backend 113 | backend = ChainerBackend(settings) 114 | backend.set_net(net) 115 | learner = DQNLearner(settings, backend) 116 | 117 | # Initialize memory 118 | memory = ReplayMemoryHDF5(settings) 119 | 120 | # Initialize Agent Framework 121 | agent = 
DQNAgent(learner, memory, simulator, settings) 122 | 123 | # Start training 124 | agent.train(verbose=True) 125 | -------------------------------------------------------------------------------- /examples/run_tiger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | File to initialize training. 3 | Contains settings, network definition for Chainer. 4 | Creates the simulator, replay memory, DQN learner, and passes these to the agent framework for training. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | import chainer 10 | import chainer.functions as F 11 | import chainer.links as L 12 | from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils 13 | from chainer import Link, Chain, ChainList 14 | from memories import ReplayMemoryHDF5 15 | 16 | from learners import Learner 17 | from agents import DQNAgent 18 | 19 | from simulators.pomdp import POMDPSimulator 20 | from simulators.pomdp import TigerPOMDP 21 | 22 | print('Setting training parameters...') 23 | # Set training settings 24 | settings = { 25 | # agent settings 26 | 'batch_size' : 32, 27 | 'print_every' : 5000, 28 | 'save_dir' : 'results/nets_tiger_observation', 29 | 'iterations' : 500000, 30 | 'eval_iterations' : 5000, 31 | 'eval_every' : 5000, 32 | 'save_every' : 5000, 33 | 'initial_exploration' : 10000, 34 | 'epsilon_decay' : 0.0001, # subtract from epsilon every step 35 | 'eval_epsilon' : 0, # epsilon used in evaluation, 0 means no random actions 36 | 'epsilon' : 1.0, # Initial exploratoin rate 37 | 'model_dims': (1,1), 38 | 'learn_freq' : 1, 39 | 40 | # simulator settings 41 | 'viz' : False, 42 | 43 | # replay memory settings 44 | 'memory_size' : 100000, # size of replay memory 45 | 'n_frames' : 5, # number of frames 46 | 47 | # learner settings 48 | 'learning_rate' : 0.001, 49 | 'decay_rate' : 0.99, # decay rate for RMSprop, otherwise not used 50 | 'discount' : 0.95, # discount rate for RL 51 | 'clip_err' : False, # value to clip loss gradients to 52 | 'clip_reward' : False, # value to clip reward values to 53 | 'target_net_update' : 1000, # update the update-generating target net every fixed number of iterations 54 | 'double_DQN' : False, # use Double DQN (based on Deep Mind paper) 55 | 'optim_name' : 'RMSprop', # currently supports "RMSprop", "ADADELTA" and "SGD"' 56 | 'gpu' : False, 57 | 'reward_rescale': False, 58 | 59 | # general 60 | 'seed_general' : 1723, 61 | 'seed_simulator' : 5632, 62 | 'seed_agent' : 9826, 63 | 'seed_memory' : 7563 64 | 65 | } 66 | 67 | print(settings) 68 | 69 | np.random.seed(settings["seed_general"]) 70 | 71 | print('Setting up simulator...') 72 | pomdp = TigerPOMDP( seed=settings['seed_simulator'] ) 73 | simulator = POMDPSimulator(pomdp, robs=True) 74 | 75 | settings['model_dims'] = simulator.model_dims 76 | 77 | print('Initializing replay memory...') 78 | memory = ReplayMemoryHDF5(settings) 79 | 80 | print('Setting up networks...') 81 | 82 | class Linear(Chain): 83 | 84 | def __init__(self): 85 | super(Linear, self).__init__( 86 | l1=F.Bilinear(settings["n_frames"], settings["n_frames"], 200), 87 | l2=F.Linear(200, 100, wscale=np.sqrt(2)), 88 | l3=F.Linear(100, 100, wscale=np.sqrt(2)), 89 | l4=F.Linear(100, 50, wscale=np.sqrt(2)), 90 | l5=F.Linear(50, simulator.n_actions, wscale = np.sqrt(2)) 91 | ) 92 | 93 | def __call__(self, s, action_history): 94 | h1 = F.relu(self.l1(s,action_history)) 95 | h2 = F.relu(self.l2(h1)) 96 | h3 = F.relu(self.l3(h2)) 97 | h4 = F.relu(self.l4(h3)) 98 | output = self.l5(h4) 99 | return output 
100 | 101 | net = Linear() 102 | 103 | print('Initializing the learner...') 104 | learner = Learner(settings) 105 | learner.load_net(net) 106 | 107 | print('Initializing the agent framework...') 108 | agent = DQNAgent(settings) 109 | 110 | print('Training...') 111 | agent.train(learner, memory, simulator) 112 | 113 | print('Loading the net...') 114 | learner = agent.load(settings['save_dir']+'/learner_final.p') 115 | 116 | ind_max = learner.val_rewards.index(max(learner.val_rewards)) 117 | ind_net = settings['initial_exploration'] + ind_max * settings['eval_every'] 118 | agent.load_net(learner,settings['save_dir']+'/net_%d.p' % int(ind_net)) 119 | 120 | np.random.seed(settings["seed_general"]) 121 | 122 | print('Evaluating DQN agent...') 123 | print('(reward, MSE loss, mean Q-value, episodes - NA, time)') 124 | reward, MSE_loss, mean_Q_value, episodes, time, paths, actions, rewards = agent.evaluate(learner, simulator, 50000) 125 | print(reward, MSE_loss, mean_Q_value, episodes, time) 126 | -------------------------------------------------------------------------------- /logos/chimp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/chimp.png -------------------------------------------------------------------------------- /logos/monkey_text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sisl/Chimp/39aecc18a635ce2608b3f604310dedd738946574/logos/monkey_text.png -------------------------------------------------------------------------------- /roms/README.md: -------------------------------------------------------------------------------- 1 | # Put roms here -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import numpy 4 | 5 | """ 6 | This script creates a symbolic link to the chimp source code in your python's site-packages directory 7 | """ 8 | 9 | np_path = numpy.__file__ 10 | source_path = os.path.dirname(os.path.realpath("setup.py")) + "/chimp" 11 | 12 | np_split = np_path.split("/") 13 | target_path = '/'.join(np_split[:-2]) + "/chimp" 14 | 15 | # symlink to the site packages dir 16 | cmd = "ln -s " + source_path + " " + target_path 17 | 18 | subprocess.call([cmd], shell=True) 19 | 20 | 21 | --------------------------------------------------------------------------------
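A note on the setup.py listed above: it builds the symbolic link by shelling out to `ln -s`, which assumes a POSIX shell. The snippet below is a minimal, untested sketch of the same idea using only the standard library; it is not part of the repository, and it assumes a standard (non-virtualenv) interpreter layout where `site.getsitepackages()` is available and resolves the target path with `site` instead of inferring it from `numpy.__file__`.

import os
import site

# directory containing this script and the chimp/ package (same layout assumption as setup.py)
repo_root = os.path.dirname(os.path.abspath(__file__))
source_path = os.path.join(repo_root, "chimp")

# first site-packages directory of the running interpreter
# (site.getsitepackages() may be missing inside some virtualenvs)
target_path = os.path.join(site.getsitepackages()[0], "chimp")

if not os.path.islink(target_path) and not os.path.exists(target_path):
    # same effect as "ln -s source_path target_path"
    os.symlink(source_path, target_path)
    print("linked %s -> %s" % (source_path, target_path))
else:
    print("%s already exists; remove it first to re-link" % target_path)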