├── ADMCode ├── snuz │ ├── ars │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── shared_noise.py │ │ ├── optimizers.py │ │ ├── policies.py │ │ ├── lqr_env.py │ │ ├── run_policy.py │ │ ├── logz.py │ │ ├── filter.py │ │ └── ars.py │ ├── ppo │ │ ├── agents │ │ │ ├── __init__.py │ │ │ ├── reinforce.py │ │ │ └── ppo_gae.py │ │ ├── __init__.py │ │ ├── envs.py │ │ ├── utils.py │ │ ├── run_ppo.py │ │ ├── storage.py │ │ └── models.py │ └── __init__.py ├── foobar.txt ├── __init__.py ├── sdt.py ├── utils.py ├── ddm.py ├── believer_skeptic.py ├── qlearn.py ├── visualize.py └── neural.py ├── requirements.txt ├── notebooks ├── images │ ├── IGT.png │ ├── car.gif │ ├── bandit.png │ ├── attractor.mp4 │ ├── believer-skeptic.png │ ├── multichannel_selection.png │ ├── multichannel_selection.tiff │ ├── believer-skeptic_to_accumulation.png │ └── believer-skeptic_to_accumulation.tiff ├── Lab TEMPLATE.ipynb ├── Lab 5 - SNUZ.ipynb └── Homework 4.ipynb ├── data └── IGTCards.csv ├── LICENSE ├── .gitignore ├── setup.py └── README.md /ADMCode/snuz/ars/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/agents/reinforce.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | pandas 4 | matplotlib 5 | seaborn 6 | scikit-learn 7 | numba 8 | future 9 | -------------------------------------------------------------------------------- /notebooks/images/IGT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/IGT.png -------------------------------------------------------------------------------- /notebooks/images/car.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/car.gif -------------------------------------------------------------------------------- /notebooks/images/bandit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/bandit.png -------------------------------------------------------------------------------- /notebooks/images/attractor.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/attractor.mp4 -------------------------------------------------------------------------------- /notebooks/images/believer-skeptic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic.png -------------------------------------------------------------------------------- /notebooks/images/multichannel_selection.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/multichannel_selection.png -------------------------------------------------------------------------------- /notebooks/images/multichannel_selection.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/multichannel_selection.tiff -------------------------------------------------------------------------------- /notebooks/images/believer-skeptic_to_accumulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic_to_accumulation.png -------------------------------------------------------------------------------- /notebooks/images/believer-skeptic_to_accumulation.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic_to_accumulation.tiff -------------------------------------------------------------------------------- /ADMCode/snuz/__init__.py: -------------------------------------------------------------------------------- 1 | from ADMCode.snuz.ppo.run_ppo import run_ppo 2 | from ADMCode.snuz.ars.ars import run_ars 3 | from ADMCode.snuz import ars 4 | from ADMCode.snuz import ppo 5 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | from ADMCode.snuz.ppo import models 2 | from ADMCode.snuz.ppo import agents 3 | from ADMCode.snuz.ppo import envs 4 | from ADMCode.snuz.ppo import storage 5 | -------------------------------------------------------------------------------- /ADMCode/foobar.txt: -------------------------------------------------------------------------------- 1 | Now let's evaluate the performance of the model using two metrics: 2 | 3 |
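As a concrete reading of the two metrics defined below, here is a minimal sketch (not part of ADMCode) that scores a hypothetical sequence of deck choices; the deck labels `a`–`d` mirror the columns of `data/IGTCards.csv`:

```python
# Hypothetical example: score an agent's deck choices with the P and Q metrics defined below.
from collections import Counter

choices = ['a', 'c', 'd', 'd', 'b', 'c', 'd', 'a', 'c', 'd']  # toy choice sequence
n = Counter(choices)  # picks per deck

P = (n['c'] + n['d']) - (n['a'] + n['b'])  # payoff: high-value minus low-value decks
Q = (n['b'] + n['d']) - (n['a'] + n['c'])  # sensitivity: high- minus low-frequency rewards
print(P, Q)  # -> 4 0 for this toy sequence
```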
4 | 5 | **Payoff (P)** is the degree to which the agent chooses the High Value decks over the Low Value decks. This is a measure of efficient value-based decision-making. 6 | 7 | P = $\Sigma (C + D) - \Sigma (A + B)$ 8 | 9 | **Sensitivity (Q)** is the sensitivity of the agent to High Frequency rewards over Low Frequency rewards. 10 | 11 | Q = $\Sigma (B + D) - \Sigma (A + C)$ 12 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/envs.py: -------------------------------------------------------------------------------- 1 | """ 2 | From https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/envs.py 3 | """ 4 | 5 | import gym 6 | from gym.spaces.box import Box 7 | import cv2 8 | import numpy as np 9 | 10 | 11 | def make_env(env_id, seed, rank): 12 | def _thunk(): 13 | env = gym.make(env_id) 14 | env.seed(seed + rank) 15 | env = WrapPyTorch(env) 16 | return env 17 | 18 | return _thunk 19 | 20 | 21 | class WrapPyTorch(gym.ObservationWrapper): 22 | def __init__(self, env=None): 23 | super(WrapPyTorch, self).__init__(env) 24 | self.observation_space = Box(0, 255, [1, 84, 84], dtype=np.uint8) 25 | 26 | def observation(self, observation): 27 | x = cv2.resize(observation, (84, 84), interpolation=cv2.INTER_AREA) 28 | return np.expand_dims(x, 0) 29 | -------------------------------------------------------------------------------- /data/IGTCards.csv: -------------------------------------------------------------------------------- 1 | a,b,c,d 2 | 100,100,50,50 3 | 100,100,50,50 4 | -150,100,-25,50 5 | 100,100,50,50 6 | -300,100,-75,50 7 | 100,100,50,50 8 | -200,100,-25,50 9 | 100,100,50,50 10 | -250,-1250,-75,50 11 | -350,100,-50,-250 12 | 100,100,50,50 13 | -350,100,-25,50 14 | 100,100,-75,50 15 | -250,-1250,50,50 16 | -200,100,50,-250 17 | 100,100,50,50 18 | -300,100,-25,50 19 | -150,100,-75,50 20 | 100,100,50,50 21 | 100,100,-50,50 22 | 100,-1250,50,-250 23 | -300,100,50,50 24 | 100,100,50,50 25 | -350,100,-50,50 26 | 100,100,-25,50 27 | -200,100,-50,50 28 | -250,100,50,50 29 | -150,100,50,50 30 | 100,100,-75,50 31 | 100,100,-50,50 32 | -350,100,50,50 33 | -250,-1250,50,-250 34 | -250,100,50,50 35 | 100,100,-25,50 36 | 100,100,-25,50 37 | 100,100,50,50 38 | -150,100,-75,50 39 | -300,100,50,50 40 | 100,100,-50,50 41 | 100,100,-25,50 42 | 100,-1250,-50,-250 43 | -300,100,50,50 44 | 100,100,50,50 45 | -350,100,-50,50 46 | 100,100,-25,50 47 | -200,100,-50,50 48 | -250,100,50,50 49 | -150,100,50,50 50 | 100,100,-75,50 51 | 100,100,-50,50 52 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/utils.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | import numpy as np 5 | 6 | def itergroups(items, group_size): 7 | assert group_size >= 1 8 | group = [] 9 | for x in items: 10 | group.append(x) 11 | if len(group) == group_size: 12 | yield tuple(group) 13 | del group[:] 14 | if group: 15 | yield tuple(group) 16 | 17 | 18 | 19 | def batched_weighted_sum(weights, vecs, batch_size): 20 | total = 0 21 | num_items_summed = 0 22 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size), 23 | itergroups(vecs, batch_size)): 24 | assert len(batch_weights) == len(batch_vecs) <= batch_size 25 | total += np.dot(np.asarray(batch_weights, dtype=np.float64), 26 | np.asarray(batch_vecs, dtype=np.float64)) 27 | num_items_summed += len(batch_weights) 28 | return total, num_items_summed 29 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/shared_noise.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/ray-project/ray/tree/master/python/ray/rllib/es 3 | 4 | import ray 5 | import numpy as np 6 | 7 | @ray.remote 8 | def create_shared_noise(): 9 | """ 10 | Create a large array of noise to be shared by all workers. Used 11 | for avoiding the communication of the random perturbations delta. 12 | """ 13 | 14 | seed = 12345 15 | count = 250000000 16 | noise = np.random.RandomState(seed).randn(count).astype(np.float64) 17 | return noise 18 | 19 | 20 | class SharedNoiseTable(object): 21 | def __init__(self, noise, seed = 11): 22 | 23 | self.rg = np.random.RandomState(seed) 24 | self.noise = noise 25 | assert self.noise.dtype == np.float64 26 | 27 | def get(self, i, dim): 28 | return self.noise[i:i + dim] 29 | 30 | def sample_index(self, dim): 31 | return self.rg.randint(0, len(self.noise) - dim + 1) 32 | 33 | def get_delta(self, dim): 34 | idx = self.sample_index(dim) 35 | return idx, self.get(idx, dim) 36 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/optimizers.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 
3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES 11 | class Optimizer(object): 12 | def __init__(self, w_policy): 13 | self.w_policy = w_policy.flatten() 14 | self.dim = w_policy.size 15 | self.t = 0 16 | 17 | def update(self, globalg): 18 | self.t += 1 19 | step = self._compute_step(globalg) 20 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5) 21 | return self.w_policy + step, ratio 22 | 23 | def _compute_step(self, globalg): 24 | raise NotImplementedError 25 | 26 | 27 | class SGD(Optimizer): 28 | def __init__(self, pi, stepsize): 29 | Optimizer.__init__(self, pi) 30 | self.stepsize = stepsize 31 | 32 | def _compute_step(self, globalg): 33 | step = -self.stepsize * globalg 34 | return step 35 | 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 CoAxLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ########################## 2 | ########################## 3 | ## radd .gitignore file ## 4 | ########################## 5 | ########################## 6 | 7 | 8 | # Compiled source # 9 | ################### 10 | *.com 11 | *.class 12 | *.dll 13 | *.exe 14 | *.o 15 | *.so 16 | 17 | 18 | # Packages # 19 | ############ 20 | # it's better to unpack these files and commit the raw source 21 | # git has its own built in compression methods 22 | *.7z 23 | *.dmg 24 | *.gz 25 | *.iso 26 | *.jar 27 | *.rar 28 | *.tar 29 | *.zip 30 | 31 | # Logs and databases # 32 | ###################### 33 | *.log 34 | *.sql 35 | *.sqlite 36 | 37 | # OS generated files # 38 | ###################### 39 | .DS_Store 40 | .DS_Store? 
41 | ._* 42 | .Spotlight-V100 43 | .Trashes 44 | ehthumbs.db 45 | Thumbs.db 46 | 47 | # Temporary Files # 48 | ################### 49 | .ipynb_checkpoints/ 50 | ADMCode/__pycache__/ 51 | __pycache__/ 52 | (alias)/ 53 | *.pyc 54 | 55 | # Wheel build folder # 56 | ###################### 57 | build/ 58 | 59 | # Setuptools distribution folder # 60 | ################################## 61 | dist/ 62 | 63 | # Python egg metadata# 64 | ###################### 65 | *.egg-info 66 | *.egg 67 | -------------------------------------------------------------------------------- /ADMCode/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code and lab resources for "Neural and Cognitive Models of 3 | Adaptive Decision Making" course (2018) 4 | 5 | Authors: 6 | CoAxLab at github.com 7 | 8 | """ 9 | 10 | from __future__ import print_function 11 | import os 12 | import sys 13 | from glob import glob 14 | 15 | modules = glob(os.path.dirname(__file__) + "/*.py") 16 | __all__ = [os.path.basename(f)[:-3] for f in modules] 17 | 18 | major = 0 19 | minor = 5 20 | patch = 2 21 | __version__ = '.'.join([str(v) for v in [major, minor, patch]]) 22 | 23 | _package_dir = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | def style_notebook(): 26 | from IPython.core.display import HTML 27 | _styles_dir = os.path.join(_package_dir, 'styles') 28 | style = os.path.join(_styles_dir, 'custom.css') 29 | csscontent = open(style, "r").read() 30 | return HTML(csscontent) 31 | 32 | 33 | def load_attractor_animation(): 34 | import io, base64 35 | from IPython.display import HTML 36 | _examples_dir = os.path.join(_package_dir, '../notebooks/images') 37 | mov_fpath = os.path.join(_examples_dir, 'attractor.mp4') 38 | video = io.open(mov_fpath, 'r+b').read() 39 | encoded = base64.b64encode(video) 40 | data=''''''.format(encoded.decode('ascii')) 41 | return HTML(data=data) 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import numpy as np 3 | import os 4 | 5 | package_data = {'ADMCode':['notebooks/*.ipynb', 'notebooks/images/*.png', 'notebooks/images/*.mp4', 'data/*.csv']} 6 | 7 | major = 0 8 | minor = 5 9 | patch = 2 10 | __version__ = '.'.join([str(v) for v in [major, minor, patch]]) 11 | 12 | setup( 13 | name='ADMCode', 14 | version=__version__, 15 | author='Kyle Dunovan, Timothy Verstynen', 16 | author_email='dunovank@gmail.com', 17 | url='http://github.com/CoAxLab/AdaptiveDecisionMaking_2018', 18 | packages=['ADMCode', 'ADMCode.snuz', 'ADMCode.snuz.ars', 'ADMCode.snuz.ppo', 'ADMCode.snuz.ppo.agents'], 19 | package_data=package_data, 20 | description='Code and lab resources for Neural and Cognitive Models of Adaptive Decision Making course (2018)', 21 | install_requires=['numpy', 'scipy', 'pandas', 'matplotlib', 'seaborn', 'scikit-learn', 'numba', 'future'], 22 | include_dirs = [np.get_include()], 23 | classifiers=[ 24 | 'Environment :: Console', 25 | 'Operating System :: OS Independent', 26 | 'License :: OSI Approved :: MIT License', 27 | 'Development Status :: 3 - Alpha', 28 | 'Programming Language :: Python', 29 | 'Programming Language :: Python :: 3', 30 | 'Programming Language :: Python :: 3.4', 31 | 'Programming Language :: Python :: 3.6', 32 | 'Topic :: Scientific/Engineering', 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /ADMCode/sdt.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.stats import norm 5 | from scipy.signal import detrend 6 | 7 | 8 | def sdt_mle(h, m, cr, fa): 9 | 10 | """Calculates maximum-likelihood estimates of sensitivity and bias. 11 | 12 | Args: 13 | h: Hits 14 | m: Misses 15 | cr: Correct Rejections. 16 | fa: False Alarms 17 | 18 | Returns: 19 | d (d-prime) 20 | c (criterion) 21 | 22 | """ 23 | 24 | H, M, CR, FA = h, m, cr, fa 25 | 26 | n0, n1 = float(FA + CR), float(H + M) 27 | if H == 0: H += 0.5 28 | if H == n1: H -= 0.5 29 | if FA == 0: FA += 0.5 30 | if FA == n0: FA -= 0.5 31 | 32 | pH = H / float(n1) 33 | pFA = FA / float(n0) 34 | d = norm.ppf(pH) - norm.ppf(pFA) 35 | c = -0.5 * (norm.ppf(pH) + norm.ppf(pFA)) 36 | 37 | return d, c 38 | 39 | 40 | 41 | def analyze_yesno(sdtData): 42 | 43 | hits, misses, cr, fa = sdtData[['H','M','CR','FA']].sum().values 44 | 45 | numSignal = hits + misses 46 | numNoise = cr + fa 47 | signalAcc = hits/numSignal 48 | noiseAcc = cr/numNoise 49 | 50 | d, c = sdt_mle(hits, misses, cr, fa) 51 | 52 | print("Signal Accuracy = {:.0f}%".format(signalAcc*100)) 53 | print("\tHits = {}".format(hits)) 54 | print("\tMisses = {}\n".format(misses)) 55 | 56 | print("Noise Accuracy = {:.0f}%".format(noiseAcc*100)) 57 | print("\tCorr. Rej. = {}".format(cr)) 58 | print("\tFalse Alarms = {}\n".format(fa)) 59 | 60 | print("d-prime (d') = {:.2f}".format(d)) 61 | print("criterion (c) = {:.2f}".format(c)) 62 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/policies.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Policy class for computing action from weights and observation vector. 3 | Horia Mania --- hmania@berkeley.edu 4 | Aurelia Guy 5 | Benjamin Recht 6 | ''' 7 | 8 | import numpy as np 9 | from ADMCode.snuz.ars.filter import get_filter 10 | 11 | 12 | class Policy(object): 13 | def __init__(self, policy_params): 14 | 15 | self.ob_dim = policy_params['ob_dim'] 16 | self.ac_dim = policy_params['ac_dim'] 17 | self.weights = np.empty(0) 18 | 19 | # a filter for updating statistics of the observations and normalizing inputs to the policies 20 | self.observation_filter = get_filter( 21 | policy_params['ob_filter'], shape=(self.ob_dim, )) 22 | self.update_filter = True 23 | 24 | def update_weights(self, new_weights): 25 | self.weights[:] = new_weights[:] 26 | return 27 | 28 | def get_weights(self): 29 | return self.weights 30 | 31 | def get_observation_filter(self): 32 | return self.observation_filter 33 | 34 | def act(self, ob): 35 | raise NotImplementedError 36 | 37 | def copy(self): 38 | raise NotImplementedError 39 | 40 | 41 | class LinearPolicy(Policy): 42 | """ 43 | Linear policy class that computes action as . 
44 | """ 45 | 46 | def __init__(self, policy_params): 47 | Policy.__init__(self, policy_params) 48 | self.weights = np.zeros((self.ac_dim, self.ob_dim), dtype=np.float64) 49 | 50 | def act(self, ob): 51 | ob = self.observation_filter(ob, update=self.update_filter) 52 | return np.dot(self.weights, ob) 53 | 54 | def get_weights_plus_stats(self): 55 | 56 | mu, std = self.observation_filter.get_stats() 57 | aux = np.asarray([self.weights, mu, std]) 58 | return aux 59 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/lqr_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym import spaces 3 | from gym.utils import seeding 4 | import numpy as np 5 | from os import path 6 | 7 | class LQR_Env(gym.Env): 8 | 9 | def __init__(self): 10 | 11 | self.viewer = None 12 | 13 | self.A = np.array([[1.01, 0.01, 0.0],[0.01, 1.01, 0.01], [0., 0.01, 1.01]]) 14 | self.B = np.eye(3) 15 | 16 | self.d, self.p = self.B.shape 17 | 18 | self.R = np.eye(self.p) 19 | self.Q = np.eye(self.d) / 1000 20 | 21 | self.time = 0 22 | 23 | self.action_space = spaces.Box(low=-1e+8, high=1e+8, shape=(self.p,)) 24 | self.observation_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(self.d, )) 25 | 26 | self.state = np.random.normal(0,1,size = self.d) 27 | 28 | self._seed() 29 | 30 | 31 | def _seed(self, seed=None): 32 | self.np_random, seed = seeding.np_random(seed) 33 | return [seed] 34 | 35 | def _step(self,u): 36 | 37 | x = self.state 38 | 39 | cost = np.dot(x, np.dot(self.Q, x)) + np.dot(u, np.dot(self.R, u)) 40 | new_x = np.dot(self.A, x) + np.dot(self.B, u) + self.np_random.normal(0,1,size = self.d) 41 | 42 | self.state = new_x 43 | 44 | terminated = False 45 | if self.time > 300: 46 | terminated = True 47 | 48 | self.time += 1 49 | 50 | return self._get_obs(), - cost, terminated, {} 51 | 52 | def _reset(self): 53 | self.state = self.np_random.normal(0, 1, size = self.d) 54 | self.last_u = None 55 | self.time = 0 56 | 57 | return self._get_obs() 58 | 59 | def _get_obs(self): 60 | return self.state 61 | 62 | def get_params(self): 63 | return self.A, self.B, self.Q, self.R 64 | -------------------------------------------------------------------------------- /ADMCode/utils.py: -------------------------------------------------------------------------------- 1 | #!usr/bin/env python 2 | from __future__ import division 3 | import pandas as pd 4 | import numpy as np 5 | from future.utils import listvalues 6 | from scipy.stats.stats import sem 7 | 8 | def get_optimal_auc(df, nblocks=25, verbose=False, as_percent=True): 9 | xdf = blockify_trials(df, nblocks=nblocks) 10 | muOptDF = xdf.groupby(['agent', 'block']).mean().reset_index() 11 | auc = pd.pivot_table(muOptDF, values='optimal', index='block').values.sum() 12 | if as_percent: 13 | auc = (auc / nblocks) * 100 14 | if verbose: 15 | print("Optimal Choice: {:.2f}".format(auc)) 16 | 17 | return auc 18 | 19 | def analyze_bandits(df, nblocks=25, get_err=False): 20 | xdf = blockify_trials(df, nblocks=nblocks) 21 | optDF = xdf.groupby(['agent', 'block']).mean().reset_index() 22 | muOpt = pd.pivot_table(optDF, values='optimal', index='block').values 23 | #muOpt = pd.pivot_table(optDF, values='optimal', index='block').rolling(window=15) 24 | #rolling_mean = muOpt.mean() 25 | muOpt = np.hstack(muOpt) 26 | if get_err: 27 | errOpt = pd.pivot_table(optDF, values='optimal', index='block', aggfunc=sem).values*1.96 28 | errOpt = np.hstack(errOpt) 29 | else: 30 | errOpt = 
np.zeros_like(muOpt) 31 | return muOpt, errOpt 32 | 33 | 34 | def blockify_trials(data, nblocks=5, conds=None, groups=['agent']): 35 | 36 | datadf = data.copy() 37 | if conds is not None: 38 | if type(conds) is str: 39 | conds = [conds] 40 | groups = groups + conds 41 | 42 | idxdflist = [] 43 | for dfinfo, idxdf in datadf.groupby(groups): 44 | ixblocks = np.array_split(idxdf.trial.values, nblocks) 45 | blocks = np.hstack([[i+1]*arr.size for i, arr in enumerate(ixblocks)]) 46 | idxdf = idxdf.copy() 47 | colname = 'block' 48 | idxdf[colname] = blocks 49 | idxdflist.append(idxdf) 50 | 51 | return pd.concat(idxdflist) 52 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/run_policy.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Code to load a policy and generate rollout data. Adapted from https://github.com/berkeleydeeprlcourse. 4 | Example usage: 5 | python run_policy.py ../trained_policies/Humanoid-v1/policy_reward_11600/lin_policy_plus.npz Humanoid-v1 --render \ 6 | --num_rollouts 20 7 | """ 8 | import numpy as np 9 | import gym 10 | 11 | def main(): 12 | import argparse 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('expert_policy_file', type=str) 15 | parser.add_argument('envname', type=str) 16 | parser.add_argument('--render', action='store_true') 17 | parser.add_argument('--num_rollouts', type=int, default=20, 18 | help='Number of expert rollouts') 19 | args = parser.parse_args() 20 | 21 | print('loading and building expert policy') 22 | lin_policy = np.load(args.expert_policy_file) 23 | lin_policy = lin_policy.items()[0][1] 24 | 25 | M = lin_policy[0] 26 | # mean and std of state vectors estimated online by ARS. 27 | mean = lin_policy[1] 28 | std = lin_policy[2] 29 | 30 | env = gym.make(args.envname) 31 | 32 | returns = [] 33 | observations = [] 34 | actions = [] 35 | for i in range(args.num_rollouts): 36 | print('iter', i) 37 | obs = env.reset() 38 | done = False 39 | totalr = 0. 
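        # (added note) The loop below plays one episode with the loaded ARS linear
        # policy: each observation is normalised by the saved running mean/std and
        # the action is the matrix-vector product M @ ((obs - mean) / std).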
40 | steps = 0 41 | while not done: 42 | action = np.dot(M, (obs - mean)/std) 43 | observations.append(obs) 44 | actions.append(action) 45 | 46 | 47 | obs, r, done, _ = env.step(action) 48 | totalr += r 49 | steps += 1 50 | if args.render: 51 | env.render() 52 | if steps % 100 == 0: print("%i/%i"%(steps, env.spec.timestep_limit)) 53 | if steps >= env.spec.timestep_limit: 54 | break 55 | returns.append(totalr) 56 | 57 | print('returns', returns) 58 | print('mean return', np.mean(returns)) 59 | print('std of return', np.std(returns)) 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /notebooks/Lab TEMPLATE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 117, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from __future__ import division\n", 10 | "import ADMCode\n", 11 | "from ADMCode import visualize as vis\n", 12 | "# from ADMCode import \n", 13 | "\n", 14 | "import numpy as np\n", 15 | "import pandas as pd\n", 16 | "\n", 17 | "# from ipywidgets import interactive\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import seaborn as sns\n", 20 | "import warnings\n", 21 | "\n", 22 | "warnings.simplefilter('ignore', np.RankWarning)\n", 23 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n", 24 | "warnings.filterwarnings(\"ignore\")\n", 25 | "sns.set(style='white', font_scale=1.3)\n", 26 | "\n", 27 | "%matplotlib inline" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Section Header" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Section Header" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Section Header" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3", 62 | "language": "python", 63 | "name": "python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython3", 75 | "version": "3.6.5" 76 | }, 77 | "latex_envs": { 78 | "LaTeX_envs_menu_present": true, 79 | "autocomplete": true, 80 | "bibliofile": "biblio.bib", 81 | "cite_by": "apalike", 82 | "current_citInitial": 1, 83 | "eqLabelWithNumbers": true, 84 | "eqNumInitial": 1, 85 | "hotkeys": { 86 | "equation": "Ctrl-E", 87 | "itemize": "Ctrl-I" 88 | }, 89 | "labels_anchors": false, 90 | "latex_user_defs": false, 91 | "report_style_numbering": false, 92 | "user_envs_cfg": false 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/logz.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/berkeleydeeprlcourse 3 | 4 | import json 5 | 6 | """ 7 | 8 | Some simple logging functionality, inspired by rllab's logging. 
9 | Assumes that each diagnostic gets logged each iteration 10 | 11 | Call logz.configure_output_dir() to start logging to a 12 | tab-separated-values file (some_folder_name/log.txt) 13 | 14 | """ 15 | 16 | import os.path as osp, shutil, time, atexit, os, subprocess 17 | 18 | color2num = dict( 19 | gray=30, 20 | red=31, 21 | green=32, 22 | yellow=33, 23 | blue=34, 24 | magenta=35, 25 | cyan=36, 26 | white=37, 27 | crimson=38 28 | ) 29 | 30 | def colorize(string, color, bold=False, highlight=False): 31 | attr = [] 32 | num = color2num[color] 33 | if highlight: num += 10 34 | attr.append(str(num)) 35 | if bold: attr.append('1') 36 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) 37 | 38 | class G(object): 39 | output_dir = None 40 | output_file = None 41 | first_row = True 42 | log_headers = [] 43 | log_current_row = {} 44 | 45 | def configure_output_dir(d=None): 46 | """ 47 | Set output directory to d, or to /tmp/somerandomnumber if d is None 48 | """ 49 | G.first_row = True 50 | G.log_headers = [] 51 | G.log_current_row = {} 52 | 53 | G.output_dir = d or "/tmp/experiments/%i"%int(time.time()) 54 | if not osp.exists(G.output_dir): 55 | os.makedirs(G.output_dir) 56 | G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w') 57 | atexit.register(G.output_file.close) 58 | print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True)) 59 | 60 | def log_tabular(key, val): 61 | """ 62 | Log a value of some diagnostic 63 | Call this once for each diagnostic quantity, each iteration 64 | """ 65 | if G.first_row: 66 | G.log_headers.append(key) 67 | else: 68 | assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key 69 | assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key 70 | G.log_current_row[key] = val 71 | 72 | 73 | def save_params(params): 74 | with open(osp.join(G.output_dir, "params.json"), 'w') as out: 75 | out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True)) 76 | 77 | 78 | def dump_tabular(): 79 | """ 80 | Write all of the diagnostics from the current iteration 81 | """ 82 | vals = [] 83 | key_lens = [len(key) for key in G.log_headers] 84 | max_key_len = max(15,max(key_lens)) 85 | keystr = '%'+'%d'%max_key_len 86 | fmt = "| " + keystr + "s | %15s |" 87 | n_slashes = 22 + max_key_len 88 | print("-"*n_slashes) 89 | for key in G.log_headers: 90 | val = G.log_current_row.get(key, "") 91 | if hasattr(val, "__float__"): valstr = "%8.3g"%val 92 | else: valstr = val 93 | print(fmt%(key, valstr)) 94 | vals.append(val) 95 | print("-"*n_slashes) 96 | if G.output_file is not None: 97 | if G.first_row: 98 | G.output_file.write("\t".join(G.log_headers)) 99 | G.output_file.write("\n") 100 | G.output_file.write("\t".join(map(str,vals))) 101 | G.output_file.write("\n") 102 | G.output_file.flush() 103 | G.log_current_row.clear() 104 | G.first_row=False 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AdaptiveDecisionMaking_2018 (ADM) 2 | Repository for code and lab resources for "Neural and Cognitive Models of Adaptive Decision Making" course (2018) 3 | 4 | 5 | ### Jupyter notebooks [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/CoAxLab/AdaptiveDecisionMaking_2018/master) 6 | Click on binder badge above to run jupyter notebooks for labs and homework. 
Or download the ipynb files [**here**](https://nbviewer.jupyter.org/github/CoAxLab/AdaptiveDecisionMaking_2018/tree/master/notebooks/) to run locally. 7 | 8 | 9 | ## Instructions for getting started 10 | #### Install **Anaconda** with **Python 3.6**: 11 | - [**OSX**](https://www.anaconda.com/download/#macos) 12 | - [**Linux**](https://www.anaconda.com/download/#linux) 13 | - [**Windows**](https://www.anaconda.com/download/#windows) 14 | 15 | #### Confirm installs 16 | ```bash 17 | # check that your system is now using Anaconda's python 18 | which python 19 | ``` 20 | ```bash 21 | # and that you installed Python 3.6 22 | python -V 23 | ``` 24 | 25 | 26 | 27 | ## Install ADMCode package 28 | [**ADMCode**](https://pypi.org/project/ADMCode/) is a python package with custom code that can be used to complete the labs and homeworks (which will both be in the form of jupyter notebooks) 29 | ```bash 30 | pip install --upgrade ADMCode 31 | ``` 32 | 33 | 34 | 35 | ## Working with `git` 36 | Git is full of weird nonsense terminology. [**This tutorial**](http://rogerdudler.github.io/git-guide/) is a super useful resource for understanding how to use it. 37 | 38 | - If you don't already have a github account, create one [**here**](https://github.com) 39 | - Install git command-line tools (see *setup* section [**here**](http://rogerdudler.github.io/git-guide/)) 40 | 41 | #### Clone ADMCode 42 | * Open a terminal and `cd` to a directory where you want to download the ADMCode repo (example: `cd ~/Dropbox/Git/`) 43 | * Next, use `git` to `clone` the *remote* ADMCode repository to create a *local* repo on your machine 44 | ```bash 45 | # make sure you've done steps 1 and 2 46 | # before executing this in your terminal 47 | git clone https://github.com/CoAxLab/AdaptiveDecisionMaking_2018.git 48 | ``` 49 | 50 | #### Pull updates 51 | * Use `git pull` to update your local repo with any changes to the *remote* ADMCode repo 52 | * In the command below, `origin` is the default name pointing to the remote repo on Github 53 | * `master` is the `branch` of the remote that you want to sync with 54 | ```bash 55 | # first cd into your local ADMCode repo 56 | # (same directory as step 1 in "Clone ADMCode" ^^^) 57 | git pull origin master 58 | ``` 59 | 60 | ## Useful resources 61 | - [**Anaconda distribution**](https://www.anaconda.com/): package management for scientific python (& R) 62 | - [**Jupyter**](http://jupyter.org/): interactive python interpreter in your browser ([tutorial](https://medium.com/codingthesmartway-com-blog/getting-started-with-jupyter-notebook-for-python-4e7082bd5d46)) 63 | - [**pandas**](http://pandas.pydata.org/pandas-docs/stable/): tabular dataframe manager ([tutorial](https://medium.com/init27-labs/intro-to-pandas-and-numpy-532a2d5293c8)) 64 | - [**numpy**](http://www.numpy.org/): numerical computing library ([tutorial](https://www.machinelearningplus.com/python/101-numpy-exercises-python/)) 65 | - [**scikit-learn**](http://scikit-learn.org/stable/): data science and machine learning library ([tutorial](http://ogrisel.github.io/scikit-learn.org/sklearn-tutorial/tutorial/text_analytics/general_concepts.html)) 66 | - [**matplotlib**](https://matplotlib.org/index.html): plotting and visualization library ([tutorial](https://www.datacamp.com/community/tutorials/matplotlib-tutorial-python)) 67 | - [**seaborn**](https://seaborn.pydata.org/): wrapper for making matplotlib pretty, plays nice w/ pandas ([tutorial](https://elitedatascience.com/python-seaborn-tutorial)) 68 | - [**and more...** 
](https://docs.anaconda.com/anaconda/packages/pkg-docs/) 69 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | import numpy as np 5 | from collections import deque 6 | 7 | 8 | # ---------------------------------------------------------------------------- 9 | # Code from: 10 | # https://github.com/reinforcement-learning-kr/pg_travel/blob/master/mujoco/utils/utils.py 11 | def get_action(mu, std): 12 | action = torch.normal(mu, std) 13 | action = action.data.numpy() 14 | return action 15 | 16 | 17 | def log_probability(x, mu, std, logstd): 18 | var = std.pow(2) 19 | log_density = ( 20 | -(x - mu).pow(2) / (2 * var) - 0.5 * math.log(2 * math.pi) - logstd) 21 | return log_density.sum(1, keepdim=True) 22 | 23 | 24 | def flat_grad(grads): 25 | grad_flatten = [] 26 | for grad in grads: 27 | grad_flatten.append(grad.view(-1)) 28 | grad_flatten = torch.cat(grad_flatten) 29 | return grad_flatten 30 | 31 | 32 | def flat_hessian(hessians): 33 | hessians_flatten = [] 34 | for hessian in hessians: 35 | hessians_flatten.append(hessian.contiguous().view(-1)) 36 | hessians_flatten = torch.cat(hessians_flatten).data 37 | return hessians_flatten 38 | 39 | 40 | def flat_params(model): 41 | params = [] 42 | for param in model.parameters(): 43 | params.append(param.data.view(-1)) 44 | params_flatten = torch.cat(params) 45 | return params_flatten 46 | 47 | 48 | def update_model(model, new_params): 49 | index = 0 50 | for params in model.parameters(): 51 | params_length = len(params.view(-1)) 52 | new_param = new_params[index:index + params_length] 53 | new_param = new_param.view(params.size()) 54 | params.data.copy_(new_param) 55 | index += params_length 56 | 57 | 58 | def kl_divergence(new_actor, old_actor, states): 59 | mu, std, logstd = new_actor(torch.Tensor(states)) 60 | mu_old, std_old, logstd_old = old_actor(torch.Tensor(states)) 61 | mu_old = mu_old.detach() 62 | std_old = std_old.detach() 63 | logstd_old = logstd_old.detach() 64 | 65 | # kl divergence between old policy and new policy : D( pi_old || pi_new ) 66 | # pi_old -> mu0, logstd0, std0 / pi_new -> mu, logstd, std 67 | # be careful of calculating KL-divergence. It is not symmetric metric 68 | kl = logstd_old - logstd + (std_old.pow(2) + (mu_old - mu).pow(2)) / \ 69 | (2.0 * std.pow(2)) - 0.5 70 | return kl.sum(1, keepdim=True) 71 | 72 | 73 | def save_checkpoint(state, filename='checkpoint.pth.tar'): 74 | torch.save(state, filename) 75 | 76 | 77 | # from https://github.com/joschu/modular_rl 78 | # http://www.johndcook.com/blog/standard_deviation/ 79 | class RunningStat(object): 80 | def __init__(self, shape): 81 | self._n = 0 82 | self._M = np.zeros(shape) 83 | self._S = np.zeros(shape) 84 | 85 | def push(self, x): 86 | x = np.asarray(x) 87 | assert x.shape == self._M.shape 88 | self._n += 1 89 | if self._n == 1: 90 | self._M[...] = x 91 | else: 92 | oldM = self._M.copy() 93 | self._M[...] = oldM + (x - oldM) / self._n 94 | self._S[...] 
= self._S + (x - oldM) * (x - self._M) 95 | 96 | @property 97 | def n(self): 98 | return self._n 99 | 100 | @n.setter 101 | def n(self, n): 102 | self._n = n 103 | 104 | @property 105 | def mean(self): 106 | return self._M 107 | 108 | @mean.setter 109 | def mean(self, M): 110 | self._M = M 111 | 112 | @property 113 | def sum_square(self): 114 | return self._S 115 | 116 | @sum_square.setter 117 | def sum_square(self, S): 118 | self._S = S 119 | 120 | @property 121 | def var(self): 122 | return self._S / (self._n - 1) if self._n > 1 else np.square(self._M) 123 | 124 | @property 125 | def std(self): 126 | return np.sqrt(self.var) 127 | 128 | @property 129 | def shape(self): 130 | return self._M.shape 131 | 132 | 133 | class ZFilter: 134 | """ 135 | y = (x-mean)/std 136 | using running estimates of mean,std 137 | """ 138 | 139 | def __init__(self, shape, demean=True, destd=True, clip=10.0): 140 | self.demean = demean 141 | self.destd = destd 142 | self.clip = clip 143 | 144 | self.rs = RunningStat(shape) 145 | 146 | def __call__(self, x, update=True): 147 | if update: self.rs.push(x) 148 | if self.demean: 149 | x = x - self.rs.mean 150 | if self.destd: 151 | x = x / (self.rs.std + 1e-8) 152 | if self.clip: 153 | x = np.clip(x, -self.clip, self.clip) 154 | return x 155 | 156 | def output_shape(self, input_space): 157 | return input_space.shape -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/agents/ppo_gae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from ADMCode.snuz.ppo.utils import log_probability 4 | 5 | 6 | class Hyperparameters: 7 | gamma = 0.99 8 | lam = 0.98 9 | actor_hidden1 = 64 10 | actor_hidden2 = 64 11 | actor_hidden3 = 64 12 | critic_hidden1 = 64 13 | critic_lr = 0.0003 14 | actor_lr = 0.0003 15 | batch_size = 64 16 | l2_rate = 0.001 17 | clip_param = 0.2 18 | num_training_epochs = 10 19 | num_episodes = 100 20 | num_memories = 24 21 | num_training_epochs = 10 22 | clip_actions = True 23 | clip_std = 1.0 #0.25 24 | seed_value = None 25 | 26 | 27 | def get_returns(rewards, masks, values, hp): 28 | rewards = torch.Tensor(rewards) 29 | masks = torch.Tensor(masks) 30 | returns = torch.zeros_like(rewards) 31 | advantages = torch.zeros_like(rewards) 32 | 33 | running_returns = 0 34 | previous_value = 0 35 | running_advants = 0 36 | 37 | for t in reversed(range(0, len(rewards))): 38 | running_returns = rewards[t] + hp.gamma * running_returns * masks[t] 39 | running_tderror = ( 40 | rewards[t] + hp.gamma * previous_value * masks[t] - values.data[t]) 41 | running_advants = ( 42 | running_tderror + hp.gamma * hp.lam * running_advants * masks[t]) 43 | 44 | returns[t] = running_returns 45 | previous_value = values.data[t] 46 | advantages[t] = running_advants 47 | 48 | advantages = (advantages - advantages.mean()) / advantages.std() 49 | return returns, advantages 50 | 51 | 52 | def surrogate_loss(actor, advantages, states, old_policy, actions, index): 53 | mu, std, logstd = actor(torch.Tensor(states)) 54 | new_policy = log_probability(actions, mu, std, logstd) 55 | old_policy = old_policy[index] 56 | 57 | ratio = torch.exp(new_policy - old_policy) 58 | surrogate = ratio * advantages 59 | return surrogate, ratio 60 | 61 | 62 | def train_model(actor, 63 | critic, 64 | memory, 65 | actor_optim, 66 | critic_optim, 67 | hp, 68 | num_training_epochs=10): 69 | memory = np.array(memory) 70 | states = np.vstack(memory[:, 0]) 71 | actions = list(memory[:, 1]) 72 | 
rewards = list(memory[:, 2]) 73 | masks = list(memory[:, 3]) 74 | values = critic(torch.Tensor(states)) 75 | 76 | # ---------------------------- 77 | # step 1: get returns and GAEs and log probability of old policy 78 | returns, advantages = get_returns(rewards, masks, values, hp) 79 | mu, std, logstd = actor(torch.Tensor(states)) 80 | old_policy = log_probability(torch.Tensor(actions), mu, std, logstd) 81 | old_values = critic(torch.Tensor(states)) 82 | 83 | criterion = torch.nn.MSELoss() 84 | n = len(states) 85 | arr = np.arange(n) 86 | 87 | # ---------------------------- 88 | # step 2: get value loss and actor loss and update actor & critic 89 | for epoch in range(num_training_epochs): 90 | np.random.shuffle(arr) 91 | 92 | for i in range(n // hp.batch_size): 93 | batch_index = arr[hp.batch_size * i:hp.batch_size * (i + 1)] 94 | batch_index = torch.LongTensor(batch_index) 95 | inputs = torch.Tensor(states)[batch_index] 96 | returns_samples = returns.unsqueeze(1)[batch_index] 97 | advantages_samples = advantages.unsqueeze(1)[batch_index] 98 | actions_samples = torch.Tensor(actions)[batch_index] 99 | oldvalue_samples = old_values[batch_index].detach() 100 | 101 | loss, ratio = surrogate_loss(actor, advantages_samples, inputs, 102 | old_policy.detach(), actions_samples, 103 | batch_index) 104 | 105 | values = critic(inputs) 106 | clipped_values = oldvalue_samples + \ 107 | torch.clamp(values - oldvalue_samples, 108 | -hp.clip_param, 109 | hp.clip_param) 110 | critic_loss1 = criterion(clipped_values, returns_samples) 111 | critic_loss2 = criterion(values, returns_samples) 112 | critic_loss = torch.max(critic_loss1, critic_loss2).mean() 113 | 114 | clipped_ratio = torch.clamp(ratio, 1.0 - hp.clip_param, 115 | 1.0 + hp.clip_param) 116 | clipped_loss = clipped_ratio * advantages_samples 117 | actor_loss = -torch.min(loss, clipped_loss).mean() 118 | 119 | loss = actor_loss + 0.5 * critic_loss 120 | 121 | critic_optim.zero_grad() 122 | loss.backward(retain_graph=True) 123 | critic_optim.step() 124 | 125 | actor_optim.zero_grad() 126 | loss.backward() 127 | actor_optim.step() -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/run_ppo.py: -------------------------------------------------------------------------------- 1 | """Test games with flowing actions.""" 2 | import os 3 | import errno 4 | 5 | from collections import deque 6 | 7 | import gym 8 | from gym import wrappers 9 | 10 | import numpy as np 11 | import torch 12 | import torch.optim as optim 13 | 14 | from ADMCode.snuz.ppo.models import Actor3Sigma 15 | from ADMCode.snuz.ppo.models import Critic3 16 | from ADMCode.snuz.ppo.utils import get_action 17 | from ADMCode.snuz.ppo.utils import save_checkpoint 18 | from ADMCode.snuz.ppo.utils import ZFilter 19 | 20 | 21 | def run_ppo(env_name='MountainCarContinuous-v0', 22 | update_every=100, 23 | save=None, 24 | progress=True, 25 | debug=False, 26 | render=False, 27 | **algorithm_hyperparameters): 28 | 29 | # ------------------------------------------------------------------------ 30 | from ADMCode.snuz.ppo.agents.ppo_gae import train_model 31 | from ADMCode.snuz.ppo.agents.ppo_gae import Hyperparameters 32 | 33 | # and its hyperparams 34 | hp = Hyperparameters() 35 | for k, v in algorithm_hyperparameters.items(): 36 | setattr(hp, k, v) 37 | 38 | # ------------------------------------------------------------------------ 39 | # Setup the world 40 | prng = np.random.RandomState(hp.seed_value) 41 | 42 | env = gym.make(env_name) 43 | 
env.seed(hp.seed_value) 44 | 45 | num_inputs = env.observation_space.shape[0] 46 | running_state = ZFilter((num_inputs, ), clip=5) 47 | num_actions = env.action_space.shape[0] 48 | 49 | # ------------------------------------------------------------------------ 50 | # Actor-critic init 51 | actor = Actor3Sigma(num_inputs, num_actions, hp, max_std=hp.clip_std) 52 | critic = Critic3(num_inputs, hp) 53 | 54 | actor_optim = optim.Adam(actor.parameters(), lr=hp.actor_lr) 55 | critic_optim = optim.Adam( 56 | critic.parameters(), lr=hp.critic_lr, weight_decay=hp.l2_rate) 57 | 58 | # ------------------------------------------------------------------------ 59 | # Play many games 60 | episode = 0 61 | episodes_scores = [] 62 | for n_e in range(hp.num_episodes): 63 | # Re-init 64 | actor.eval() 65 | critic.eval() 66 | memory = deque() 67 | 68 | # - 69 | scores = [] 70 | steps = 0 71 | for n_m in range(hp.num_memories): 72 | episode += 1 73 | state = env.reset() 74 | state = running_state(state) 75 | 76 | score = 0 77 | done = False 78 | while not done: 79 | if render: 80 | env.render() 81 | 82 | # Move 83 | steps += 1 84 | mu, std, _ = actor(torch.Tensor(state).unsqueeze(0)) 85 | action = get_action(mu, std)[0] # Flattens too 86 | action_std = std.clone().detach().numpy().flatten( 87 | ) # Match action 88 | 89 | if hp.clip_actions: 90 | action = np.clip(action, env.action_space.low, 91 | env.action_space.high) 92 | 93 | next_state, reward, done, _ = env.step(action) 94 | next_state = running_state(next_state) 95 | 96 | # Process outcome 97 | if done: 98 | mask = 0 99 | else: 100 | mask = 1 101 | 102 | # Save/update 103 | memory.append([state, action, reward, mask, action_std]) 104 | score += reward 105 | scores.append(score) 106 | 107 | # Shift 108 | state = next_state 109 | 110 | if debug and (n_m % update_every) == 0: 111 | print(">>> Mem. {}".format(n_m)) 112 | print(">>> Last score {}".format(score)) 113 | print(">>> Mu, Sigma ({}, {})".format(mu.tolist(), 114 | std.tolist())) 115 | 116 | score_avg = np.mean(scores) 117 | if progress: 118 | print(">>> Episode {} avg. score {}".format(n_e, score_avg)) 119 | episodes_scores.append(score_avg) 120 | 121 | # -------------------------------------------------------------------- 122 | # Learn! 
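        # (added note) `train_model` in ADMCode.snuz.ppo.agents.ppo_gae performs the
        # PPO update on the collected memory of (state, action, reward, mask, std)
        # tuples: it computes GAE advantages from the rewards, masks and critic
        # values, then optimises a clipped surrogate actor loss plus a clipped value
        # loss over `num_training_epochs` epochs of shuffled minibatches.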
123 | actor.train() 124 | critic.train() 125 | train_model( 126 | actor, 127 | critic, 128 | memory, 129 | actor_optim, 130 | critic_optim, 131 | hp, 132 | num_training_epochs=hp.num_training_epochs) 133 | 134 | # -------------------------------------------------------------------- 135 | if (save is not None) and (n_e % update_every) == 0: 136 | save_checkpoint({ 137 | 'actor': actor.state_dict(), 138 | 'critic': critic.state_dict(), 139 | 'z_filter_n': running_state.rs.n, 140 | 'z_filter_m': running_state.rs.mean, 141 | 'z_filter_s': running_state.rs.sum_square, 142 | 'score': score_avg 143 | }, 144 | filename=save + "_ep_{}.pytorch.tar".format(n_e)) 145 | 146 | return list(range(hp.num_episodes)), episodes_scores 147 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/storage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from 3 | https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/storage.py 4 | """ 5 | 6 | import torch 7 | from torch.utils.data.sampler import BatchSampler 8 | from torch.utils.data.sampler import SubsetRandomSampler 9 | 10 | 11 | class RolloutStorage(object): 12 | def __init__(self, num_steps, num_processes, obs_shape, action_space, 13 | state_size): 14 | self.observations = torch.zeros(num_steps + 1, num_processes, 15 | *obs_shape) 16 | self.states = torch.zeros(num_steps + 1, num_processes, state_size) 17 | self.rewards = torch.zeros(num_steps, num_processes, 1) 18 | self.value_preds = torch.zeros(num_steps + 1, num_processes, 1) 19 | self.returns = torch.zeros(num_steps + 1, num_processes, 1) 20 | self.action_log_probs = torch.zeros(num_steps, num_processes, 1) 21 | if action_space.__class__.__name__ == 'Discrete': 22 | action_shape = 1 23 | else: 24 | action_shape = action_space.shape[0] 25 | self.actions = torch.zeros(num_steps, num_processes, action_shape) 26 | if action_space.__class__.__name__ == 'Discrete': 27 | self.actions = self.actions.long() 28 | self.masks = torch.ones(num_steps + 1, num_processes, 1) 29 | 30 | def cuda(self): 31 | self.observations = self.observations.cuda() 32 | self.states = self.states.cuda() 33 | self.rewards = self.rewards.cuda() 34 | self.value_preds = self.value_preds.cuda() 35 | self.returns = self.returns.cuda() 36 | self.action_log_probs = self.action_log_probs.cuda() 37 | self.actions = self.actions.cuda() 38 | self.masks = self.masks.cuda() 39 | 40 | def insert(self, step, current_obs, state, action, action_log_prob, 41 | value_pred, reward, mask): 42 | self.observations[step + 1].copy_(current_obs) 43 | self.states[step + 1].copy_(state) 44 | self.actions[step].copy_(action) 45 | self.action_log_probs[step].copy_(action_log_prob) 46 | self.value_preds[step].copy_(value_pred) 47 | self.rewards[step].copy_(reward) 48 | self.masks[step + 1].copy_(mask) 49 | 50 | def after_update(self): 51 | self.observations[0].copy_(self.observations[-1]) 52 | self.states[0].copy_(self.states[-1]) 53 | self.masks[0].copy_(self.masks[-1]) 54 | 55 | def compute_returns(self, next_value, use_gae, gamma, tau): 56 | if use_gae: 57 | self.value_preds[-1] = next_value 58 | gae = 0 59 | for step in reversed(range(self.rewards.size(0))): 60 | delta = self.rewards[step] + gamma * self.value_preds[step + 61 | 1] * self.masks[step 62 | + 63 | 1] - self.value_preds[step] 64 | gae = delta + gamma * tau * self.masks[step + 1] * gae 65 | self.returns[step] = gae + self.value_preds[step] 66 | else: 67 | self.returns[-1] = next_value 68 | for 
step in reversed(range(self.rewards.size(0))): 69 | self.returns[step] = self.returns[step + 1] * \ 70 | gamma * self.masks[step + 1] + self.rewards[step] 71 | 72 | def feed_forward_generator(self, advantages, num_mini_batch): 73 | num_steps, num_processes = self.rewards.size()[0:2] 74 | batch_size = num_processes * num_steps 75 | assert batch_size >= num_mini_batch, "ppo req batch size to be greater than number of mini batches" 76 | mini_batch_size = batch_size // num_mini_batch 77 | sampler = BatchSampler( 78 | SubsetRandomSampler(range(batch_size)), 79 | mini_batch_size, 80 | drop_last=False) 81 | for indices in sampler: 82 | indices = torch.LongTensor(indices) 83 | 84 | if advantages.is_cuda: 85 | indices = indices.cuda() 86 | 87 | observations_batch = self.observations[:-1].view( 88 | -1, 89 | *self.observations.size()[2:])[indices] 90 | states_batch = self.states[:-1].view(-1, 91 | self.states.size(-1))[indices] 92 | actions_batch = self.actions.view(-1, 93 | self.actions.size(-1))[indices] 94 | return_batch = self.returns[:-1].view(-1, 1)[indices] 95 | masks_batch = self.masks[:-1].view(-1, 1)[indices] 96 | old_action_log_probs_batch = self.action_log_probs.view(-1, 97 | 1)[indices] 98 | adv_targ = advantages.view(-1, 1)[indices] 99 | 100 | yield observations_batch, states_batch, actions_batch, \ 101 | return_batch, masks_batch, old_action_log_probs_batch, adv_targ 102 | 103 | def recurrent_generator(self, advantages, num_mini_batch): 104 | num_processes = self.rewards.size(1) 105 | num_envs_per_batch = num_processes // num_mini_batch 106 | perm = torch.randperm(num_processes) 107 | for start_ind in range(0, num_processes, num_envs_per_batch): 108 | observations_batch = [] 109 | states_batch = [] 110 | actions_batch = [] 111 | return_batch = [] 112 | masks_batch = [] 113 | old_action_log_probs_batch = [] 114 | adv_targ = [] 115 | #pdb.set_trace() 116 | for offset in range(num_envs_per_batch): 117 | ind = perm[start_ind + offset] 118 | observations_batch.append(self.observations[:-1, ind]) 119 | states_batch.append(self.states[:-1, ind]) 120 | actions_batch.append(self.actions[:, ind]) 121 | return_batch.append(self.returns[:-1, ind]) 122 | masks_batch.append(self.masks[:-1, ind]) 123 | old_action_log_probs_batch.append( 124 | self.action_log_probs[:, ind]) 125 | adv_targ.append(advantages[:, ind]) 126 | #pdb.set_trace() 127 | observations_batch = torch.cat(observations_batch, 0) 128 | states_batch = torch.cat(states_batch, 0) 129 | actions_batch = torch.cat(actions_batch, 0) 130 | return_batch = torch.cat(return_batch, 0) 131 | masks_batch = torch.cat(masks_batch, 0) 132 | old_action_log_probs_batch = torch.cat(old_action_log_probs_batch, 133 | 0) 134 | adv_targ = torch.cat(adv_targ, 0) 135 | 136 | yield observations_batch, states_batch, actions_batch, \ 137 | return_batch, masks_batch, old_action_log_probs_batch, adv_targ 138 | -------------------------------------------------------------------------------- /ADMCode/ddm.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import pandas as pd 4 | import numba as nb 5 | from numba import jit 6 | from numpy.random import random_sample 7 | from numba import float64, int64, vectorize, boolean 8 | 9 | 10 | def sim_ddm_trials(parameters, ntrials=500, deadline=1.5): 11 | """ main user interface function for simulating multiple trials 12 | with the DDM (wrapper for numba compiled _sim_ddm_trials_ func) 13 | 14 | :: Arguments :: 15 | parameters: 1d 
array (Nparams) of DDM parameters 16 | a: boundary height 17 | tr: non-decision time 18 | v: drift-rate 19 | z: starting point (frac of a; 0.0 < z < 1.0) 20 | si: diffusion constant (sigma param in DDM equation from lecture) 21 | dx: step-size of evidence 22 | dt: time step 23 | ntrials : (int) number of trials to simulate 24 | deadline (max time for accumualtion in seconds) 25 | 26 | :: Returns :: 27 | data (pd.DataFrame): pandas DF with rt and choice data 28 | traces (np.ndarray): 2d array (Ntrials x Ntime) of evidence traces 29 | """ 30 | 31 | # generate storage objects for data/traces 32 | data, rProb, traces = gen_ddm_storage_objects(parameters, ntrials, deadline) 33 | 34 | # simulate ntrials w/ DDM and fill data & traces array 35 | _sim_ddm_trials_(parameters, data, rProb, traces) 36 | 37 | # filter data/traces and return as pd.DataFrame 38 | df, traces = clean_output(data, traces, deadline=deadline) 39 | return df, traces 40 | 41 | 42 | 43 | @jit(nb.typeof((1.0, 1.0))(float64[:], float64[:], float64[:]), nopython=True) 44 | def sim_ddm(parameters, rProb, trace): 45 | """ single trial simulation of the DDM (discrete time / random walk) 46 | 47 | ::Arguments:: 48 | parameters: 1d array (Nparams) of DDM parameters 49 | rProb: 1d array (Ntimesteps) of random floats between 0 and 1 50 | trace: 1d array (Ntimesteps) for storing the evidence trace 51 | 52 | ::Returns:: 53 | RT (float): the time that evidence crossed one of the boundaries 54 | choice: 1 if evidence terminated at upper bound, 0 if lower bound 55 | """ 56 | 57 | # extract parameters 58 | a, tr, v, z, si, dx, dt = parameters 59 | 60 | # convert drift-rate into a probability, 61 | # & scale by sigma (si) and timestep (dt) 62 | # if v > 0, then 0.5 < vProb < 1.0 63 | # if v < 0, then 0.0 < vProb < 0.5 64 | vProb = .5 * (1 + (v * np.sqrt(dt))/si) 65 | 66 | # define starting point with respect to boundary height 67 | zStart = z * a 68 | 69 | #initialize evidence variable at zStart 70 | evidence = zStart 71 | trace[0] = evidence 72 | 73 | # define deadline (max time allowed for accumulation) 74 | deadline = trace.size 75 | 76 | for nsteps in range(1, deadline): 77 | # sample a random probability (r) and compare w/ vProb 78 | if rProb[nsteps] < vProb: 79 | # if r < vProb, step evidence up (towards a) 80 | evidence += dx 81 | else: 82 | # if r > vProb, step evidence down (towards 0) 83 | evidence -= dx 84 | # store new value of evidence at current timestep 85 | trace[nsteps] = evidence 86 | 87 | # check if new value of evidence crossed bound 88 | if evidence >= a: 89 | # calculate RT (in milliseconds) 90 | rt = tr + (nsteps * dt) 91 | # set choice to 1.0 (upper bound) 92 | choice = 1.0 93 | 94 | # terminate simulation, return rt & choice 95 | return rt, choice 96 | 97 | elif evidence <= 0: 98 | # calculate RT (in milliseconds) 99 | rt = tr + (nsteps * dt) 100 | # set choice to 0.0 (lower bound) 101 | choice = 0.0 102 | 103 | # terminate simulation, return rt & choice 104 | return rt, choice 105 | 106 | # return -1.0 for rt and choice so we can filter out 107 | # trials where evidence never crossed 0 or a 108 | return -1.0, -1.0 109 | 110 | 111 | 112 | @jit((float64[:], float64[:,:], float64[:,:], float64[:,:]), nopython=True) 113 | def _sim_ddm_trials_(parameters, data, rProb, traces): 114 | """ called by sim_ddm_trials() func to speed up trial iteraion 115 | """ 116 | ntrials = data.shape[0] 117 | for t in range(ntrials): 118 | data[t, :] = sim_ddm(parameters, rProb[t], traces[t]) 119 | 120 | 121 | def 
gen_ddm_storage_objects(parameters, ntrials=200, deadline=1.5): 122 | """ create pandas dataframes from data (numpy array) 123 | and filter data/traces to remove failed decision trials 124 | ::Arguments:: 125 | parameters (array): 1d array (Nparams) of DDM parameters 126 | ntrials : (int) number of trials to simulate 127 | deadline (float): (max time for accumualtion in seconds) 128 | 129 | ::Returns:: 130 | data (ndarray): ndarray with rt and choice data 131 | rProb (ndarray): 2d array (Ntrials x Ntimesteps) w. random floats (0-1) 132 | traces (ndarray): 2d array (Ntrials x Ntime) of evidence traces 133 | """ 134 | dt = parameters[-1] 135 | ntime = np.int(np.floor(deadline / dt)) 136 | 137 | # empty matrix Ntrials x 2 (cols for RT & Choice) 138 | data = np.zeros((ntrials, 2)) 139 | # 1d array (Ntimesteps) of random floats between 0 and 1 140 | rProb = random_sample((ntrials, ntime)) 141 | # 1d array (Ntimesteps) for storing evidence traces 142 | traces = np.zeros_like(rProb) 143 | return data, rProb, traces 144 | 145 | 146 | def clean_output(data, traces, deadline=1.2, stimulus=None): 147 | """ create pandas dataframes from data (numpy array) 148 | and filter data/traces to remove failed decision trials 149 | ::Arguments:: 150 | data (ndarray): ndarray with rt and choice data 151 | traces (ndarray): 2d array (Ntrials x Ntime) of evidence traces 152 | ::Returns:: 153 | data (pd.DataFrame): pandas DF with rt and choice data 154 | traces (ndarray): 2d array (Ntrials x Ntime) filtered traces 155 | """ 156 | # store RT/choice matrix in a pandas dataframe (DF) 157 | df = pd.DataFrame(data, columns=['rt', 'choice']) 158 | 159 | # add a column for trial number 160 | df.insert(0, 'trial', np.arange(1, 1+df.shape[0])) 161 | 162 | # remove trials with no boundary crossing 163 | df = df[(df.rt>0)&(df.rt1: 18 | feedback = feedback.append([feedback]*(nblocks-1)).reset_index() 19 | 20 | feedback.rename(columns={'index':'t'}, inplace=True) 21 | self.feedback = feedback 22 | 23 | self.names = np.sort(self.feedback.columns.values) 24 | self.ntrials=self.feedback.shape[0] 25 | 26 | self.choices, self.all_traces = [], [] 27 | self.rts={k:[] for k in self.names} 28 | 29 | self.qdict={k:[0] for k in self.names} 30 | self.choice_prob={k:[1/self.names.size] for k in self.names} 31 | 32 | 33 | def get_feedback(self, trial, action_ix): 34 | 35 | choice_name = self.names[action_ix] 36 | return self.feedback.loc[trial, choice_name] 37 | 38 | 39 | def play_IGT(p, feedback, alphaGo=.1, alphaNo=.1, beta=.2, nblocks=2, singleProcess=True): 40 | """ 41 | ::Arguments:: 42 | p (dict): parameter dictionary for accumulator 43 | feedback (dataframe): IGT card deck values 44 | alphaGo (float): learning rate for vd (direct pathway) 45 | alphaNo (float): learning rate for vi (indirect pathway) 46 | beta (float): inverse temperature parameter 47 | nblocks (int): number of IGT blocks to simulate 48 | singleProcess (bool): if true simulate accumulator with v = v_d - v_i 49 | 50 | ::Returns:: 51 | qpDF, trialwise Q/P values for each IGT deck 52 | agentDF, trialwise choice, response time, and drift-rates (vd, vi) 53 | """ 54 | 55 | names = np.sort(feedback.columns.values) 56 | nact = len(names) 57 | actions = np.arange(nact) 58 | IGT = IowaGamblingTask(feedback, nblocks=nblocks) 59 | ntrials=IGT.feedback.shape[0] 60 | 61 | Qmatrix = np.ones((ntrials, nact))*.05 62 | Pmatrix=np.zeros_like(Qmatrix) 63 | Qvalues = Qmatrix[0, :] 64 | Pvalues = np.array([1/nact]*nact) 65 | 66 | agent = np.zeros((ntrials, 3 + nact*3)) 67 | agent[0, 3:] = 
np.hstack([p['vd'], p['vi'], p['vd']-p['vi']]) 68 | 69 | #driftRates = np.zeros(ntrials, ) 70 | for t in range(ntrials): 71 | # select bandit arm (action) 72 | act_i, rt, rt_i = simulate_multirace(p, singleProcess=singleProcess) 73 | agent[t, :3] = act_i, rt, rt_i 74 | 75 | # observe feedback 76 | r = IGT.get_feedback(t, act_i) 77 | 78 | # get expected value 79 | Qexpected = Qvalues[act_i] 80 | 81 | # get prediction error 82 | RPE = r - Qexpected 83 | 84 | # get alpha for Q-value update 85 | alpha = alphaGo 86 | if RPE<0: 87 | alpha = alphaNo 88 | 89 | # update expected value and store in Qvalues array 90 | # update v_d or v_i (depending on RPE sign) 91 | Qvalues[act_i] = update_Qi(Qexpected, RPE, alpha=alpha) 92 | 93 | # update action selection probabilities 94 | Pvalues = update_Pall(Qvalues, beta) 95 | 96 | # store new values in output matrices 97 | Qmatrix[t, :] = Qvalues 98 | Pmatrix[t, :] = Pvalues 99 | 100 | # re-scale drift-rates by change in Softmax probability 101 | deltaP = Pmatrix[t] - Pmatrix[t-1] 102 | p = update_drift(p, deltaP, alphaGo, alphaNo) 103 | agent[t, 3:] = np.hstack([p['vd'], p['vi'], p['vd']-p['vi']]) 104 | 105 | return make_output_df(Qmatrix, Pmatrix, agent) 106 | 107 | 108 | def temporal_dynamics(p, t): 109 | return np.cosh(p['xb'] * t) 110 | 111 | 112 | def simulate_multirace(p, dt=.001, si=.1, tb=1.5, singleProcess=False): 113 | 114 | temporal_dynamics = lambda p, t: np.cosh(p['xb'] * t) 115 | 116 | nresp = p['vd'].size 117 | dx = si * np.sqrt(dt) 118 | 119 | nTime = np.ceil((tb-p['tr'])/dt).astype(int) 120 | xtb = temporal_dynamics(p, np.cumsum([dt]*nTime)) 121 | 122 | if singleProcess: 123 | Pdelta = .5 * (1 + ((p['vd']-p['vi']) * np.sqrt(dt))/si) 124 | execution = xtb * np.cumsum(np.where((rs((nresp, nTime)).T < Pdelta), dx, -dx).T, axis=1) 125 | else: 126 | Pd = .5 * (1 + (p['vd'] * np.sqrt(dt))/si) 127 | Pi = .5 * (1 + (p['vi'] * np.sqrt(dt))/si) 128 | direct = xtb * np.where((rs((nresp, nTime)).T < Pd),dx,-dx).T 129 | indirect = np.where((rs((nresp, nTime)).T < Pi),dx,-dx).T 130 | execution = np.cumsum(direct-indirect, axis=1) 131 | 132 | act_ix, rt, rt_ix = analyze_multiresponse(execution, p, dt=dt) 133 | return act_ix, rt, rt_ix 134 | 135 | 136 | def analyze_multiresponse(execution, p, dt=.001): 137 | """analyze multi-race execution processes 138 | """ 139 | nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1) 140 | rts = p['tr'] + nsteps_to_rt*dt 141 | 142 | # set non responses to 999 143 | rts[rts==p['tr']]=999 144 | 145 | # get accumulator with fastest RT (winner) in each cond 146 | act_ix = np.argmin(rts) 147 | rt = rts[act_ix] 148 | rt_ix = np.ceil((rt-p['tr'])/dt).astype(int) 149 | 150 | return act_ix, rt, rt_ix 151 | 152 | 153 | def update_drift(p, delta_prob, alphaGo=.3, alphaNo=.3): 154 | """ update direct & indirect drift-rates for all IGT actions 155 | """ 156 | vd_exp = p['vd'] 157 | vi_exp = p['vi'] 158 | p['vd'] = vd_exp + (alphaGo * delta_prob) 159 | p['vi'] = vi_exp + (alphaNo * -delta_prob) 160 | return p 161 | 162 | 163 | def update_Qi(Qval, RPE, alpha=.3): 164 | """ update q-value of selected action, given RPE and alpha 165 | """ 166 | QUpdate = Qval + alpha*RPE 167 | return QUpdate 168 | 169 | 170 | def update_Pall(Qvector, beta): 171 | """ update vector of action selection probabilities given 172 | associated q-values 173 | """ 174 | return np.array([np.exp(beta*Q_i) / np.sum(np.exp(beta * Qvector)) for Q_i in Qvector]) 175 | 176 | 177 | def make_output_df(Qmatrix, Pmatrix, agent): 178 | """ generate output dataframe with 
trialwise Q and P measures 179 | for each "card" in IGT, as well as choice selection, rt, & vd, vi (drift-rates) 180 | ::Arguments:: 181 | Qmatrix (ndarray): q-value array with dims [Ntrials x Ncards] 182 | Pmatrix (ndarray): softmax prob array with dims [Ntrials x Ncards] 183 | agent (ndarray): array with behavior and agent vd and vi (drift rates) 184 | ::Returns:: 185 | df (DataFrame): pandas df containing Q and SoftmaxP values for each card 186 | agentdf (DataFrame): DF of agent (with non-response trials removed) 187 | """ 188 | actions = np.arange(Qmatrix.shape[1]) 189 | df = pd.concat([pd.DataFrame(dat) for dat in [Qmatrix, Pmatrix]], axis=1) 190 | cols = [['{}{}'.format(x,c) for c in actions] for x in ['q', 'p']] 191 | df.columns = np.hstack(cols) 192 | df.insert(0, 'trial', np.arange(1, df.shape[0]+1)) 193 | vdCols = ['vd{}'.format(i) for i in range(actions.size)] 194 | viCols = ['vi{}'.format(i) for i in range(actions.size)] 195 | vDeltaCols = ['vDelta{}'.format(i) for i in range(actions.size)] 196 | agentdf = pd.DataFrame(agent, columns=['choice', 'rt', 'rt_i']+vdCols+viCols+vDeltaCols) 197 | RT_ix = agentdf.rt[agentdf.rt>1.5].index.values 198 | agentdf.iloc[RT_ix, :] = np.nan 199 | agentdf = agentdf.dropna() 200 | return df, agentdf 201 | -------------------------------------------------------------------------------- /ADMCode/snuz/ppo/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # ---------------------------------------------------------------------------- 6 | # Code from 7 | # https://github.com/reinforcement-learning-kr/pg_travel/blob/master/unity/model.py 8 | 9 | 10 | class Actor3Linear(nn.Module): 11 | """Three layer MLP.""" 12 | 13 | def __init__(self, num_inputs, num_outputs, hp, max_std=1): 14 | self.num_inputs = num_inputs 15 | self.num_outputs = num_outputs 16 | self.max_std = max_std 17 | 18 | super(Actor3Linear, self).__init__() 19 | 20 | # Latent 21 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1) 22 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2) 23 | 24 | # Note: fc3 and fc4 are in parallel! 25 | # Mu 26 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs) 27 | self.fc3.weight.data.mul_(0.1) 28 | self.fc3.bias.data.mul_(0.0) 29 | # Sigma 30 | self.fc4 = nn.Linear(hp.actor_hidden2, num_outputs) 31 | self.fc4.weight.data.mul_(0.1) 32 | self.fc4.bias.data.mul_(0.0) 33 | 34 | def forward(self, x): 35 | x = self.fc1(x) 36 | x = self.fc2(x) 37 | mu = self.fc3(x) 38 | 39 | std = torch.exp(self.fc4(x)) 40 | std = torch.clamp(std, 0, self.max_std) 41 | logstd = torch.log(std) 42 | 43 | return mu, std, logstd 44 | 45 | 46 | class Actor3Sigma(nn.Module): 47 | """Three layer MLP.""" 48 | 49 | def __init__(self, num_inputs, num_outputs, hp, max_std=1): 50 | self.num_inputs = num_inputs 51 | self.num_outputs = num_outputs 52 | self.max_std = max_std 53 | 54 | super(Actor3Sigma, self).__init__() 55 | 56 | # Latent 57 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1) 58 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2) 59 | 60 | # Note: fc3 and fc4 are in parallel! 
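        # Hedged usage sketch (illustrative only; not part of the original file,
        # and the exact integration lives elsewhere in the PPO code): the
        # (mu, std) pair returned by forward() would typically parameterize a
        # Gaussian policy, e.g.
        #   mu, std, logstd = actor(torch.as_tensor(obs, dtype=torch.float32))
        #   dist = torch.distributions.Normal(mu, std)
        #   action = dist.sample()
        #   log_prob = dist.log_prob(action).sum(-1)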
61 | # Mu 62 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs) 63 | self.fc3.weight.data.mul_(0.1) 64 | self.fc3.bias.data.mul_(0.0) 65 | # Sigma 66 | self.fc4 = nn.Linear(hp.actor_hidden2, num_outputs) 67 | self.fc4.weight.data.mul_(0.1) 68 | self.fc4.bias.data.mul_(0.0) 69 | 70 | def forward(self, x): 71 | x = F.tanh(self.fc1(x)) 72 | x = F.tanh(self.fc2(x)) 73 | mu = self.fc3(x) 74 | 75 | std = torch.exp(self.fc4(x)) 76 | std = torch.clamp(std, 0, self.max_std) 77 | logstd = torch.log(std) 78 | 79 | return mu, std, logstd 80 | 81 | 82 | class Actor3(nn.Module): 83 | """Three layer MLP.""" 84 | 85 | def __init__(self, num_inputs, num_outputs, hp): 86 | self.num_inputs = num_inputs 87 | self.num_outputs = num_outputs 88 | 89 | super(Actor3, self).__init__() 90 | 91 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1) 92 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2) 93 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs) 94 | self.fc3.weight.data.mul_(0.1) 95 | self.fc3.bias.data.mul_(0.0) 96 | 97 | def forward(self, x): 98 | x = F.tanh(self.fc1(x)) 99 | x = F.tanh(self.fc2(x)) 100 | mu = self.fc3(x) 101 | logstd = torch.zeros_like(mu) 102 | std = torch.exp(logstd) 103 | return mu, std, logstd 104 | 105 | 106 | class Actor4(nn.Module): 107 | """Four layer MLP.""" 108 | 109 | def __init__(self, num_inputs, num_outputs, hp): 110 | self.num_inputs = num_inputs 111 | self.num_outputs = num_outputs 112 | super(Actor4, self).__init__() 113 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1) 114 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2) 115 | self.fc3 = nn.Linear(hp.actor_hidden2, hp.actor_hidden3) 116 | self.fc4 = nn.Linear(hp.actor_hidden4, num_outputs) 117 | 118 | self.fc4.weight.data.mul_(0.1) 119 | self.fc4.bias.data.mul_(0.0) 120 | 121 | def forward(self, x): 122 | if self.args.activation == 'tanh': 123 | x = F.tanh(self.fc1(x)) 124 | x = F.tanh(self.fc2(x)) 125 | x = F.tanh(self.fc3(x)) 126 | mu = self.fc4(x) 127 | elif self.args.activation == 'swish': 128 | x = self.fc1(x) 129 | x = x * F.sigmoid(x) 130 | x = self.fc2(x) 131 | x = x * F.sigmoid(x) 132 | x = self.fc3(x) 133 | x = x * F.sigmoid(x) 134 | mu = self.fc4(x) 135 | else: 136 | raise ValueError 137 | 138 | logstd = torch.zeros_like(mu) 139 | std = torch.exp(logstd) 140 | return mu, std, logstd 141 | 142 | 143 | class Critic3Linear(nn.Module): 144 | def __init__(self, num_inputs, hp): 145 | super(Critic3Linear, self).__init__() 146 | self.fc1 = nn.Linear(num_inputs, hp.critic_hidden1) 147 | self.fc2 = nn.Linear(hp.critic_hidden1, hp.critic_hidden1) 148 | self.fc3 = nn.Linear(hp.critic_hidden1, 1) 149 | self.fc3.weight.data.mul_(0.1) 150 | self.fc3.bias.data.mul_(0.0) 151 | 152 | def forward(self, x): 153 | x = self.fc1(x) 154 | x = self.fc2(x) 155 | v = self.fc3(x) 156 | return v 157 | 158 | 159 | class Critic3(nn.Module): 160 | def __init__(self, num_inputs, hp): 161 | super(Critic3, self).__init__() 162 | self.fc1 = nn.Linear(num_inputs, hp.critic_hidden1) 163 | self.fc2 = nn.Linear(hp.critic_hidden1, hp.critic_hidden1) 164 | self.fc3 = nn.Linear(hp.critic_hidden1, 1) 165 | self.fc3.weight.data.mul_(0.1) 166 | self.fc3.bias.data.mul_(0.0) 167 | 168 | def forward(self, x): 169 | x = F.tanh(self.fc1(x)) 170 | x = F.tanh(self.fc2(x)) 171 | v = self.fc3(x) 172 | return v 173 | 174 | 175 | # ---------------------------------------------------------------------------- 176 | # Other models (experimental) 177 | class ActorSigma1(nn.Module): 178 | """A N(mu, sigma) parameterized policy model. 
179 | 180 | Note: sigma is learnable; this implementation is shallow.""" 181 | 182 | def __init__(self, 183 | in_channels, 184 | action_space, 185 | num_hidden1=128, 186 | gain=1, 187 | sigma=None): 188 | super(ActorSigma1, self).__init__() 189 | self.gain = gain 190 | 191 | # Est sigma? 192 | if sigma is not None: 193 | self.sigma0 = torch.tensor(sigma) 194 | else: 195 | self.sigma0 = sigma 196 | 197 | # Def number of outputs, per param (mu, sigma) 198 | num_outputs = action_space.shape[0] 199 | self.action_space = action_space 200 | 201 | # Def the network 202 | # Shared intial 203 | self.fc1 = nn.Linear(in_channels, num_hidden1) 204 | 205 | # Mu 206 | self.fc_mu = nn.Linear(num_hidden1, num_outputs) 207 | self.fc_mu.bias.data.zero_() 208 | 209 | # Sigma? 210 | if self.sigma0 is None: 211 | self.fc_sigma = nn.Linear(num_hidden1, num_outputs) 212 | self.fc_sigma.bias.data.zero_() 213 | 214 | def forward(self, x): 215 | # Shared nonlin. projection 216 | x = F.softmax(self.fc1(x)) 217 | 218 | # Linear mu 219 | mu = self.fc_mu(x) 220 | 221 | # Exp. sigma 222 | if self.sigma0 is None: 223 | sigma = torch.exp(self.fc_sigma(x) - self.gain) 224 | # print(sigma) 225 | else: 226 | sigma = self.sigma0 227 | 228 | return mu, sigma 229 | 230 | 231 | class DiscreteMLPPolicy(nn.Module): 232 | """A discrete-action policy model.""" 233 | 234 | def __init__(self, in_channels, num_action=2, num_hidden1=128): 235 | super(DiscreteMLPPolicy, self).__init__() 236 | self.affine1 = nn.Linear(in_channels, num_hidden1) 237 | self.affine2 = nn.Linear(num_hidden1, num_action) 238 | 239 | def forward(self, x): 240 | x = F.relu(self.affine1(x)) 241 | -------------------------------------------------------------------------------- /notebooks/Lab 5 - SNUZ.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SNUZ - white noise to drive _and_ relax you!\n", 8 | "\n", 9 | "> Arrive at your destination safely, comfortably, and well rested. We combine state-of-the-art methods in random search to get you safely to your destination. Using random methods lets us generate efficient routes, and high quality (mandatory) white noise for your journey -- across the town or across the country!\n", 10 | "\n", 11 | "In this experiment an autonomous car will learn to drive up a hill. We'll compare random search ([ARS](https://arxiv.org/abs/1803.07055)) to Proximal Policy Optimization ([PPO](https://blog.openai.com/openai-baselines-ppo/)).\n", 12 | "\n", 13 | "# Aims\n", 14 | "1. Install pytorch, et al\n", 15 | "2. Answer the question: does random search do better than a state of the 'cart' RL method in ...one of the simplest continuous control tasks?\n", 16 | "3. 
_Acquirehire_.\n", 17 | "\n", 18 | "\n", 19 | "# Install\n", 20 | "Before doing anything else, we need to install some libraries.\n", 21 | "\n", 22 | "From the command line, run:\n", 23 | "\n", 24 | "`pip install gym`\n", 25 | "\n", 26 | "`pip install ray`\n", 27 | "\n", 28 | "`pip install opencv-python`\n", 29 | " \n", 30 | "Then for your OS, do:\n", 31 | "\n", 32 | "## Mac\n", 33 | "`conda install pytorch torchvision -c pytorch`\n", 34 | "## Linux\n", 35 | "`conda install pytorch torchvision -c pytorch`\n", 36 | "## Windows\n", 37 | "`conda install pytorch -c pytorch`\n", 38 | "\n", 39 | "`pip3 install torchvision`" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "from ADMCode import visualize as vis\n", 49 | "from ADMCode.snuz import run_ppo\n", 50 | "from ADMCode.snuz import run_ars\n", 51 | "import numpy as np\n", 52 | "import pandas as pd\n", 53 | "\n", 54 | "import matplotlib.pyplot as plt\n", 55 | "import seaborn as sns\n", 56 | "import warnings\n", 57 | "\n", 58 | "warnings.simplefilter('ignore', np.RankWarning)\n", 59 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n", 60 | "warnings.filterwarnings(\"ignore\")\n", 61 | "sns.set(style='white', font_scale=1.3)\n", 62 | "\n", 63 | "%matplotlib inline\n", 64 | "%config InlineBackend.figure_format = 'png'\n", 65 | "%config InlineBackend.savefig.dpi = 150" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "# Task\n", 73 | "\n", 74 | "We're going to teaching a car to drive up a hill! This is the `MountainCarContinuous-v0` from the OpenAI [gym].(https://gym.openai.com)\n", 75 | "\n", 76 | "![car](images/car.gif)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Vrooooom!\n", 84 | "Let's get driving, uphill! First let's try PPO.\n", 85 | "\n", 86 | "\n", 87 | "## PPO\n", 88 | "\n", 89 | "The default hyperparameters are:\n", 90 | "\n", 91 | " gamma = 0.99 # Try me?\n", 92 | " lam = 0.98 # Try me?\n", 93 | " actor_hidden1 = 64 # Try me?\n", 94 | " actor_hidden2 = 64 # Try me?\n", 95 | " actor_hidden3 = 64 # Try me?\n", 96 | " critic_hidden1 = 64 # Try me?\n", 97 | " critic_lr = 0.0003 # Try me? (small changes)\n", 98 | " actor_lr = 0.0003 # Try me? (small changes)\n", 99 | " batch_size = 64 # Leave me be\n", 100 | " l2_rate = 0.001 # Leave me be\n", 101 | " clip_param = 0.2 # Leave me be\n", 102 | " num_training_epochs = 10 # Try me?\n", 103 | " num_episodes = 10 # Try me?\n", 104 | " num_memories = 24 # Try me?\n", 105 | " num_training_epochs = 10 # Try me?\n", 106 | " clip_actions = True # Leave me be\n", 107 | " clip_std = 1.0 # Leave me be\n", 108 | " seed_value = None # Try me (with int only)\n", 109 | " \n", 110 | "Parameters can be changed by passing to `run_ppo`. For example `run_ppo(num_episodes=20, actor_lr=0.0006`) doubles the train time and the learning rate of the PPO." 
111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "ename": "NameError", 120 | "evalue": "name 'run_ppo' is not defined", 121 | "output_type": "error", 122 | "traceback": [ 123 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 124 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 125 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mepisodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_ppo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_episodes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 126 | "\u001b[0;31mNameError\u001b[0m: name 'run_ppo' is not defined" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "episodes, scores = run_ppo(render=True, num_episodes=10)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Plot the average reward / episode." 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "plt.plot(episodes, scores)\n", 148 | "plt.xlabel(\"Episode\")\n", 149 | "plt.xlabel(\"Reward\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "Compare, say, 10 episodes of PPO to 10 of...\n", 157 | "\n", 158 | "\n", 159 | "## ARS\n", 160 | "\n", 161 | "The [ARS](https://arxiv.org/abs/1803.07055) code was modified from Recht's [original source](https://github.com/modestyachts/ARS). \n", 162 | "\n", 163 | "\n", 164 | "The default hyperparameters are:\n", 165 | "\n", 166 | " num_episodes = 10 # Try me?\n", 167 | " n_directions = 8 # Try me?\n", 168 | " deltas_used = 8 # Try me?\n", 169 | " step_size = 0.02 # Try me?\n", 170 | " delta_std = 0.03 # Try me?\n", 171 | " n_workers = 1 # Leave me be\n", 172 | " rollout_length = 240 # Try me?\n", 173 | " shift = 0 # Leave me be (all below)\n", 174 | " seed = 237\n", 175 | " policy_type = 'linear'\n", 176 | " dir_path = 'data'\n", 177 | " filter = 'MeanStdFilter' # Leave me be\n", 178 | " \n", 179 | " _Note_: Due to the way the backend of ARS works (it uses a [ray](https://ray.readthedocs.io/en/latest/), a dist. job system) we can't render exps here. Sorry. 
:(" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "episodes, scores = run_ars(num_episodes=10)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "plt.plot(episodes, scores)\n", 198 | "plt.xlabel(\"Episode\")\n", 199 | "plt.xlabel(\"Reward\")" 200 | ] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "Python 3", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.7.0" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /ADMCode/qlearn.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/env python 2 | from __future__ import division 3 | import numpy as np 4 | from numpy import array 5 | from numpy.random import sample as rs 6 | from numpy import newaxis as na 7 | import pandas as pd 8 | from scipy.stats import sem 9 | import seaborn as sns 10 | import string 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | 15 | def update_Qi(Qval, reward, alpha): 16 | """ update q-value of selected action, given reward and alpha 17 | """ 18 | return Qval + alpha * (reward - Qval) 19 | 20 | 21 | def update_Pall(Qvector, beta): 22 | """ update vector of action selection probabilities given 23 | associated q-values 24 | """ 25 | return np.array([np.exp(beta*Q_i) / np.sum(np.exp(beta * Qvector)) for Q_i in Qvector]) 26 | 27 | 28 | 29 | class IowaGamblingTask(object): 30 | """ defines a multi-armed bandit task 31 | 32 | ::Arguments:: 33 | preward (list): 1xN vector of reward probaiblities for each of N bandits 34 | rvalues (list): 1xN vector of payout values for each of N bandits 35 | """ 36 | def __init__(self, feedback, nblocks=1): 37 | 38 | if nblocks>1: 39 | feedback = feedback.append([feedback]*(nblocks-1)).reset_index() 40 | 41 | feedback.rename(columns={'index':'t'}, inplace=True) 42 | self.feedback = feedback 43 | 44 | self.names = np.sort(self.feedback.columns.values) 45 | self.ntrials=self.feedback.shape[0] 46 | 47 | self.choices, self.all_traces = [], [] 48 | self.rts={k:[] for k in self.names} 49 | 50 | self.qdict={k:[0] for k in self.names} 51 | self.choice_prob={k:[1/self.names.size] for k in self.names} 52 | 53 | 54 | def get_feedback(self, trial, action_ix): 55 | 56 | choice_name = self.names[action_ix] 57 | return self.feedback.loc[trial, choice_name] 58 | 59 | #new_col = self.feedback[choice_name].shift(-1) 60 | #new_col.set_value(new_col.index[-1], oldval) 61 | #self.feedback.loc[:, choice_name] = new_col 62 | #return self.feedback.loc[:, choice_name] = new_col 63 | 64 | 65 | 66 | 67 | class MultiArmedBandit(object): 68 | """ defines a multi-armed bandit task 69 | 70 | ::Arguments:: 71 | preward (list): 1xN vector of reward probaiblities for each of N bandits 72 | rvalues (list): 1xN vector of payout values for each of N bandits 73 | """ 74 | def __init__(self, preward=[.9, .8, .7], rvalues=[1, 1, 1]): 75 | self.preward = preward 76 | self.rvalues = rvalues 77 | try: 78 | 
assert(len(self.rvalues)==len(self.preward)) 79 | except AssertionError: 80 | self.rvalues = np.ones(len(self.preward)) 81 | 82 | def set_params(self, **kwargs): 83 | error_msg = """preward and rvalues must be same size 84 | setting all rvalues to 1""" 85 | kw_keys = list(kwargs) 86 | if 'preward' in kw_keys: 87 | self.preward = kwargs['preward'] 88 | if 'rvalues' not in kw_keys: 89 | try: 90 | assert(len(self.rvalues)==len(self.preward)) 91 | except AssertionError: 92 | self.rvalues = np.ones(len(self.preward)) 93 | 94 | if 'rvalues' in kw_keys: 95 | self.rvalues = kwargs['rvalues'] 96 | try: 97 | assert(len(self.rvalues)==len(self.preward)) 98 | except AssertionError: 99 | raise(AssertionError, error_msg) 100 | 101 | 102 | def get_feedback(self, action_ix): 103 | pOutcomes = np.array([self.preward[action_ix], 1-self.preward[action_ix]]) 104 | Outcomes = np.array([self.rvalues[action_ix], 0]) 105 | feedback = np.random.choice(Outcomes, p=pOutcomes) 106 | return feedback 107 | 108 | 109 | 110 | 111 | class Qagent(object): 112 | """ defines the learning parameters of single q-learning agent 113 | in a multi-armed bandit task 114 | 115 | ::Arguments:: 116 | alpha (float): learning rate 117 | beta (float): inverse temperature parameter 118 | preward (list): 1xN vector of reward probaiblities for each of N bandits 119 | rvalues (list): 1xN vector of payout values for each of N bandits 120 | IF rvalues is None, all values set to 1 121 | 122 | """ 123 | def __init__(self, alpha=.04, beta=3.5, epsilon=.1, preward=[.9, .8, .7], rvalues=None): 124 | if rvalues is None: 125 | rvalues = np.ones(len(preward)) 126 | self.bandits = MultiArmedBandit(preward=preward, rvalues=rvalues) 127 | self.updateQ = lambda Qval, r, alpha: Qval + alpha*(r - Qval) 128 | self.updateP = lambda Qvector, act_i, beta: np.exp(beta*Qvector[act_i])/np.sum(np.exp(beta*Qvector)) 129 | self.set_params(alpha=alpha, beta=beta, epsilon=epsilon) 130 | 131 | 132 | def set_params(self, **kwargs): 133 | """ update learning rate, inv. 
temperature, and/or 134 | epsilon parameters of q-learning agent 135 | """ 136 | 137 | kw_keys = list(kwargs) 138 | 139 | if 'alpha' in kw_keys: 140 | self.alpha = kwargs['alpha'] 141 | 142 | if 'beta' in kw_keys: 143 | self.beta = kwargs['beta'] 144 | 145 | if 'epsilon' in kw_keys: 146 | self.epsilon = kwargs['epsilon'] 147 | 148 | if 'preward' in kw_keys: 149 | self.bandits.set_params(preward=kwargs['preward']) 150 | 151 | if 'rvalues' in kw_keys: 152 | self.bandits.set_params(rvalues=kwargs['rvalues']) 153 | 154 | self.nact = len(self.bandits.preward) 155 | self.actions = np.arange(self.nact) 156 | 157 | 158 | def play_bandits(self, ntrials=1000, get_output=True): 159 | """ simulates agent performance on a multi-armed bandit task 160 | 161 | ::Arguments:: 162 | ntrials (int): number of trials to play bandits 163 | get_output (bool): returns output DF if True (default) 164 | 165 | ::Returns:: 166 | DataFrame (Ntrials x Nbandits) with trialwise Q and P 167 | values for each bandit 168 | """ 169 | pdata = np.zeros((ntrials+1, self.nact)) 170 | pdata[0, :] = np.array([1/self.nact]*self.nact) 171 | qdata = np.zeros_like(pdata) 172 | self.choices = [] 173 | self.feedback = [] 174 | 175 | for t in range(ntrials): 176 | 177 | # select bandit arm (action) 178 | act_i = np.random.choice(self.actions, p=pdata[t, :]) 179 | 180 | # observe feedback 181 | r = self.bandits.get_feedback(act_i) 182 | 183 | # update value of selected action 184 | qdata[t+1, act_i] = update_Qi(qdata[t, act_i], r, self.alpha) 185 | 186 | # broadcast old q-values for unchosen actions 187 | for act_j in self.actions[np.where(self.actions!=act_i)]: 188 | qdata[t+1, act_j] = qdata[t, act_j] 189 | 190 | # update action selection probabilities and store data 191 | pdata[t+1, :] = update_Pall(qdata[t+1, :], self.beta) 192 | self.choices.append(act_i) 193 | self.feedback.append(r) 194 | 195 | self.pdata = pdata[1:, :] 196 | self.qdata = qdata[1:, :] 197 | self.make_output_df() 198 | 199 | if get_output: 200 | return self.data.copy() 201 | 202 | 203 | def make_output_df(self): 204 | """ generate output dataframe with trialwise Q and P measures for each bandit, 205 | as well as choice selection, and feedback 206 | """ 207 | df = pd.concat([pd.DataFrame(dat) for dat in [self.qdata, self.pdata]], axis=1) 208 | columns = np.hstack(([['{}{}'.format(x, c) for c in self.actions] for x in ['q', 'p']])) 209 | df.columns = columns 210 | df.insert(0, 'trial', np.arange(1, df.shape[0]+1)) 211 | df['choice'] = self.choices 212 | df['feedback'] = self.feedback 213 | r = np.array(self.bandits.rvalues) 214 | p = np.array(self.bandits.preward) 215 | df['optimal'] = np.where(df['choice']==np.argmax(p * r), 1, 0) 216 | df.insert(0, 'agent', 1) 217 | self.data = df.copy() 218 | 219 | 220 | def simulate_multiple(self, nsims=10, ntrials=1000): 221 | """ simulates multiple identical agents on multi-armed bandit task 222 | """ 223 | dflist = [] 224 | for i in range(nsims): 225 | data_i = self.play_bandits(ntrials=ntrials, get_output=True) 226 | data_i['agent'] += i 227 | dflist.append(data_i) 228 | return pd.concat(dflist) 229 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/filter.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py 3 | 4 | 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ 
import print_function 8 | 9 | import numpy as np 10 | 11 | 12 | class Filter(object): 13 | """Processes input, possibly statefully.""" 14 | 15 | def update(self, other, *args, **kwargs): 16 | """Updates self with "new state" from other filter.""" 17 | raise NotImplementedError 18 | 19 | def copy(self): 20 | """Creates a new object with same state as self. 21 | 22 | Returns: 23 | copy (Filter): Copy of self""" 24 | raise NotImplementedError 25 | 26 | def sync(self, other): 27 | """Copies all state from other filter to self.""" 28 | raise NotImplementedError 29 | 30 | 31 | class NoFilter(Filter): 32 | def __init__(self, *args): 33 | pass 34 | 35 | def __call__(self, x, update=True): 36 | return np.asarray(x, dtype = np.float64) 37 | 38 | def update(self, other, *args, **kwargs): 39 | pass 40 | 41 | def copy(self): 42 | return self 43 | 44 | def sync(self, other): 45 | pass 46 | 47 | def stats_increment(self): 48 | pass 49 | 50 | def clear_buffer(self): 51 | pass 52 | 53 | def get_stats(self): 54 | return 0, 1 55 | 56 | @property 57 | def mean(self): 58 | return 0 59 | 60 | @property 61 | def var(self): 62 | return 1 63 | 64 | @property 65 | def std(self): 66 | return 1 67 | 68 | 69 | 70 | # http://www.johndcook.com/blog/standard_deviation/ 71 | class RunningStat(object): 72 | 73 | def __init__(self, shape=None): 74 | self._n = 0 75 | self._M = np.zeros(shape, dtype = np.float64) 76 | self._S = np.zeros(shape, dtype = np.float64) 77 | self._M2 = np.zeros(shape, dtype = np.float64) 78 | 79 | def copy(self): 80 | other = RunningStat() 81 | other._n = self._n 82 | other._M = np.copy(self._M) 83 | other._S = np.copy(self._S) 84 | return other 85 | 86 | def push(self, x): 87 | x = np.asarray(x) 88 | # Unvectorized update of the running statistics. 89 | assert x.shape == self._M.shape, ("x.shape = {}, self.shape = {}" 90 | .format(x.shape, self._M.shape)) 91 | n1 = self._n 92 | self._n += 1 93 | if self._n == 1: 94 | self._M[...] = x 95 | else: 96 | delta = x - self._M 97 | deltaM2 = np.square(x) - self._M2 98 | self._M[...] += delta / self._n 99 | self._S[...] += delta * delta * n1 / self._n 100 | 101 | 102 | def update(self, other): 103 | n1 = self._n 104 | n2 = other._n 105 | n = n1 + n2 106 | delta = self._M - other._M 107 | delta2 = delta * delta 108 | M = (n1 * self._M + n2 * other._M) / n 109 | S = self._S + other._S + delta2 * n1 * n2 / n 110 | self._n = n 111 | self._M = M 112 | self._S = S 113 | 114 | def __repr__(self): 115 | return '(n={}, mean_mean={}, mean_std={})'.format( 116 | self.n, np.mean(self.mean), np.mean(self.std)) 117 | 118 | @property 119 | def n(self): 120 | return self._n 121 | 122 | @property 123 | def mean(self): 124 | return self._M 125 | 126 | @property 127 | def var(self): 128 | return self._S / (self._n - 1) if self._n > 1 else np.square(self._M) 129 | 130 | @property 131 | def std(self): 132 | return np.sqrt(self.var) 133 | 134 | @property 135 | def shape(self): 136 | return self._M.shape 137 | 138 | 139 | class MeanStdFilter(Filter): 140 | """Keeps track of a running mean for seen states""" 141 | 142 | def __init__(self, shape, demean=True, destd=True): 143 | self.shape = shape 144 | self.demean = demean 145 | self.destd = destd 146 | self.rs = RunningStat(shape) 147 | # In distributed rollouts, each worker sees different states. 148 | # The buffer is used to keep track of deltas amongst all the 149 | # observation filters. 
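        # Hedged usage sketch (illustrative only, not from the original source):
        #   f = MeanStdFilter(shape=(3,))
        #   _ = f(np.array([1.0, 2.0, 3.0]))        # push x into the running stats
        #   f.stats_increment()                     # refresh the mean/std used for scaling
        #   x_norm = f(np.array([1.0, 2.0, 3.0]))   # returns (x - mean) / (std + 1e-8)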
150 | 151 | self.buffer = RunningStat(shape) 152 | 153 | self.mean = np.zeros(shape, dtype = np.float64) 154 | self.std = np.ones(shape, dtype = np.float64) 155 | 156 | def clear_buffer(self): 157 | self.buffer = RunningStat(self.shape) 158 | return 159 | 160 | def update(self, other, copy_buffer=False): 161 | """Takes another filter and only applies the information from the 162 | buffer. 163 | 164 | Using notation `F(state, buffer)` 165 | Given `Filter1(x1, y1)` and `Filter2(x2, yt)`, 166 | `update` modifies `Filter1` to `Filter1(x1 + yt, y1)` 167 | If `copy_buffer`, then `Filter1` is modified to 168 | `Filter1(x1 + yt, yt)`. 169 | """ 170 | self.rs.update(other.buffer) 171 | if copy_buffer: 172 | self.buffer = other.buffer.copy() 173 | return 174 | 175 | def copy(self): 176 | """Returns a copy of Filter.""" 177 | other = MeanStdFilter(self.shape) 178 | other.demean = self.demean 179 | other.destd = self.destd 180 | other.rs = self.rs.copy() 181 | other.buffer = self.buffer.copy() 182 | return other 183 | 184 | def sync(self, other): 185 | """Syncs all fields together from other filter. 186 | 187 | Using notation `F(state, buffer)` 188 | Given `Filter1(x1, y1)` and `Filter2(x2, yt)`, 189 | `sync` modifies `Filter1` to `Filter1(x2, yt)` 190 | """ 191 | assert other.shape == self.shape, "Shapes don't match!" 192 | self.demean = other.demean 193 | self.destd = other.destd 194 | self.rs = other.rs.copy() 195 | self.buffer = other.buffer.copy() 196 | return 197 | 198 | def __call__(self, x, update=True): 199 | x = np.asarray(x, dtype = np.float64) 200 | if update: 201 | if len(x.shape) == len(self.rs.shape) + 1: 202 | # The vectorized case. 203 | for i in range(x.shape[0]): 204 | self.rs.push(x[i]) 205 | self.buffer.push(x[i]) 206 | else: 207 | # The unvectorized case. 208 | self.rs.push(x) 209 | self.buffer.push(x) 210 | if self.demean: 211 | x = x - self.mean 212 | if self.destd: 213 | x = x / (self.std + 1e-8) 214 | return x 215 | 216 | def stats_increment(self): 217 | self.mean = self.rs.mean 218 | self.std = self.rs.std 219 | 220 | # Set values for std less than 1e-7 to +inf to avoid 221 | # dividing by zero. State elements with zero variance 222 | # are set to zero as a result. 
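        # For example, a constant observation dimension ends up as
        # 0 / inf == 0 in __call__ instead of a 0 / 0 nan.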
223 | self.std[self.std < 1e-7] = float("inf") 224 | return 225 | 226 | def get_stats(self): 227 | return self.rs.mean, (self.rs.std + 1e-8) 228 | 229 | def __repr__(self): 230 | return 'MeanStdFilter({}, {}, {}, {}, {}, {})'.format( 231 | self.shape, self.demean, 232 | self.rs, self.buffer) 233 | 234 | 235 | def get_filter(filter_config, shape = None): 236 | if filter_config == "MeanStdFilter": 237 | return MeanStdFilter(shape) 238 | elif filter_config == "NoFilter": 239 | return NoFilter() 240 | else: 241 | raise Exception("Unknown observation_filter: " + 242 | str(filter_config)) 243 | 244 | 245 | def test_running_stat(): 246 | for shp in ((), (3,), (3, 4)): 247 | li = [] 248 | rs = RunningStat(shp) 249 | for _ in range(5): 250 | val = np.random.randn(*shp) 251 | rs.push(val) 252 | li.append(val) 253 | m = np.mean(li, axis=0) 254 | assert np.allclose(rs.mean, m) 255 | v = np.square(m) if (len(li) == 1) else np.var(li, ddof=1, axis=0) 256 | assert np.allclose(rs.var, v) 257 | 258 | 259 | def test_combining_stat(): 260 | for shape in [(), (3,), (3, 4)]: 261 | li = [] 262 | rs1 = RunningStat(shape) 263 | rs2 = RunningStat(shape) 264 | rs = RunningStat(shape) 265 | for _ in range(5): 266 | val = np.random.randn(*shape) 267 | rs1.push(val) 268 | rs.push(val) 269 | li.append(val) 270 | for _ in range(9): 271 | rs2.push(val) 272 | rs.push(val) 273 | li.append(val) 274 | rs1.update(rs2) 275 | assert np.allclose(rs.mean, rs1.mean) 276 | assert np.allclose(rs.std, rs1.std) 277 | 278 | 279 | test_running_stat() 280 | test_combining_stat() 281 | -------------------------------------------------------------------------------- /ADMCode/visualize.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import pandas as pd 4 | import seaborn as sns 5 | from scipy.stats import norm 6 | from mpl_toolkits.axes_grid1 import make_axes_locatable 7 | from ADMCode import sdt, utils 8 | import matplotlib.pyplot as plt 9 | from scipy.stats.stats import sem 10 | 11 | def convert_params(parameters, maxtime=1.5): 12 | a, tr, v, z, si, dx, dt = parameters 13 | zStart = z * a 14 | trSteps = int(tr/dt) 15 | deadline = (maxtime / dt) * 1.1 16 | return a, trSteps, v, zStart, si, dx, dt, deadline 17 | 18 | 19 | def build_ddm_axis(parameters, maxtime=1.5): 20 | 21 | sns.set(style='white') 22 | f, ax = plt.subplots(1, figsize=(8.5, 7), sharex=True) 23 | 24 | a, tr, v, z, si, dx, dt, deadline = convert_params(parameters, maxtime) 25 | w = deadline 26 | xmin=tr - 100 27 | 28 | plt.setp(ax, xlim=(xmin - 51, w + 1), ylim=(0 - (.01 * a), a + (.01 * a))) 29 | ax.hlines(y=a, xmin=xmin, xmax=w, color='#3572C6', linewidth=4) 30 | ax.hlines(y=0, xmin=xmin, xmax=w, color='#e5344a', linewidth=4) 31 | ax.hlines(y=z, xmin=xmin, xmax=w, color='k', alpha=.4, linestyles='--', linewidth=3) 32 | ax.vlines(x=xmin-50, ymin=-.1, ymax=a+.1, color='k', alpha=.15, linewidth=5) 33 | ax.hlines(y=z, xmin=xmin, xmax=tr, color='k', linewidth=4) 34 | 35 | ax.set_xticklabels([]) 36 | ax.set_yticklabels([]) 37 | ax.set_xticks([]) 38 | ax.set_yticks([]) 39 | sns.despine(top=True, right=True, bottom=True, left=True, ax=ax) 40 | 41 | divider = make_axes_locatable(ax) 42 | axx1 = divider.append_axes("top", size=1.2, pad=0.0, sharex=ax) 43 | axx2 = divider.append_axes("bottom", size=1.2, pad=0.0, sharex=ax) 44 | plt.setp(axx1, xlim=(xmin - 51, w + 1),ylim=(0 - (.01 * a), a + (.01 * a))) 45 | plt.setp(axx2, xlim=(xmin - 51, w + 1),ylim=(0 - (.01 * a), a + (.01 * a))) 46 | 
axx2.invert_yaxis() 47 | axx1.hist([0], density=False, bins=np.linspace(200, w, num=9), alpha=1., color='White') 48 | axx2.hist([0], density=False, bins=np.linspace(200, w, num=9), alpha=1., color='White') 49 | 50 | for axx in [axx1, axx2]: 51 | for spine in ['top', 'left', 'bottom', 'right']: 52 | axx.spines[spine].set_visible(False) 53 | axx.set_xticklabels([]) 54 | axx.set_yticklabels([]) 55 | return f, [ax, axx1, axx2] 56 | 57 | 58 | 59 | def plot_ddm_sims(df, parameters, traces=None, plot_v=False, fig=None, colors=None, vcolor='k', kdeplot=True): 60 | 61 | maxtime = df.rt.max() 62 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters, maxtime) 63 | 64 | if colors is None: 65 | colors = ['#3572C6', '#e5344a'] 66 | if fig is None: 67 | f, axes = build_ddm_axis(parameters, maxtime) 68 | else: 69 | f = fig; axes = fig.axes 70 | 71 | plot_bound_rts(df, parameters, f=f, colors=colors, kdeplot=kdeplot) 72 | 73 | if traces is not None: 74 | plot_traces(df, parameters, traces, f=f, colors=colors) 75 | 76 | if plot_v: 77 | plot_drift_line(df, parameters, color=vcolor, ax=f.axes[0]) 78 | 79 | return f 80 | 81 | 82 | def compare_drift_effects(df, param_list): 83 | 84 | sDF = df[df.stim=='signal'] 85 | nDF = df[df.stim=='noise'] 86 | colors = [['#009e07','#009e07'], ["#e5344a", "#e5344a"]] 87 | 88 | maxtime = df.rt.max() 89 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(param_list[0], maxtime) 90 | f=None 91 | for i, dfi in enumerate([sDF, nDF]): 92 | clrs = colors[i] 93 | f = plot_ddm_sims(dfi, param_list[i], colors=clrs, plot_v=True, fig=f, vcolor=clrs[0], kdeplot=True)#False) 94 | 95 | ax, axx1, axx2 = f.axes 96 | xmin = trSteps-100 97 | ax.hlines(y=a, xmin=xmin, xmax=deadline, color='k', linewidth=4) 98 | ax.hlines(y=0, xmin=xmin, xmax=deadline, color='k', linewidth=4) 99 | 100 | if sDF.shape[0] > nDF.shape[0]: 101 | ymax, ymin = axx1.get_ylim()[::-1] 102 | axx2.set_ylim(ymax, ymin) 103 | else: 104 | ymax, ymin = axx2.get_ylim()[::-1] 105 | axx1.set_ylim(ymax, ymin) 106 | return ax 107 | 108 | 109 | def plot_bound_rts(df, parameters, f, colors=None, kdeplot=True): 110 | 111 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters) 112 | rt1 = df[df.choice==1].rt.values / dt 113 | rt0 = df[df.choice==0].rt.values / dt 114 | 115 | if colors is None: 116 | colors = ['#3572C6', '#e5344a'] 117 | ax, axx1, axx2 = f.axes 118 | clip = (df.rt.min()/dt, deadline/dt) 119 | 120 | if kdeplot: 121 | #print('kdeplot') 122 | sns.kdeplot(rt1, alpha=.5, linewidth=0, color=colors[0], ax=axx1, shade=True, clip=clip)#, bw=15) 123 | sns.kdeplot(rt0, alpha=.5, linewidth=0, color=colors[1], ax=axx2, shade=True, clip=clip)#, bw=15) 124 | 125 | ymax = (.005, .01) 126 | if rt1.size < rt0.size: 127 | ymax = (.01, .005) 128 | axx1.set_ylim(0, ymax[0]) 129 | axx2.set_ylim(ymax[1], 0.0) 130 | # axx2.invert_yaxis() 131 | 132 | else: 133 | #print('not_kdeplot') 134 | #print(repr(rt1)) 135 | sns.histplot(rt1, color=colors[0], ax=axx1, kde=False)#, norm_hist=False) 136 | sns.histplot(rt0, color=colors[1], ax=axx2, kde=False)#, norm_hist=False) 137 | 138 | 139 | def plot_traces(df, parameters, traces, f, colors): 140 | 141 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters) 142 | ax = f.axes[0] 143 | ntrials = int(traces.shape[0]) 144 | for i in range(ntrials): 145 | trace = traces[i] 146 | c = colors[0] 147 | nsteps = np.argmax(trace[trace<=a]) + 2 148 | if df.iloc[i]['choice']==0: 149 | # if trace[nsteps]=0]) + 2 152 | ax.plot(np.arange(trSteps, 
trSteps + nsteps), traces[i, :nsteps], color=c, alpha=.1) 153 | 154 | 155 | def plot_drift_line(df, parameters, color='k', ax=None): 156 | 157 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters) 158 | rt = np.mean(df[df.choice==1].rt.values / dt) 159 | if v<0: 160 | rt = np.mean(df[df.choice==0].rt.values / dt) 161 | accum_x = np.arange(rt)*.001 162 | driftRate = zStart + (accum_x * v) 163 | x = np.linspace(trSteps, rt, accum_x.size) 164 | ax.plot(x, driftRate, color=color, linewidth=3) 165 | 166 | 167 | def sdt_interact(pH=.80, pFA=.10): 168 | 169 | plt.figure(2) 170 | ax = plt.gca() 171 | 172 | #n0, n1 = float(FA + CR), float(Hits + Misses) 173 | n0 = 100; n1 = 100 174 | if pH == 0: pH += 0.01 175 | if pH == n1: pH -= 0.01 176 | if pFA == 0: pFA += 0.01 177 | if pFA == n0: pFA -= 0.01 178 | 179 | Hits = pH * n1 180 | Misses = n1 - Hits 181 | FA = pFA * n0 182 | CR = n0 - FA 183 | 184 | d, c = sdt.sdt_mle(Hits, Misses, CR, FA) 185 | cLine = norm.ppf(1-pFA) 186 | dstr = "$d'={:.2f}$".format(d) 187 | cstr = "$c={:.2f}$".format(c) 188 | 189 | x = np.linspace(-4, 7, 1000) 190 | noiseDist = norm.pdf(loc=0, scale=1, x=x) 191 | signalDist = norm.pdf(loc=d, scale=1, x=x) 192 | 193 | plt.plot(x, noiseDist, color='k', alpha=.4) 194 | plt.plot(x, signalDist, color='k') 195 | 196 | yupper = ax.get_ylim()[-1] 197 | ax.vlines(cLine, 0, yupper, linestyles='-', linewidth=1.5) 198 | ax.set_ylim(0, yupper) 199 | ax.set_xlim(-4, 7) 200 | ax.set_yticklabels([]) 201 | sns.despine(left=True, right=True, top=True) 202 | 203 | ax.text(4, yupper*.9, dstr, fontsize=14) 204 | ax.text(4, yupper*.8, cstr, fontsize=14) 205 | 206 | plt.show() 207 | 208 | 209 | def plot_qlearning(data, nblocks=25, analyze=True): 210 | 211 | if analyze: 212 | auc = utils.get_optimal_auc(data, nblocks, verbose=True) 213 | 214 | sns.set(style='white', font_scale=1.3) 215 | clrs = ['#3778bf', '#feb308', '#9b59b6', '#2ecc71', '#e74c3c', 216 | '#3498db', '#fd7f23', '#694098', '#319455', '#f266db', 217 | '#13579d', '#fa8d67' '#a38ff1' '#3caca4', '#c24f54'] 218 | 219 | f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12,3.5)) 220 | df = data.copy() 221 | nactions = int(df.columns[-4].split('p')[-1])+1 222 | actions = np.arange(nactions) 223 | 224 | mudf = df.groupby('trial').mean().reset_index() 225 | errdf = df.groupby('trial').sem().reset_index()*1.96 226 | x = mudf.trial.values 227 | 228 | plot_err = True 229 | if np.isnan(errdf.loc[1, 'q0']): 230 | plot_err = False 231 | 232 | x3 = np.arange(1, nblocks+1) 233 | chance = 1/nactions 234 | mu3, err3 = utils.analyze_bandits(df, nblocks=nblocks, get_err=plot_err) 235 | ax3.plot(x3, mu3, color='k') 236 | ax3.hlines(chance, 1, x3[-1], color='k', linestyles='--', label='chance') 237 | 238 | for i, act in enumerate(actions): 239 | muQ = mudf['q{}'.format(act)].values 240 | muP = mudf['p{}'.format(act)].values 241 | ax1.plot(x, muQ, label='$arm_{}$'.format(i), color=clrs[i]) 242 | ax2.plot(x, muP, color=clrs[i]) 243 | 244 | if plot_err: 245 | errQ = errdf['q{}'.format(act)].values 246 | errP = errdf['p{}'.format(act)].values 247 | ax1.fill_between(x, muQ-errQ, muQ+errQ, color=clrs[i], alpha=.2) 248 | ax2.fill_between(x, muP-errP, muP+errP, color=clrs[i], alpha=.2) 249 | if i==0: 250 | ax3.fill_between(x3, mu3-err3, mu3+err3, color='k', alpha=.15) 251 | else: 252 | ychance = np.ones(mu3.size) * chance 253 | mu3A = np.copy(mu3) 254 | mu3B = np.copy(mu3) 255 | mu3A[np.where(mu3<=chance)] = chance 256 | mu3B[np.where(mu3>=chance)] = chance 257 | ax3.fill_between(x3, ychance, mu3A, 
color='#2ecc71', alpha=.15) 258 | ax3.fill_between(x3, ychance, mu3B, color='#e74c3c', alpha=.15) 259 | 260 | ax1.legend(loc=4) 261 | ax1.set_ylabel('$Q(arm)$') 262 | ax1.set_title('Value') 263 | 264 | ax2.set_ylabel('$P(arm)$') 265 | ax2.set_ylim(0,1) 266 | ax2.set_title('Softmax Prob.') 267 | 268 | ax3.set_ylim(0,1) 269 | ax3.set_ylabel('% Optimal Arm') 270 | ax3.set_xticks([1, nblocks+1]) 271 | ax3.set_xticklabels([1, df.trial.max()]) 272 | ax3.legend(loc=4) 273 | 274 | for ax in f.axes: 275 | ax.set_xlabel('Trials') 276 | plt.tight_layout() 277 | sns.despine() 278 | -------------------------------------------------------------------------------- /ADMCode/neural.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import pandas as pd 4 | import random 5 | import matplotlib.pyplot as plt 6 | from numpy.random import sample as rs 7 | from numpy import hstack as hs 8 | from numpy import newaxis as na 9 | from scipy.stats.distributions import norm, uniform 10 | from mpl_toolkits.axes_grid1 import make_axes_locatable 11 | import matplotlib as mpl 12 | import seaborn as sns 13 | 14 | sns.set(style='white', font_scale=1.8) 15 | clrs = ['#3778bf', '#e74c3c', '#9b59b6', '#319455', '#feb308', '#fd7f23'] 16 | 17 | def LCA_Model(I1=10, I2=8, I0=2, k=5, B=5, si=1., Z=1, dt=.01, tau=.1, tmax=1.5): 18 | 19 | timepoints = np.arange(0, tmax, dt) 20 | ntime = timepoints.size 21 | 22 | y1 = np.zeros(ntime) 23 | y2 = np.zeros(ntime) 24 | dx=np.sqrt(si*dt/tau) 25 | 26 | E1=si*np.sqrt(dt/tau)*rs(ntime) 27 | E2=si*np.sqrt(dt/tau)*rs(ntime) 28 | 29 | onset=100 30 | for i in range(onset, ntime): 31 | y1[i] = y1[i-1] + (I1 + -k*y1[i-1] + -B*y2[i-1]) * dt/tau + E1[i] 32 | y2[i] = y2[i-1] + (I2 + -k*y2[i-1] + -B*y1[i-1]) * dt/tau + E2[i] 33 | y_t = np.array([y1[i], y2[i]]) 34 | 35 | if np.any(y_t>=Z): 36 | rt = i; act = np.argmax(y_t) 37 | return y1[:i], y2[:i], rt, act 38 | return y1[:i], y2[:i], np.nan, np.nan 39 | 40 | 41 | def attractor_network(I1=6, I2=3, I0=2, k=.85, B=.28, si=.3, rmax=50, b=30, g=9, Z=20, dt=.001, tau=.05, tmax=1.5): 42 | 43 | timepoints = np.arange(0, tmax, dt) 44 | ntime = timepoints.size 45 | 46 | r1 = np.zeros(ntime) 47 | r2 = np.zeros(ntime) 48 | dv = np.zeros(ntime) 49 | 50 | NInput = lambda x, r: rmax/(1+np.exp(-(x-b)/g))-r 51 | dspace = lambda r1, r2: (r1-r2)/np.sqrt(2) 52 | 53 | E1=si*np.sqrt(dt/tau)*rs(ntime) 54 | E2=si*np.sqrt(dt/tau)*rs(ntime) 55 | 56 | onset=100 57 | r1[:onset], r2[:onset] = [v[0][:onset] + I0+v[1][:onset] for v in [[r1,E1],[r2,E2]]] 58 | 59 | subZ=True 60 | for i in range(onset, ntime): 61 | r1[i] = r1[i-1] + dt/tau * (NInput(I1 + I0 + k*r1[i-1] + -B*r2[i-1], r1[i-1])) + E1[i] 62 | r2[i] = r2[i-1] + dt/tau * (NInput(I2 + I0 + k*r2[i-1] + -B*r1[i-1], r2[i-1])) + E2[i] 63 | dv[i] = (r1[i]-r2[i])/np.sqrt(2) 64 | if np.abs(dv[i])>=Z: 65 | rt = i+1 66 | return r1[:i+1], r2[:i+1], dv[:i+1], rt 67 | rt = i+1 68 | return r1[:i], r2[:i], dv[:i], rt 69 | 70 | 71 | 72 | def simulate_attractor_competition(Imax=12, I0=0.05, k=1.15, B=.6, g=15, b=30, rmax=100, si=6.5, dt=.002, tau=.075, Z=100, ntrials=250): 73 | 74 | sns.set(style='white', font_scale=1.8) 75 | f, ax = plt.subplots(1, figsize=(8,7)) 76 | cmap = mpl.colors.ListedColormap(sns.blend_palette([clrs[1], clrs[0]], n_colors=ntrials)) 77 | Iscale = np.hstack(np.tile(np.linspace(.5*Imax, Imax, ntrials/2)[::-1], 2)) 78 | Ivector=np.linspace(-1,1,len(Iscale)) 79 | norm = mpl.colors.Normalize( 80 | vmin=np.min(Ivector), 81 | 
vmax=np.max(Ivector)) 82 | sm = mpl.cm.ScalarMappable(cmap=cmap, norm=norm) 83 | sm.set_array([]) 84 | 85 | for i, I_t in enumerate(Iscale): 86 | if i < (ntrials/2.): 87 | I1 = Imax; I2 = I_t 88 | else: 89 | I1=I_t; I2 = Imax 90 | r1, r2, dv, rt = attractor_network(I1=I1, I2=I2, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z) 91 | ax.plot(r1, r2, color=sm.to_rgba(Ivector[i]), alpha=.5) 92 | 93 | c_ax = plt.colorbar(sm, ax=plt.gca()) 94 | c_ax.set_ticks([-1, 1]) 95 | c_ax.set_ticklabels(['$I_1<>I_2$']) 96 | ax.plot([0,rmax], [0,rmax], color='k', alpha=.5, linestyle='-', lw=3.5) 97 | _=plt.setp(ax, ylim=[0,rmax], xlim=[0,rmax], xticks=[0,rmax], xticklabels=[0,rmax], 98 | yticks=[0,rmax],yticklabels=[0,rmax], ylabel='$r_1$ (Hz)', xlabel='$r_2$ (Hz)') 99 | 100 | 101 | def simulate_attractor_behavior(I1=12, I2=9, I0=0.05, k=1.15, B=1., g=12, b=35, rmax=100, si=5., dt=.001, tau=.075, Z=30, ntrials=250): 102 | 103 | behavior = np.zeros((ntrials, 3)) 104 | for t in range(ntrials): 105 | r1, r2, dv, rt = attractor_network(I1=I1, I2=I2, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z) 106 | 107 | choice=0 108 | acc=0 109 | if dv[-1]>=Z: 110 | choice=1 111 | acc=0 112 | if I1>I2: acc=1 113 | elif dv[-1]<=-Z: 114 | choice=2 115 | if I2>I1: acc=1 116 | elif I2==I1: 117 | acc=.5 118 | 119 | 120 | behavior[t, :] = choice, acc, rt 121 | 122 | return pd.DataFrame(behavior, columns=['choice', 'accuracy', 'rt'], index=np.arange(ntrials)) 123 | 124 | 125 | def SAT_experiment(dfa, dfb): 126 | 127 | f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4)) 128 | dfa['cond']='Control' 129 | dfb['cond'] = 'Test' 130 | dfx = pd.concat([dfa,dfb]) 131 | dfacc = dfx[dfx.accuracy==1] 132 | 133 | accY=dfx.groupby('cond').mean()['accuracy'].values 134 | ax1.scatter([0], accY[0], s=205, color='k', alpha=1.) 135 | ax1.scatter([1], accY[1], s=205, color=clrs[0], alpha=1.) 
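    # The two scatter points mark mean accuracy for the Control (black) and
    # Test (blue) conditions; the line plotted next just connects them.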
136 | ax1.plot([0,1], accY, color='k', alpha=.3, linewidth=3.5) 137 | 138 | sns.kdeplot(dfacc[dfacc.cond=='Control'].rt.values, ax=ax2, shade=True, color='k', alpha=.15, lw=0) 139 | sns.kdeplot(dfacc[dfacc.cond=='Test'].rt.values, ax=ax2, shade=True, color=clrs[0], linewidth=0) 140 | 141 | rtmu = dfacc.groupby('cond').mean()['rt'].values 142 | xmax = ax2.get_ylim()[-1] 143 | ax2.vlines(rtmu[0], 0, xmax, color='k', linestyles='--', linewidth=2, label='Control') 144 | ax2.vlines(rtmu[1], 0, xmax, color=clrs[0], linewidth=2, label='Test') 145 | ax2.set_yticklabels([]) 146 | ax1.set_ylim(0,1) 147 | ax1.set_xlim(-.5,1.5) 148 | ax1.set_xticks([0,1]) 149 | ax1.set_xticklabels(['Control','Test']) 150 | ax1.set_ylabel('% Correct') 151 | ax1.set_xlabel('Condition') 152 | ax2.set_xlabel('RT (ms)') 153 | ax2.legend() 154 | sns.despine() 155 | 156 | def noisy_attractor_endpoints(I=12, I0=0.05, k=1.15, B=1.15, g=25, b=50, rmax=100, si=6.5, dt=.002, tau=.05, Z=100, ntrials=250): 157 | 158 | f, axes = plt.subplots(1, 4, figsize=(12,3.5)) 159 | 160 | for i in range(4): 161 | attractor_endpoints(I=I[i], I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z, ntrials=ntrials, ax=axes[i]) 162 | if i>0: 163 | axes[i].set_yticklabels([]) 164 | axes[i].set_ylabel('') 165 | else: 166 | axes[i].set_yticks([0, 120]) 167 | axes[i].set_yticklabels([0, 120]) 168 | axes[i].set_ylim(0,120) 169 | plt.tight_layout() 170 | 171 | 172 | def attractor_endpoints(I=12, I0=0.05, k=1.15, B=1.15, g=25, b=50, rmax=100, si=6.5, dt=.002, tau=.05, Z=100, ntrials=250, ax=None): 173 | 174 | sns.set(style='white', font_scale=1.8) 175 | if ax is None: 176 | f, ax = plt.subplots(1, figsize=(4,4)) 177 | 178 | r1d1,r2d1,r1d2,r2d2 = [],[],[],[] 179 | for i in range(ntrials): 180 | r1, r2, dv, rt = attractor_network(I1=I, I2=I, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z) 181 | if r1[-1]>r2[-1]: 182 | r1d1.append(r1[-1]) 183 | r2d1.append(r2[-1]) 184 | if r2[-1]>r1[-1]: 185 | r1d2.append(r1[-1]) 186 | r2d2.append(r2[-1]) 187 | 188 | ax.scatter(r2d1, r1d1, s=30, color=clrs[0], marker='o', alpha=.1) 189 | ax.scatter(r2d2, r1d2, s=30, color=clrs[1], marker='o', alpha=.1) 190 | 191 | #xymax = np.max(np.hstack([r1d1, r2d1])) 192 | #xymax = np.max(np.hstack([r1d2, r2d2])) 193 | xymax=120 194 | rmax=int(xymax) 195 | ax.plot([0,xymax], [0,xymax], color='k', alpha=.5, linestyle='-', lw=3.5) 196 | _ = plt.setp(ax, ylim=[0,xymax], xlim=[0,xymax], xticks=[0,xymax], xticklabels=[0,rmax], yticks=[0,xymax],yticklabels=[0,rmax], ylabel='$r_1$ (Hz)', xlabel='$r_2$ (Hz)') 197 | 198 | 199 | def plot_sigmoid_response(b=50, g=20, rmax=1): 200 | 201 | x = np.linspace(0,100,100) 202 | y = rmax/(1+np.exp(-(x-b)/g)) 203 | 204 | plt.vlines(b, 0, y[b], color='r', label='b') 205 | plt.hlines(.5, 0, b, color='k', linestyles='--') 206 | plt.fill_between(x[:b+1], 0, y[:b+1], alpha=.05, color='k') 207 | plt.text(b+2, .045, 'b', color='r') 208 | 209 | # plot g slope 210 | w_lo = int(x[b]) 211 | w_hi = int(x[b+10]) 212 | plt.plot([w_lo, w_hi], [y[w_lo]+.03, y[w_hi]+.03], color='b') 213 | plt.text(b, y[b+5]+.04, 'g', color='b') 214 | 215 | # plot f-i curve 216 | plt.plot(x, y, color='k') 217 | 218 | ax = plt.gca() 219 | ax.set_xlabel('Input Current') 220 | ax.set_ylabel('Neural Response') 221 | ax.set_xticks([0,100]) 222 | ax.set_xlim(0,100) 223 | ax.set_xticklabels([0,100]) 224 | ax.set_ylim(0, rmax*1.05) 225 | sns.despine() 226 | 227 | 228 | def plot_decision_dynamics(r1, r2, dv, Z=20, axes=None, alpha=.7, label=None, xlim=None): 229 | 230 | if 
axes is None: 231 | f, axes = plt.subplots(2, 1, figsize=(6,9)) 232 | ax2, ax1 = axes 233 | rt=len(dv)-1 234 | 235 | l1, l2, l3 = [None]*3 236 | 237 | if label: 238 | l1, l2, l3 = '$y_1$', '$y_2$', '$\Delta y$' 239 | ylabel = 'Activation' 240 | ax1.plot(r1, color=clrs[0], label=l1, linewidth=2.5, alpha=alpha) 241 | ax1.plot(r2, color=clrs[1], label=l2, linewidth=2.5, alpha=alpha) 242 | ax1.vlines(rt, ymin=r2[rt], ymax=r1[rt], color=clrs[0], linestyles='--', alpha=alpha) 243 | ax2.plot(dv, color=clrs[2], label=l3, linewidth=2.5, alpha=alpha) 244 | 245 | if xlim is None: 246 | xlim = ax1.get_xlim() 247 | 248 | xmin, xmax = xlim 249 | for ax in [ax1,ax2]: 250 | ax.set_xlim(xmin, xmax) 251 | ax.legend(loc=2) 252 | ax2.set_yticklabels([]) 253 | ax1.set_xlabel('Time (ms)') 254 | ax1.set_ylabel(ylabel) 255 | ax2.set_ylabel('Decision State') 256 | ax2.set_ylim(-Z, Z) 257 | ax2.hlines(0, xmin=0, xmax=xmax, color='k', linestyles='--', alpha=.5) 258 | ax2.hlines(Z-.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4) 259 | ax2.hlines(-Z+.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4) 260 | ax2.set_xticklabels([]) 261 | sns.despine(ax=ax1) 262 | 263 | sns.despine(ax=ax2, right=True, top=True, bottom=True) 264 | 265 | def plot_rt_distributions(ax1, ax2, rts, xlim=None, alpha=.8): 266 | 267 | divider = make_axes_locatable(ax2) 268 | axx = divider.append_axes("top", size=1.6, pad=0, sharex=ax2) 269 | for rt in rts: 270 | sns.kdeplot(rt, ax=axx, shade=True, color=clrs[2], alpha=alpha) 271 | alpha=alpha-.5 272 | 273 | for spine in ['top', 'left', 'bottom', 'right']: 274 | axx.spines[spine].set_visible(False) 275 | 276 | axx.set_xticklabels([]) 277 | axx.set_yticklabels([]) 278 | ax2.set_yticklabels([]) 279 | xmin, xmax = ax1.get_xlim() 280 | ax1.set_xlim(0, xmax) 281 | Z = ax2.get_ylim()[-1] 282 | 283 | ax2.hlines(0, xmin=0, xmax=xmax, color='k', linestyles='--', alpha=.5) 284 | ax2.hlines(Z-.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4) 285 | ax2.hlines(-Z+.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4) 286 | -------------------------------------------------------------------------------- /ADMCode/snuz/ars/ars.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Parallel implementation of the Augmented Random Search method. 3 | Horia Mania --- hmania@berkeley.edu 4 | Aurelia Guy 5 | Benjamin Recht 6 | ''' 7 | 8 | # import parser 9 | import time 10 | import os 11 | import numpy as np 12 | import gym 13 | from ADMCode.snuz.ars import logz 14 | import ray 15 | from ADMCode.snuz.ars import utils 16 | from ADMCode.snuz.ars import optimizers 17 | from ADMCode.snuz.ars.policies import * 18 | import socket 19 | from ADMCode.snuz.ars.shared_noise import * 20 | 21 | 22 | @ray.remote 23 | class Worker(object): 24 | """ 25 | Object class for parallel rollout generation. 26 | """ 27 | 28 | def __init__(self, 29 | env_seed, 30 | env_name='', 31 | policy_params=None, 32 | deltas=None, 33 | rollout_length=1000, 34 | delta_std=0.02): 35 | 36 | # initialize OpenAI environment for each worker 37 | self.env = gym.make(env_name) 38 | self.env.seed(env_seed) 39 | 40 | # each worker gets access to the shared noise table 41 | # with independent random streams for sampling 42 | # from the shared noise table. 
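        # Hedged sketch (illustrative only) of how a perturbation is drawn and
        # applied in do_rollouts() below:
        #   idx, delta = self.deltas.get_delta(w_policy.size)
        #   delta = (self.delta_std * delta).reshape(w_policy.shape)
        #   reward_plus, _ = evaluate(w_policy + delta)   # hypothetical helper
        #   reward_minus, _ = evaluate(w_policy - delta)  # hypothetical helper
        # Both rewards are returned with idx so the learner can look delta up
        # again in the shared noise table.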
43 | self.deltas = SharedNoiseTable(deltas, env_seed + 7) 44 | self.policy_params = policy_params 45 | if policy_params['type'] == 'linear': 46 | self.policy = LinearPolicy(policy_params) 47 | else: 48 | raise NotImplementedError 49 | 50 | self.delta_std = delta_std 51 | self.rollout_length = rollout_length 52 | 53 | def get_weights_plus_stats(self): 54 | """ 55 | Get current policy weights and current statistics of past states. 56 | """ 57 | assert self.policy_params['type'] == 'linear' 58 | return self.policy.get_weights_plus_stats() 59 | 60 | def rollout(self, shift=0., rollout_length=None): 61 | """ 62 | Performs one rollout of maximum length rollout_length. 63 | At each time-step it substracts shift from the reward. 64 | """ 65 | 66 | if rollout_length is None: 67 | rollout_length = self.rollout_length 68 | 69 | total_reward = 0. 70 | steps = 0 71 | 72 | ob = self.env.reset() 73 | for i in range(rollout_length): 74 | action = self.policy.act(ob) 75 | ob, reward, done, _ = self.env.step(action) 76 | steps += 1 77 | total_reward += (reward - shift) 78 | if done: 79 | break 80 | 81 | return total_reward, steps 82 | 83 | def do_rollouts(self, w_policy, num_rollouts=1, shift=1, evaluate=False): 84 | """ 85 | Generate multiple rollouts with a policy parametrized by w_policy. 86 | """ 87 | 88 | rollout_rewards, deltas_idx = [], [] 89 | steps = 0 90 | 91 | for i in range(num_rollouts): 92 | 93 | if evaluate: 94 | self.policy.update_weights(w_policy) 95 | deltas_idx.append(-1) 96 | 97 | # set to false so that evaluation rollouts are not used for updating state statistics 98 | self.policy.update_filter = False 99 | 100 | # for evaluation we do not shift the rewards (shift = 0) and we use the 101 | # default rollout length (1000 for the MuJoCo locomotion tasks) 102 | reward, r_steps = self.rollout( 103 | shift=0., rollout_length=self.env.spec.timestep_limit) 104 | rollout_rewards.append(reward) 105 | 106 | else: 107 | idx, delta = self.deltas.get_delta(w_policy.size) 108 | 109 | delta = (self.delta_std * delta).reshape(w_policy.shape) 110 | deltas_idx.append(idx) 111 | 112 | # set to true so that state statistics are updated 113 | self.policy.update_filter = True 114 | 115 | # compute reward and number of timesteps used for positive perturbation rollout 116 | self.policy.update_weights(w_policy + delta) 117 | pos_reward, pos_steps = self.rollout(shift=shift) 118 | 119 | # compute reward and number of timesteps used for negative pertubation rollout 120 | self.policy.update_weights(w_policy - delta) 121 | neg_reward, neg_steps = self.rollout(shift=shift) 122 | steps += pos_steps + neg_steps 123 | 124 | rollout_rewards.append([pos_reward, neg_reward]) 125 | 126 | return { 127 | 'deltas_idx': deltas_idx, 128 | 'rollout_rewards': rollout_rewards, 129 | "steps": steps 130 | } 131 | 132 | def stats_increment(self): 133 | self.policy.observation_filter.stats_increment() 134 | return 135 | 136 | def get_weights(self): 137 | return self.policy.get_weights() 138 | 139 | def get_filter(self): 140 | return self.policy.observation_filter 141 | 142 | def sync_filter(self, other): 143 | self.policy.observation_filter.sync(other) 144 | return 145 | 146 | 147 | class ARSLearner(object): 148 | """ 149 | Object class implementing the ARS algorithm. 
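    Each training iteration draws `num_deltas` random directions, evaluates the
    linear policy at W + delta_std*delta and W - delta_std*delta in the Ray
    workers, and then steps the weights along the top-performing directions
    (see aggregate_rollouts and train_step below).

    Minimal usage sketch -- added for illustration only; it mirrors the
    run_ars() helper at the bottom of this module and assumes the gym/ray
    versions this module was written against:

        import os, ray
        ray.init(local_mode=True)
        os.makedirs('data', exist_ok=True)
        learner = ARSLearner(
            env_name='MountainCarContinuous-v0',
            policy_params={'type': 'linear', 'ob_filter': 'MeanStdFilter',
                           'ob_dim': 2, 'ac_dim': 1},
            num_workers=1, num_deltas=8, deltas_used=8,
            step_size=0.02, delta_std=0.03, shift=0,
            logdir='data', rollout_length=240, seed=237)
        iterations, avg_rewards = learner.train(num_iter=10)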
150 | """ 151 | 152 | def __init__(self, 153 | env_name='HalfCheetah-v1', 154 | policy_params=None, 155 | num_workers=32, 156 | num_deltas=320, 157 | deltas_used=320, 158 | delta_std=0.02, 159 | logdir=None, 160 | rollout_length=1000, 161 | step_size=0.01, 162 | shift='constant zero', 163 | params=None, 164 | seed=123): 165 | 166 | logz.configure_output_dir(logdir) 167 | 168 | env = gym.make(env_name) 169 | 170 | self.timesteps = 0 171 | self.action_size = env.action_space.shape[0] 172 | self.ob_size = env.observation_space.shape[0] 173 | self.num_deltas = num_deltas 174 | self.deltas_used = deltas_used 175 | self.rollout_length = rollout_length 176 | self.step_size = step_size 177 | self.delta_std = delta_std 178 | self.logdir = logdir 179 | self.shift = shift 180 | self.max_past_avg_reward = float('-inf') 181 | self.num_episodes_used = float('inf') 182 | 183 | # create shared table for storing noise 184 | # print("Creating deltas table.") 185 | deltas_id = create_shared_noise.remote() 186 | self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3) 187 | # print('Created deltas table.') 188 | 189 | # initialize workers with different random seeds 190 | print('Initializing workers.') 191 | self.num_workers = num_workers 192 | self.workers = [ 193 | Worker.remote( 194 | seed + 7 * i, 195 | env_name=env_name, 196 | policy_params=policy_params, 197 | deltas=deltas_id, 198 | rollout_length=rollout_length, 199 | delta_std=delta_std) for i in range(num_workers) 200 | ] 201 | 202 | # initialize policy 203 | if policy_params['type'] == 'linear': 204 | self.policy = LinearPolicy(policy_params) 205 | self.w_policy = self.policy.get_weights() 206 | else: 207 | raise NotImplementedError 208 | 209 | # initialize optimization algorithm 210 | self.optimizer = optimizers.SGD(self.w_policy, self.step_size) 211 | # print("Initialization of ARS complete.") 212 | 213 | def aggregate_rollouts(self, num_rollouts=None, evaluate=False): 214 | """ 215 | Aggregate update step from rollouts generated in parallel. 
216 | """ 217 | 218 | if num_rollouts is None: 219 | num_deltas = self.num_deltas 220 | else: 221 | num_deltas = num_rollouts 222 | 223 | # put policy weights in the object store 224 | policy_id = ray.put(self.w_policy) 225 | 226 | t1 = time.time() 227 | num_rollouts = int(num_deltas / self.num_workers) 228 | 229 | # parallel generation of rollouts 230 | rollout_ids_one = [ 231 | worker.do_rollouts.remote( 232 | policy_id, 233 | num_rollouts=num_rollouts, 234 | shift=self.shift, 235 | evaluate=evaluate) for worker in self.workers 236 | ] 237 | 238 | rollout_ids_two = [ 239 | worker.do_rollouts.remote( 240 | policy_id, num_rollouts=1, shift=self.shift, evaluate=evaluate) 241 | for worker in self.workers[:(num_deltas % self.num_workers)] 242 | ] 243 | 244 | # gather results 245 | results_one = ray.get(rollout_ids_one) 246 | results_two = ray.get(rollout_ids_two) 247 | 248 | rollout_rewards, deltas_idx = [], [] 249 | 250 | for result in results_one: 251 | if not evaluate: 252 | self.timesteps += result["steps"] 253 | deltas_idx += result['deltas_idx'] 254 | rollout_rewards += result['rollout_rewards'] 255 | 256 | for result in results_two: 257 | if not evaluate: 258 | self.timesteps += result["steps"] 259 | deltas_idx += result['deltas_idx'] 260 | rollout_rewards += result['rollout_rewards'] 261 | 262 | deltas_idx = np.array(deltas_idx) 263 | rollout_rewards = np.array(rollout_rewards, dtype=np.float64) 264 | 265 | # print('Maximum reward of collected rollouts:', rollout_rewards.max()) 266 | # t2 = time.time() 267 | 268 | # print('Time to generate rollouts:', t2 - t1) 269 | 270 | if evaluate: 271 | return rollout_rewards 272 | 273 | # select top performing directions if deltas_used < num_deltas 274 | max_rewards = np.max(rollout_rewards, axis=1) 275 | if self.deltas_used > self.num_deltas: 276 | self.deltas_used = self.num_deltas 277 | 278 | idx = np.arange(max_rewards.size)[max_rewards >= np.percentile( 279 | max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))] 280 | deltas_idx = deltas_idx[idx] 281 | rollout_rewards = rollout_rewards[idx, :] 282 | 283 | # normalize rewards by their standard deviation 284 | rollout_rewards /= np.std(rollout_rewards) 285 | 286 | # t1 = time.time() 287 | # aggregate rollouts to form g_hat, the gradient used to compute SGD step 288 | g_hat, count = utils.batched_weighted_sum( 289 | rollout_rewards[:, 0] - rollout_rewards[:, 1], 290 | (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx), 291 | batch_size=500) 292 | g_hat /= deltas_idx.size 293 | # t2 = time.time() 294 | # print('time to aggregate rollouts', t2 - t1) 295 | return g_hat 296 | 297 | def train_step(self): 298 | """ 299 | Perform one update step of the policy weights. 
300 | """ 301 | 302 | g_hat = self.aggregate_rollouts() 303 | # print("Euclidean norm of update step:", np.linalg.norm(g_hat)) 304 | self.w_policy -= self.optimizer._compute_step(g_hat).reshape( 305 | self.w_policy.shape) 306 | return 307 | 308 | def train(self, num_iter): 309 | 310 | start = time.time() 311 | iter_scores = [] 312 | for i in range(num_iter): 313 | 314 | t1 = time.time() 315 | self.train_step() 316 | t2 = time.time() 317 | # print('total time of one step', t2 - t1) 318 | # print('iter ', i, ' done') 319 | 320 | # record statistics every 10 iterations 321 | rewards = self.aggregate_rollouts(num_rollouts=100, evaluate=True) 322 | iter_scores.append(np.mean(rewards)) 323 | 324 | if ((i + 1) % 10 == 0): 325 | w = ray.get(self.workers[0].get_weights_plus_stats.remote()) 326 | # np.savez(self.logdir + "/lin_policy_plus", w) 327 | 328 | logz.log_tabular("Time", time.time() - start) 329 | logz.log_tabular("Iteration", i + 1) 330 | logz.log_tabular("AverageReward", np.mean(rewards)) 331 | logz.log_tabular("StdRewards", np.std(rewards)) 332 | logz.log_tabular("MaxRewardRollout", np.max(rewards)) 333 | logz.log_tabular("MinRewardRollout", np.min(rewards)) 334 | logz.log_tabular("timesteps", self.timesteps) 335 | logz.dump_tabular() 336 | 337 | t1 = time.time() 338 | # get statistics from all workers 339 | for j in range(self.num_workers): 340 | self.policy.observation_filter.update( 341 | ray.get(self.workers[j].get_filter.remote())) 342 | self.policy.observation_filter.stats_increment() 343 | 344 | # make sure master filter buffer is clear∫ 345 | self.policy.observation_filter.clear_buffer() 346 | # sync all workers 347 | filter_id = ray.put(self.policy.observation_filter) 348 | setting_filters_ids = [ 349 | worker.sync_filter.remote(filter_id) for worker in self.workers 350 | ] 351 | # waiting for sync of all workers 352 | ray.get(setting_filters_ids) 353 | 354 | increment_filters_ids = [ 355 | worker.stats_increment.remote() for worker in self.workers 356 | ] 357 | # waiting for increment of all workers 358 | ray.get(increment_filters_ids) 359 | t2 = time.time() 360 | # print('Time to sync statistics:', t2 - t1) 361 | 362 | return list(range(num_iter)), iter_scores 363 | 364 | 365 | class Hyperparameters: 366 | num_episodes = 10 367 | n_directions = 8 368 | deltas_used = 8 369 | step_size = 0.02 370 | delta_std = 0.03 371 | n_workers = 1 372 | rollout_length = 240 373 | shift = 0 374 | seed = 237 375 | policy_type = 'linear' 376 | dir_path = 'data' 377 | filter = 'MeanStdFilter' 378 | 379 | 380 | def run_ars(env_name='MountainCarContinuous-v0', 381 | logdir='data', 382 | **algorithm_hyperparameters): 383 | 384 | # Ray init? 385 | if not ray.is_initialized(): 386 | # local_ip = socket.gethostbyname(socket.gethostname()) 387 | # ray.init(redis_address=local_ip + ':6379', local_mode=True) 388 | ray.init(local_mode=True) 389 | 390 | # Params 391 | hp = Hyperparameters() 392 | for k, v in algorithm_hyperparameters.items(): 393 | setattr(hp, k, v) 394 | 395 | dir_path = hp.dir_path 396 | 397 | if not (os.path.exists(dir_path)): 398 | os.makedirs(dir_path) 399 | logdir = dir_path 400 | if not (os.path.exists(logdir)): 401 | os.makedirs(logdir) 402 | 403 | env = gym.make(env_name) 404 | ob_dim = env.observation_space.shape[0] 405 | ac_dim = env.action_space.shape[0] 406 | 407 | # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1. 
408 | policy_params = { 409 | 'type': 'linear', 410 | 'ob_filter': hp.filter, 411 | 'ob_dim': ob_dim, 412 | 'ac_dim': ac_dim 413 | } 414 | 415 | ARS = ARSLearner( 416 | env_name=env_name, 417 | policy_params=policy_params, 418 | num_workers=hp.n_workers, 419 | num_deltas=hp.n_directions, 420 | deltas_used=hp.deltas_used, 421 | step_size=hp.step_size, 422 | delta_std=hp.delta_std, 423 | logdir=logdir, 424 | rollout_length=hp.rollout_length, 425 | shift=hp.shift, 426 | params=hp, 427 | seed=hp.seed) 428 | 429 | return ARS.train(hp.num_episodes) 430 | -------------------------------------------------------------------------------- /notebooks/Homework 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Homework 4: Believer-Skeptic Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 5, 13 | "metadata": { 14 | "slideshow": { 15 | "slide_type": "skip" 16 | } 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import division\n", 21 | "import ADMCode\n", 22 | "from ADMCode import visualize as vis\n", 23 | "from ADMCode import believer_skeptic\n", 24 | "\n", 25 | "import numpy as np\n", 26 | "from numpy.random import sample as rs\n", 27 | "import pandas as pd\n", 28 | "import sys\n", 29 | "import os\n", 30 | "\n", 31 | "# from ipywidgets import interactive\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "import seaborn as sns\n", 34 | "import warnings\n", 35 | "\n", 36 | "# Temporary for now until push changes to PIP \n", 37 | "#sys.path.insert(0,'../ADMCode')\n", 38 | "#import believer_skeptic\n", 39 | "\n", 40 | "warnings.simplefilter('ignore', np.RankWarning)\n", 41 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n", 42 | "warnings.filterwarnings(\"ignore\")\n", 43 | "sns.set(style='white', font_scale=1.3)\n", 44 | "\n", 45 | "%matplotlib inline" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "## **Question 1:** " 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "slideshow": { 59 | "slide_type": "slide" 60 | } 61 | }, 62 | "source": [ 63 | "**Answer the following questions about the relationship between the system of equations below.** See the Lab 4 notebook for definition of terms. " 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "slideshow": { 70 | "slide_type": "fragment" 71 | } 72 | }, 73 | "source": [ 74 | "\n", 75 | "* **Eq. 1**: Go process. $$G_{j,t}(\\tau) = G_{j,t}(\\tau - \\Delta \\tau) + \\upsilon ^G _{j,t} \\Delta \\tau + \\epsilon^G_j (\\tau)$$\n", 76 | "\n", 77 | "* **Eq. 2**: No go process. $$N_{j,t}(\\tau) = N_{j,t}(\\tau - \\Delta \\tau) + \\upsilon ^N _{j,t} \\Delta \\tau + \\epsilon^N_j (\\tau)$$\n", 78 | "\n", 79 | "* **Eq. 3**: Execution process. $$\\theta_{j,t}(\\tau) = [G_{j,t}(\\tau) - N_{j,t}(\\tau)] \\cdot cosh(\\gamma \\cdot \\tau)$$\n", 80 | "\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "slide" 88 | } 89 | }, 90 | "source": [ 91 | "\n", 92 | "**1a:** Describe the three components of Eqs. 1 & 2 in laymen's terms.\n", 93 | "\n", 94 | "* **Answer 1a:** \n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | "**1b:** As time ($\\tau$) progresses, how does the exponential term in Eq. 
3 ($\\cosh (\\gamma \\cdot \\tau)$) influence the nature of the competition between channels?\n", 99 | "\n", 100 | "* **Answer 1b:** \n", 101 | "\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "## **Question 2:** " 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "**Answer the following questions about the relationship between the system of equations below.**" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "* **Eq. 4**: Action value. $$q_j(t+1) = q_j(t) + \\alpha \\cdot [r(t) - q_j(t)]$$\n", 123 | "\n", 124 | "* **Eq. 5**: Greediness. $$p_j(t) = \\frac{\\exp{\\beta \\cdot q_j(t)}}{\\Sigma^n_i \\exp{\\beta \\cdot q_i(t)}}$$\n", 125 | "\n", 126 | "* **Eq. 6**: (Reward) prediction error. $$\\delta_j(t) = p_j(t) - p_j(t-1)$$\n", 127 | "\n", 128 | "* **Eq. 7**: Update rule: $$\\upsilon^{G/N}_{j,t+1} = \\upsilon^{G/N}_{j,t} + \\alpha^{G/N} \\cdot \\delta_j(t)$$" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "**2a:** How is the estimation of the prediction error (Eq. 6) different than the normative form of the update rule in q-learning?\n", 136 | "* **Answer 2a:** \n", 137 | "\n", 138 | "**2b:** In the Believer-Skeptic model, the Go & NoGo processes have different learning rates (i.e., $\\alpha^G$ & $\\alpha^N$). What biological justification is there for these two pathways having different forms of learning?\n", 139 | "* **Answer 2b:** \n", 140 | "\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "## ** Question 3: **" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": { 154 | "slideshow": { 155 | "slide_type": "fragment" 156 | } 157 | }, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "0.62884\n" 164 | ] 165 | }, 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "0.0 18\n", 170 | "1.0 13\n", 171 | "2.0 33\n", 172 | "3.0 36\n", 173 | "Name: choice, dtype: int64" 174 | ] 175 | }, 176 | "execution_count": 6, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "# Define the DDM parameters as an object to pass\n", 183 | "p={'vd':np.asarray([.7]*4), 'vi':np.asarray([.25]*4), 'a':.25, 'tr':.3, 'xb':.00005}\n", 184 | "\n", 185 | "# Learning rates on the Go (direct) and NoGo (indirect) pathways\n", 186 | "aGo=.1\n", 187 | "aNo=.1\n", 188 | "\n", 189 | "# Run one simulation\n", 190 | "igtData = pd.read_csv(\"https://github.com/CoAxLab/AdaptiveDecisionMaking_2018/blob/master/data/IGTCards.csv?raw=true\")\n", 191 | "\n", 192 | "outdf, agentdf = believer_skeptic.play_IGT(p, feedback=igtData, beta=.09, nblocks=2, \n", 193 | " alphaGo=aGo, alphaNo=aNo, singleProcess=0)\n", 194 | "\n", 195 | "print(agentdf.rt.mean())\n", 196 | "agentdf.iloc[:, :].choice.value_counts().sort_index()" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "The Iowa Gambling task has two general metrics for estimating performance of the agent.\n", 204 | "\n", 205 | "
\n", 206 | "\n", 207 | "**Payoff (P)** is the degree to which the agent chooses the High Value decks over the Low Value decks. Th\\\n", 208 | "is is a measure of efficient value-based decision-making.\n", 209 | "\n", 210 | "P = $\\Sigma (C + D) - \\Sigma (A + B)$\n", 211 | "\n", 212 | "**Sensitivity (Q)** is the sensitivity of the agent to High Frequency rewards over Low Frequency rewards.\n", 213 | "\n", 214 | "Q = $\\Sigma (B + D) - \\Sigma (A + C)$\n", 215 | "\n", 216 | "(In the simulations above Deck A is choice 0, Deck B is choice 1, Deck C is choice 2, and Deck D is choice 3)." 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": { 222 | "slideshow": { 223 | "slide_type": "slide" 224 | } 225 | }, 226 | "source": [ 227 | "**Q3:** From the agent dataframe (agentdf) run in the code cell above, calculate P & Q." 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 7, 233 | "metadata": { 234 | "scrolled": true, 235 | "slideshow": { 236 | "slide_type": "fragment" 237 | } 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "# CODE FOR ANSWERING Q3" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "## ** Question 4: **" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 8, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "# Learning rates on the Go (direct) and NoGo (indirect) pathways\n", 258 | "aGo=.1\n", 259 | "aNo=.1\n", 260 | "\n", 261 | "outdf, agentdf = believer_skeptic.play_IGT(p, feedback=igtData, beta=.09, nblocks=2, \n", 262 | " alphaGo=aGo, alphaNo=aNo, singleProcess=0)\n", 263 | "\n", 264 | "## INSERT CALCULATION CODE FOR PAYOFF & SENSITIVITY FROM QUESTION 3 HERE\n", 265 | "## TO ANSWER THE QUESTIONS BELOW" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "(To answer the questions below, you may need to repeate several runs of the code above in order to see stability in Payoff & Sensitivity scores). " 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "**4a:** Change $\\alpha^N$ (i.e., aNo) above to 0.025, while keeping $\\alpha^G$ (i.e., aGo) at 0.1. How does this impact the Payoff and Sensitivity scores?\n", 280 | "* **Answer 4a:** \n", 281 | "\n", 282 | "\n", 283 | "**4b:** Put $\\alpha^N$ (i.e., aNo) back to 0.1, while reducing $\\alpha^G$ (i.e., aGo) at 0.05. How does this impact the Payoff and Sensitivity scores?\n", 284 | "* **Answer 4b:** \n" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": { 290 | "slideshow": { 291 | "slide_type": "slide" 292 | } 293 | }, 294 | "source": [ 295 | "## **Bonus Problems** \n", 296 | "\n", 297 | "Full credit is only given if the instructor can run your modified code below.\n", 298 | "\n", 299 | "**BP a:** Use the process simulation code below to visualize how varying the drift rate of the Go ($v_d$) and NoGo ($v_i$) processes impacts the dynamics of the four choices. \n", 300 | "\n", 301 | "* **Bonus Answer a:** *copy/paste your modified code into a code cell below* \n", 302 | "\n", 303 | "**BP b:** Write a set of nested for-loops to simulate a set of agent runs with $\\alpha^N$ values ranging from 0.025 to 0.15 (in increments of 0.005), keeping $\\alpha^G$. Simulate 100 runs per value of $\\alpha^N$ and report (or visualize) the average Payoff & Sensitivity score. Report how these values are impacted by different levels of $\\alpha^N$. 
\n", 304 | "* **Bonus Answer b:** *copy/paste your modified code into a code cell below* \n", 305 | "\n", 306 | "\n", 307 | "**BP c:** Repeat the simulations from Bonus Problem b above but now increase $v_i$ to 0.5. How does this change the results?\n", 308 | "* **Bonus Answer c:** *copy/paste your modified code into a code cell below* " 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": { 314 | "slideshow": { 315 | "slide_type": "slide" 316 | } 317 | }, 318 | "source": [ 319 | "## ** Process Code **" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 9, 325 | "metadata": { 326 | "slideshow": { 327 | "slide_type": "fragment" 328 | } 329 | }, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "2\n" 336 | ] 337 | }, 338 | { 339 | "data": { 340 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD6CAYAAABNu5eFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXl8G+WZ+L+W70s+Y8eOczjX5HZOICEthJS2CWdvKAtt\naWlpt3TLtnRbdvvb7QWFZbst2y6wbSlle2wpLeUulCScIRACuUgyuW/biS/JtnxLvz8evZnRWLJl\nW7Zk+/1+PvpImhnNjEaj533e50wKBAJoNBqNZnzgivcJaDQajWbk0EJfo9FoxhFa6Gs0Gs04Qgt9\njUajGUekxPsEImEYRgpQAZw0TbM73uej0Wg0Y4GEFfqIwD+yYcOGeJ+HRqPRjDaSIq3Q5h2NRqMZ\nR2ihr9FoNOMILfQ1Go1mHBGVTd8wjCXAA8B84ABws2maW8Js9y/A5wE3sB34smmau4PrfgrcBHTZ\nPjLPNM3jQ/oGGo1Go4mafjV9wzAygCeBXwH5wL3AE4Zh5Di2+zRwA3AxUAy8ADxtGIY6xhLgOtM0\nc2wPLfA1Go1mBInGvLMG8JumeZ9pml2maT4I1ALrHdsVAz8wTfNwMMTyJ8AUoCIo+KsQ7V+j0Wg0\ncSIa884cYI9jmRlcbi0wzXsc21wJ1AMngVlAJnCPYRgXBpd92zTNpwZz0hqNRqMZHNFo+tmAz7HM\nB2RF+oBhGBcB9wNfMU3TDxQALwJ3A+XAd4BHDMNYOIhz1mg0Gs0giUbT9yFaup0soCXcxoZhXA/8\nN3CLaZq/Awg6fdfaNvuLYRgbgMuBXQM9aY1Goxlr7N0Ljz0GM2fCU0/BtdfCunWxP040Qn8v8GXH\nMgP4nXNDwzC+DXwVuMo0zY225WuBmaZpPmDbPANoH/AZazQazRjk/POhuTl0WbyE/kYg3TCMWxCT\nzfVAKfCcfSPDMD4D3AqsMk1zn2MffuA/DMPYA2wGPgGcD3x6SGev0Wg0YwS7wF+2DB56aHiO06/Q\nN02zwzCMdYjAvwM4CFxpmmarYRjPAq+YpnkH8C0gF3jLMAz7LlaYprnJMIx/AB5EbPomcIVpmqdi\n+3U0Go1m9NHusHmsXw+uYUqdTUrUdomGYUwjWHCtoqIi3qej0Wg0w8a2bbB8ufX+0UfhIx8Z0i51\nwTWNRqNJRA4cCBX4AIsWDd/xErm0skaj0Yx5nnlGnpOS4I474NAhmDFj+I6nhb5Go9HEke3BOgXv\nvgtz5w7/8bR5R6PRaOLIjh2QkQGzZo3M8bTQ12g0mjjR1SUa/oIFkDJCdhct9DUajSZOmCZ0dsLi\nxSN3TC30NRqNJk4oe35V1cgdUwt9jUajiRM7dsizFvoajUYzDlBCfzjj8p1ooa/RaDRx4Nln4W9/\ng8pKyMsbueNqoa/RaDQjTE8PfOxj8nrVqpE9thb6Go1GM8IcPAitrTBnDjzwQP/bxxIt9DUajWaE\nUbb8z38esrNH9tha6Gs0Gs0IE49QTYWuvaPRaDSD4Lnn4Nvfhu7u0OVZWXDVVfCHP4DfL8u++EW4\n8EJLs/cFu45roa/RaDSjhP/5H9i6FXJypEImiIPW54PXXpP3ubnSEevHP4aTJ63lAJMmQVHRyJ+3\nNu9oNBrNINixQ4S212s99u611k+ZIstWrZJyC2+8Efr5kSy9YEcLfY1Goxkgzc1S976qytLyASZP\ntl4r083ixTIDeO650H3Ew7QDWuhrNJpxSGcn1NaC6hYbCFj2975ob4fqanjpJXnvFNz2AUA1QrFv\nc+ml1mst9DUajWYECARg4UKYOBFuv12W/fM/Q2oq1NdH/lxbm2TPlpfDFVfIsnCCW5VUmDJFnu1m\nHPv2CxYM/jsMBe3I1Wg044pjx2D/fnn9/PNw553yAHG0Xnll+M/t2gU1NTBvngj2vDz40Id6b/f4\n45Jw9aUvyfvly+G22+DMGbj5Zml4/uqrI9MlKxxa6Gs0mnGFSowCaWDS2Wm937cvstBXn7v1Vvjc\n5yLvf9o0axABcLng7rut9zNmwAUXDPi0Y4Y272g0mnGFEt4TJ0JHhxQ+c67r63PxssXHCq3pazSa\nMc1TT4lZRvHMM/L8qU/BXXfBPfdY67a/7OX3dzXRnlNMijuLD31I4vBBhL7LFT9bfKxICij3dYJh\nGMY04MiGDRuoqKiI9+loNJpRyPbtsGRJ7+Xl5fCnP8HKlday5GS4uOcFMmmjjmK2sJI77oBvfUsi\ne/LzoaIC9uwZufMfAkmRVmhNX6PRjFm2bpXnL34xtITx0qXiSH32Wairg8JCWDi7g4P/3QaAr8vD\n5T+1Pn/0qMTmj3bTDmihr9FoxjDKDn/jjRJF4+SDH7S9Oetl8hp5GQh0UfE7Hzt2ZIXsJ15ZtLEk\nKqFvGMYS4AFgPnAAuNk0zS1htvsX4POAG9gOfNk0zd3Bde8DfgxUAm8DnzVNc38svoRGo9GEQ9nh\n58+PYmOvV57z8kjyeLhgnpdHX83C6x07TlyIInrHMIwM4EngV0A+cC/whGEYOY7tPg3cAFwMFAMv\nAE8bhuEyDKMU+DPwLaAguO4xwzAi2p00Go2mP+6/H66+Gp54Qt5v3y4dqb7xDfjP/5R4eMOAzMwo\ndubxyHOwlsIHi7aSTQtXXw0PPiirRkTon
[... base64-encoded PNG output truncated for readability; the figure is the one produced by the cell's source below (the winning accumulator's trajectory in blue, the losing accumulators in red) ...]
b54liBWq36gSwpHIybGaXjupqxNNe+9eEfqBgAhc\nZX5RUT5FRfDe94qALi+XWYbHI88FBSLYs7N7h2gqod/VJQPI6tWyfMkScdbm5YnGn5Rk+Rny8qzC\navbsTOXonThRzmesV8zUaMY50Qh9H+As3JEFtPSznRLoLQPYR2woK8P9wQupfiqJv20NcCbg4trF\np7l8bgdkFkk0zkUXicY7adLgjuFyiVasnL/2cgf2qB6wTDYTJ8pr1WBlyZLQmihtbaGDiHrttLfb\nTTBut2j4V10l7yM5Xu3mKLvQ93ols3jFiv6/s0ajGfVEo9btRcwwdgx6m3yc2xlAE3Dauc4wjGLE\n0bt3gOcbHcnJTJqdTXqKn0OHIYkAlaWtYnbJyBBBazdxDJZISVZ2wW0vwpaXFyqwnUWw7E5UuyPW\nmWHrLJkb7blCqDNXtczTzluNZtwQjaa/EUg3DOMWJALneiTs8jnHdr8B7jcM40/ACeC7wO9M0/Qb\nhvF74CXDMB4E3gLuBJ41TbM+Rt+jF8kFbqZObaXuUCNe3ExLrxZ7dk5O7OLNlbA8elS059JSiZu3\nm4yOHLE0f3sdnXDYB4HsbBH2J07IzCA11Yr/V/V7wtXXiYQqAqcGkEBAqnSCdt5qNOOIfjV90zQ7\ngHVIuGUDcAtwpWmarYZhPGsYxu3B7Z4E7gKeBo4jWv5twXXbgZuAB4EzQDnwmZh/Gzv5+dLsiloy\naKOk/ZgIzpycwZt0whwDkKSqN98UW/0WR/rCwYMyCCQlhWr64SpUqhLJkybJvru7JTu4vT20QJrL\nJftJS4u+rorKGm4JNn6vq7PKJ9v3rdFoxjRJgb6KfsURwzCmAUc2bNhARV8doiLR04PnuS2882w1\nE2YXMN+1T+zdy5aJRh4rbV81vz5yRAq0HTokAnbtWjGldHXJdllZVmhlQ4MI4JQwE636ekvg27Np\ni4tDZwLKTzCQaJudOyXK56KL5LxNU7Jvq6r6noFoNJrRRsQ/9JgswwBAcjJ558/h4p4GSO+BjilS\nQjhSRupgmThRZg9HjkiNGpAM2MzMyI0rCgsj708NDMnJIpAjMZjKiXa7vjLzzJmjBb5GM44Y2/F5\nSsh1dIS+jzXKXq6Ok6g2cmfLRLufQKPRjAvGttBPTQ3ViIdL6DuTmhI1GkadV0ODOJcTdXDSaDTD\nxtgW+mBlsRYWhrehxwrlmM3OTtymFSkpcn6q5EKiDk4ajWbYGLs2fcWiRWK3jlRHPlYYhmS/DufA\nEgvc7tAQUo1GM64Y+5o+SGjjSDgr09ISv4zBYBK7NBrNmCHBJZQm5ihBr3r1ajSacYUW+uMNpek7\n6/VrNJpxQYIboDUxJzMTpk/XWbgazThFC/3xyPz58T4DjUYTJ/T8XqPRaMYRWuhrNBrNOEILfY1G\noxlHaKGv0Wg04wgt9DUajWYcoYW+RqPRjCO00NdoNJpxRCLH6ScD1NTUxPs8NBqNZlSxdu3aacBJ\n0zS7nesSWeiXAVx33XXxPg+NRqMZbRwBKoGjzhWJLPS3Au8BqoGeOJ+LRqPRjDZOhluYsI3RNRqN\nRhN7tCNXo9FoxhFa6Gs0Gs04Qgt9jUajGUdooa/RaDTjCC30NRqNZhyRyCGbg8IwjCXAA8B84ABw\ns2maW+J7VhaGYXwduAPotC1eB+wGHgQuATzAd0zT/OXIn6FgGMZ5wF9M0ywPvi+IdH6GYaQD/w18\nCOgC7jVN8wdxPt/lwBtAm22zO0zTvMMwjCTkN/gc8h94GPhH0zT/fzvnFmJVFcfhzygdx6Ikch4i\nxaJ+BVEpQhTdMShzRCLsAhNJGEWJD2GYVmZhkmkUhFaGQU4SmpE3zBB7yIKETILMn6ZSPXTFUqfJ\nMKyHtSf3nDnn2Ms+e3HO+mAeZu098M2Ptdf5r8vZhR4NlnQNsBi4GPgVWGj7tVizreMbY7ZTgHnA\necC3wBzb70ecbS3fwrNtqkFfUhuwHpgPvAF0AesknW+7p1S5E4wBZttelG+U9C7QA3QAlwGbJH3V\n6A+srGNNBV4E8t/mW1bHbz4wivBlkBHAh5L22l5Vou8YYJPtiVX+7GHgNsL/8Q+wAXgUWFig53Bg\nHfAI8A5wBbBF0j7gQSLL9iS+o4kr24uAN4GbbX8qaTywUdK5wKvEl20938L7bbMt79wIHLe91PYx\n28uBn4AJJXvlGQPszDdIOh2YDMy1fdT2dmAlcG8JfrOBGYQH4v/6dRGqkUO29wKvAPeV5ZsxIOcc\nXcBLtn+w/SOwgOJ9RwEbba+0fdz2DuAj4GrizLaeb1TZ2t4DdGQD6KmEAf4IYTYdXbYn8S0826aq\n9AnT0F0Vbc7aS0dSOyBghqRu4DfgBeAL4Jjt/bnbDdzeeEuWE6aQ1+faLqSGX1YRjqB/7iZUJY2g\nmi+Eh+eopAOE9zitIkyh/2JgPzEgSYNsF/JtRds7CQ8t8F8lfS3wJRFmW8f3LcJyZDTZZr49kkYT\nlnRPAR4CLiDCbGv52j6cLU8Xmm2zVfrDgN6Ktl6gvQSXanQA24ClwEjgAcKyxET6r+FBSd5ZFVHZ\ngYZR229Y7vfKa4VTwxfgF8JS36XADYRZ4LzsWmU/6SU8C0OKMz2BpDMzt88J1XOU2fZR4bueeLP9\nHhgKjCfsRXQSd7b9fCXdRAOybbZKv5cQYp52wppe6dg+QP+K9GNJK4DrgLaK26PxJuRay6+vEw4F\nDldcKw3bk3K/7pf0HGFGMIuB/aQd+Nv20aK9supuA7APuBO4hIizrfS1fRyIMtvcGyW3SloDjCPi\nbKv4Tm5Ev222Sv9rwvJJHjFwyacUJI2VNKuiuQ34DhgsaWT+diLxJkxBq/rZPgj8TP/cS3WXNFzS\nIkln5JrbgL6Ho7KfKGsr2mss4WTGZsID/icRZ1vNN8ZsJU2QtKWieTDhgyq6bOv4DmpEts1W6W8F\nhkiaTti17yIsqWwu1eoEPcBcSd8A7xGmbncRqv+zgAWSphGOm95DJBvQto9IWkttv27gaUl3AGcT\nTnw8Vops4BDhGN6g7EN2FDAHeD273g3MlLSVcFTvcWBFkUKSOoAPgMW2n+9rjzXbWr5EmC2wAxgn\nqQt4G7iFkN+VhGXUqLKt43sVYWmn0GybqtLPNjtuBe4GDgLTgUm2/yhVLCPbtZ8CPEXYrV8CTM1O\nRkwDTiO8DnUNMNP2Z2W5VqGe3xPAHmA3Yc9ime3VpVgC2RJEJ3A54Xz5NmA18HJ2yxJgLbCdUNl9\nQthbKZL7gXOAJyX15H7mE2e2VX2BZ4ks2+wkSyfhFNfvwDOEmcluIsy2ju8uGpBterVyIpFItBBN\nVeknEolEoj5p0E8kEokWIg36iUQi0UKkQT+RSCRaiDToJxKJRAuRBv1EIpFoIdKgn0gkEi1EGvQT\niUSihfgXC9oZ77C/RAEAAAAASUVORK5CYII=\n", 341 | "text/plain": [ 342 | "" 343 | ] 
344 | }, 345 | "metadata": {}, 346 | "output_type": "display_data" 347 | } 348 | ], 349 | "source": [ 350 | "# This is a snippet from believer_skeptic.py, specifically the \n", 351 | "# simulate_multirace function.\n", 352 | "single_process=0 \n", 353 | "si=.1\n", 354 | "tb=1.0\n", 355 | "dt=.001\n", 356 | "nresp = p['vd'].size\n", 357 | "dx = si * np.sqrt(dt)\n", 358 | "nTime = np.ceil((tb-p['tr'])/dt).astype(int)\n", 359 | "xtb = believer_skeptic.temporal_dynamics(p, np.cumsum([dt]*nTime))\n", 360 | "\n", 361 | "# Run the process model\n", 362 | "Pd = .5 * (1 + (p['vd'] * np.sqrt(dt))/si)\n", 363 | "Pi = .5 * (1 + (p['vi'] * np.sqrt(dt))/si)\n", 364 | "direct = xtb * np.where((rs((nresp, nTime)).T < Pd),dx,-dx).T\n", 365 | "indirect = np.where((rs((nresp, nTime)).T < Pi),dx,-dx).T\n", 366 | "execution = np.cumsum(direct-indirect, axis=1)\n", 367 | "\n", 368 | "act_ix, rt, rt_ix = believer_skeptic.analyze_multiresponse(execution, p) \n", 369 | "\n", 370 | "nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1)\n", 371 | "rts = p['tr'] + nsteps_to_rt*dt\n", 372 | "\n", 373 | "# set non responses to 999\n", 374 | "rts[rts==p['tr']]=999\n", 375 | "\n", 376 | "# get accumulator with fastest RT (winner) in each cond\n", 377 | "act_ix = np.argmin(rts)\n", 378 | "winner, rt=act_ix, rts[act_ix]\n", 379 | "rt_ix = np.ceil((rt-p['tr'])/dt).astype(int)\n", 380 | "actions = np.arange(nresp)\n", 381 | "losers = actions[actions!=act_ix]\n", 382 | "print(act_ix)\n", 383 | "plt.plot(execution[act_ix][:rt_ix], color='b')\n", 384 | "for l in losers:\n", 385 | " plt.plot(execution[l][:rt_ix], color='r', alpha=.3)\n", 386 | "sns.despine()" 387 | ] 388 | } 389 | ], 390 | "metadata": { 391 | "kernelspec": { 392 | "display_name": "Python 3", 393 | "language": "python", 394 | "name": "python3" 395 | }, 396 | "language_info": { 397 | "codemirror_mode": { 398 | "name": "ipython", 399 | "version": 3 400 | }, 401 | "file_extension": ".py", 402 | "mimetype": "text/x-python", 403 | "name": "python", 404 | "nbconvert_exporter": "python", 405 | "pygments_lexer": "ipython3", 406 | "version": "3.6.2" 407 | }, 408 | "latex_envs": { 409 | "LaTeX_envs_menu_present": true, 410 | "autocomplete": true, 411 | "bibliofile": "biblio.bib", 412 | "cite_by": "apalike", 413 | "current_citInitial": 1, 414 | "eqLabelWithNumbers": true, 415 | "eqNumInitial": 1, 416 | "hotkeys": { 417 | "equation": "Ctrl-E", 418 | "itemize": "Ctrl-I" 419 | }, 420 | "labels_anchors": false, 421 | "latex_user_defs": false, 422 | "report_style_numbering": false, 423 | "user_envs_cfg": false 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 2 428 | } 429 | --------------------------------------------------------------------------------