├── ADMCode
│   ├── snuz
│   │   ├── ars
│   │   │   ├── __init__.py
│   │   │   ├── utils.py
│   │   │   ├── shared_noise.py
│   │   │   ├── optimizers.py
│   │   │   ├── policies.py
│   │   │   ├── lqr_env.py
│   │   │   ├── run_policy.py
│   │   │   ├── logz.py
│   │   │   ├── filter.py
│   │   │   └── ars.py
│   │   ├── ppo
│   │   │   ├── agents
│   │   │   │   ├── __init__.py
│   │   │   │   ├── reinforce.py
│   │   │   │   └── ppo_gae.py
│   │   │   ├── __init__.py
│   │   │   ├── envs.py
│   │   │   ├── utils.py
│   │   │   ├── run_ppo.py
│   │   │   ├── storage.py
│   │   │   └── models.py
│   │   └── __init__.py
│   ├── foobar.txt
│   ├── __init__.py
│   ├── sdt.py
│   ├── utils.py
│   ├── ddm.py
│   ├── believer_skeptic.py
│   ├── qlearn.py
│   ├── visualize.py
│   └── neural.py
├── requirements.txt
├── notebooks
│   ├── images
│   │   ├── IGT.png
│   │   ├── car.gif
│   │   ├── bandit.png
│   │   ├── attractor.mp4
│   │   ├── believer-skeptic.png
│   │   ├── multichannel_selection.png
│   │   ├── multichannel_selection.tiff
│   │   ├── believer-skeptic_to_accumulation.png
│   │   └── believer-skeptic_to_accumulation.tiff
│   ├── Lab TEMPLATE.ipynb
│   ├── Lab 5 - SNUZ.ipynb
│   └── Homework 4.ipynb
├── data
│   └── IGTCards.csv
├── LICENSE
├── .gitignore
├── setup.py
└── README.md
/ADMCode/snuz/ars/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ppo/agents/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ppo/agents/reinforce.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | pandas
4 | matplotlib
5 | seaborn
6 | scikit-learn
7 | numba
8 | future
9 |
--------------------------------------------------------------------------------
/notebooks/images/IGT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/IGT.png
--------------------------------------------------------------------------------
/notebooks/images/car.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/car.gif
--------------------------------------------------------------------------------
/notebooks/images/bandit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/bandit.png
--------------------------------------------------------------------------------
/notebooks/images/attractor.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/attractor.mp4
--------------------------------------------------------------------------------
/notebooks/images/believer-skeptic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic.png
--------------------------------------------------------------------------------
/notebooks/images/multichannel_selection.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/multichannel_selection.png
--------------------------------------------------------------------------------
/notebooks/images/multichannel_selection.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/multichannel_selection.tiff
--------------------------------------------------------------------------------
/notebooks/images/believer-skeptic_to_accumulation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic_to_accumulation.png
--------------------------------------------------------------------------------
/notebooks/images/believer-skeptic_to_accumulation.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CoAxLab/AdaptiveDecisionMaking_2018/HEAD/notebooks/images/believer-skeptic_to_accumulation.tiff
--------------------------------------------------------------------------------
/ADMCode/snuz/__init__.py:
--------------------------------------------------------------------------------
1 | from ADMCode.snuz.ppo.run_ppo import run_ppo
2 | from ADMCode.snuz.ars.ars import run_ars
3 | from ADMCode.snuz import ars
4 | from ADMCode.snuz import ppo
5 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ppo/__init__.py:
--------------------------------------------------------------------------------
1 | from ADMCode.snuz.ppo import models
2 | from ADMCode.snuz.ppo import agents
3 | from ADMCode.snuz.ppo import envs
4 | from ADMCode.snuz.ppo import storage
5 |
--------------------------------------------------------------------------------
/ADMCode/foobar.txt:
--------------------------------------------------------------------------------
1 | Now let's evaluate the performance of the model using two metrics:
2 |
3 |
4 |
5 | **Payoff (P)** is the degree to which the agent chooses the High Value decks over the Low Value decks. This is a measure of efficient value-based decision-making.
6 |
7 | $P = \sum (C + D) - \sum (A + B)$
8 |
9 | **Sensitivity (Q)** is the degree to which the agent prefers High Frequency rewards over Low Frequency rewards.
10 |
11 | $Q = \sum (B + D) - \sum (A + C)$
12 |
--------------------------------------------------------------------------------
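The two metrics above can be computed directly from a sequence of simulated deck choices. Below is a minimal sketch (not part of the repository); the `choices` list and its deck labels are hypothetical stand-ins for whatever your agent produces.

```python
from collections import Counter

# hypothetical sequence of deck choices produced by an agent
choices = ['a', 'c', 'd', 'd', 'b', 'c', 'd', 'a', 'd', 'c']
n = Counter(choices)

# Payoff: high-value decks (C, D) minus low-value decks (A, B)
P = (n['c'] + n['d']) - (n['a'] + n['b'])

# Sensitivity: high-frequency-reward decks (B, D) minus low-frequency decks (A, C)
Q = (n['b'] + n['d']) - (n['a'] + n['c'])

print(P, Q)  # 4 0 for this toy sequence
```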
/ADMCode/snuz/ppo/envs.py:
--------------------------------------------------------------------------------
1 | """
2 | From https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/envs.py
3 | """
4 |
5 | import gym
6 | from gym.spaces.box import Box
7 | import cv2
8 | import numpy as np
9 |
10 |
11 | def make_env(env_id, seed, rank):
12 | def _thunk():
13 | env = gym.make(env_id)
14 | env.seed(seed + rank)
15 | env = WrapPyTorch(env)
16 | return env
17 |
18 | return _thunk
19 |
20 |
21 | class WrapPyTorch(gym.ObservationWrapper):
22 | def __init__(self, env=None):
23 | super(WrapPyTorch, self).__init__(env)
24 | self.observation_space = Box(0, 255, [1, 84, 84], dtype=np.uint8)
25 |
26 | def observation(self, observation):
27 | x = cv2.resize(observation, (84, 84), interpolation=cv2.INTER_AREA)
28 | return np.expand_dims(x, 0)
29 |
--------------------------------------------------------------------------------
/data/IGTCards.csv:
--------------------------------------------------------------------------------
1 | a,b,c,d
2 | 100,100,50,50
3 | 100,100,50,50
4 | -150,100,-25,50
5 | 100,100,50,50
6 | -300,100,-75,50
7 | 100,100,50,50
8 | -200,100,-25,50
9 | 100,100,50,50
10 | -250,-1250,-75,50
11 | -350,100,-50,-250
12 | 100,100,50,50
13 | -350,100,-25,50
14 | 100,100,-75,50
15 | -250,-1250,50,50
16 | -200,100,50,-250
17 | 100,100,50,50
18 | -300,100,-25,50
19 | -150,100,-75,50
20 | 100,100,50,50
21 | 100,100,-50,50
22 | 100,-1250,50,-250
23 | -300,100,50,50
24 | 100,100,50,50
25 | -350,100,-50,50
26 | 100,100,-25,50
27 | -200,100,-50,50
28 | -250,100,50,50
29 | -150,100,50,50
30 | 100,100,-75,50
31 | 100,100,-50,50
32 | -350,100,50,50
33 | -250,-1250,50,-250
34 | -250,100,50,50
35 | 100,100,-25,50
36 | 100,100,-25,50
37 | 100,100,50,50
38 | -150,100,-75,50
39 | -300,100,50,50
40 | 100,100,-50,50
41 | 100,100,-25,50
42 | 100,-1250,-50,-250
43 | -300,100,50,50
44 | 100,100,50,50
45 | -350,100,-50,50
46 | 100,100,-25,50
47 | -200,100,-50,50
48 | -250,100,50,50
49 | -150,100,50,50
50 | 100,100,-75,50
51 | 100,100,-50,50
52 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ars/utils.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/openai/evolution-strategies-starter.
3 |
4 | import numpy as np
5 |
6 | def itergroups(items, group_size):
7 | assert group_size >= 1
8 | group = []
9 | for x in items:
10 | group.append(x)
11 | if len(group) == group_size:
12 | yield tuple(group)
13 | del group[:]
14 | if group:
15 | yield tuple(group)
16 |
17 |
18 |
19 | def batched_weighted_sum(weights, vecs, batch_size):
20 | total = 0
21 | num_items_summed = 0
22 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size),
23 | itergroups(vecs, batch_size)):
24 | assert len(batch_weights) == len(batch_vecs) <= batch_size
25 | total += np.dot(np.asarray(batch_weights, dtype=np.float64),
26 | np.asarray(batch_vecs, dtype=np.float64))
27 | num_items_summed += len(batch_weights)
28 | return total, num_items_summed
29 |
--------------------------------------------------------------------------------
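A quick sanity check of `batched_weighted_sum`, which combines a set of vectors with scalar weights while processing the pairs in batches. This is only a sketch and assumes the package is importable; note that importing anything under `ADMCode.snuz` also pulls in `gym`, `torch`, and `ray` via the package `__init__`.

```python
import numpy as np
from ADMCode.snuz.ars.utils import batched_weighted_sum

weights = [1.0, -1.0, 0.5]
vecs = [np.ones(4), 2 * np.ones(4), 4 * np.ones(4)]

# processes the (weight, vector) pairs two at a time
total, n = batched_weighted_sum(weights, vecs, batch_size=2)
print(total)  # [1. 1. 1. 1.]  (1*1 - 1*2 + 0.5*4 per component)
print(n)      # 3 items summed
```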
/ADMCode/snuz/ars/shared_noise.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/ray-project/ray/tree/master/python/ray/rllib/es
3 |
4 | import ray
5 | import numpy as np
6 |
7 | @ray.remote
8 | def create_shared_noise():
9 | """
10 | Create a large array of noise to be shared by all workers. Used
11 | for avoiding the communication of the random perturbations delta.
12 | """
13 |
14 | seed = 12345
15 | count = 250000000
16 | noise = np.random.RandomState(seed).randn(count).astype(np.float64)
17 | return noise
18 |
19 |
20 | class SharedNoiseTable(object):
21 | def __init__(self, noise, seed = 11):
22 |
23 | self.rg = np.random.RandomState(seed)
24 | self.noise = noise
25 | assert self.noise.dtype == np.float64
26 |
27 | def get(self, i, dim):
28 | return self.noise[i:i + dim]
29 |
30 | def sample_index(self, dim):
31 | return self.rg.randint(0, len(self.noise) - dim + 1)
32 |
33 | def get_delta(self, dim):
34 | idx = self.sample_index(dim)
35 | return idx, self.get(idx, dim)
36 |
--------------------------------------------------------------------------------
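A small sketch of the shared-noise lookup, using a much smaller noise vector than the 250M-entry table produced by `create_shared_noise`. Importing the module assumes `ray` is installed (the remote decorator is applied at import time), and importing under `ADMCode.snuz` also pulls in `gym` and `torch`.

```python
import numpy as np
from ADMCode.snuz.ars.shared_noise import SharedNoiseTable

noise = np.random.RandomState(12345).randn(1000).astype(np.float64)
table = SharedNoiseTable(noise, seed=11)

idx, delta = table.get_delta(dim=5)           # random index + 5-element slice
assert np.allclose(delta, table.get(idx, 5))  # any worker can re-fetch it by index
print(idx, delta)
```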
/ADMCode/snuz/ars/optimizers.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/openai/evolution-strategies-starter.
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import numpy as np
9 |
10 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES
11 | class Optimizer(object):
12 | def __init__(self, w_policy):
13 | self.w_policy = w_policy.flatten()
14 | self.dim = w_policy.size
15 | self.t = 0
16 |
17 | def update(self, globalg):
18 | self.t += 1
19 | step = self._compute_step(globalg)
20 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5)
21 | return self.w_policy + step, ratio
22 |
23 | def _compute_step(self, globalg):
24 | raise NotImplementedError
25 |
26 |
27 | class SGD(Optimizer):
28 | def __init__(self, pi, stepsize):
29 | Optimizer.__init__(self, pi)
30 | self.stepsize = stepsize
31 |
32 | def _compute_step(self, globalg):
33 | step = -self.stepsize * globalg
34 | return step
35 |
36 |
--------------------------------------------------------------------------------
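A minimal illustration of the optimizer API (assuming the package and its `gym`/`torch`/`ray` dependencies are importable): `update()` takes a flat gradient estimate and returns the proposed new weights together with the step-to-weight norm ratio; it does not modify `w_policy` in place.

```python
import numpy as np
from ADMCode.snuz.ars.optimizers import SGD

w = np.zeros((2, 3))            # e.g. a LinearPolicy weight matrix
opt = SGD(w, stepsize=0.1)

grad = np.ones(w.size)          # flat gradient, matching w.flatten()
new_w, ratio = opt.update(grad)
print(new_w.reshape(w.shape))   # every weight steps by -0.1
```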
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 CoAxLab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ##########################
2 | ##########################
3 | ## radd .gitignore file ##
4 | ##########################
5 | ##########################
6 |
7 |
8 | # Compiled source #
9 | ###################
10 | *.com
11 | *.class
12 | *.dll
13 | *.exe
14 | *.o
15 | *.so
16 |
17 |
18 | # Packages #
19 | ############
20 | # it's better to unpack these files and commit the raw source
21 | # git has its own built in compression methods
22 | *.7z
23 | *.dmg
24 | *.gz
25 | *.iso
26 | *.jar
27 | *.rar
28 | *.tar
29 | *.zip
30 |
31 | # Logs and databases #
32 | ######################
33 | *.log
34 | *.sql
35 | *.sqlite
36 |
37 | # OS generated files #
38 | ######################
39 | .DS_Store
40 | .DS_Store?
41 | ._*
42 | .Spotlight-V100
43 | .Trashes
44 | ehthumbs.db
45 | Thumbs.db
46 |
47 | # Temporary Files #
48 | ###################
49 | .ipynb_checkpoints/
50 | ADMCode/__pycache__/
51 | __pycache__/
52 | (alias)/
53 | *.pyc
54 |
55 | # Wheel build folder #
56 | ######################
57 | build/
58 |
59 | # Setuptools distribution folder #
60 | ##################################
61 | dist/
62 |
63 | # Python egg metadata#
64 | ######################
65 | *.egg-info
66 | *.egg
67 |
--------------------------------------------------------------------------------
/ADMCode/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Code and lab resources for "Neural and Cognitive Models of
3 | Adaptive Decision Making" course (2018)
4 |
5 | Authors:
6 | CoAxLab at github.com
7 |
8 | """
9 |
10 | from __future__ import print_function
11 | import os
12 | import sys
13 | from glob import glob
14 |
15 | modules = glob(os.path.dirname(__file__) + "/*.py")
16 | __all__ = [os.path.basename(f)[:-3] for f in modules]
17 |
18 | major = 0
19 | minor = 5
20 | patch = 2
21 | __version__ = '.'.join([str(v) for v in [major, minor, patch]])
22 |
23 | _package_dir = os.path.dirname(os.path.realpath(__file__))
24 |
25 | def style_notebook():
26 | from IPython.core.display import HTML
27 | _styles_dir = os.path.join(_package_dir, 'styles')
28 | style = os.path.join(_styles_dir, 'custom.css')
29 | csscontent = open(style, "r").read()
30 | return HTML(csscontent)
31 |
32 |
33 | def load_attractor_animation():
34 | import io, base64
35 | from IPython.display import HTML
36 | _examples_dir = os.path.join(_package_dir, '../notebooks/images')
37 | mov_fpath = os.path.join(_examples_dir, 'attractor.mp4')
38 | video = io.open(mov_fpath, 'r+b').read()
39 | encoded = base64.b64encode(video)
40 |     data='''<video controls><source src="data:video/mp4;base64,{0}" type="video/mp4" /></video>'''.format(encoded.decode('ascii'))
41 | return HTML(data=data)
42 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | import numpy as np
3 | import os
4 |
5 | package_data = {'ADMCode':['notebooks/*.ipynb', 'notebooks/images/*.png', 'notebooks/images/*.mp4', 'data/*.csv']}
6 |
7 | major = 0
8 | minor = 5
9 | patch = 2
10 | __version__ = '.'.join([str(v) for v in [major, minor, patch]])
11 |
12 | setup(
13 | name='ADMCode',
14 | version=__version__,
15 | author='Kyle Dunovan, Timothy Verstynen',
16 | author_email='dunovank@gmail.com',
17 | url='http://github.com/CoAxLab/AdaptiveDecisionMaking_2018',
18 | packages=['ADMCode', 'ADMCode.snuz', 'ADMCode.snuz.ars', 'ADMCode.snuz.ppo', 'ADMCode.snuz.ppo.agents'],
19 | package_data=package_data,
20 | description='Code and lab resources for Neural and Cognitive Models of Adaptive Decision Making course (2018)',
21 | install_requires=['numpy', 'scipy', 'pandas', 'matplotlib', 'seaborn', 'scikit-learn', 'numba', 'future'],
22 | include_dirs = [np.get_include()],
23 | classifiers=[
24 | 'Environment :: Console',
25 | 'Operating System :: OS Independent',
26 | 'License :: OSI Approved :: MIT License',
27 | 'Development Status :: 3 - Alpha',
28 | 'Programming Language :: Python',
29 | 'Programming Language :: Python :: 3',
30 | 'Programming Language :: Python :: 3.4',
31 | 'Programming Language :: Python :: 3.6',
32 | 'Topic :: Scientific/Engineering',
33 | ]
34 | )
35 |
--------------------------------------------------------------------------------
/ADMCode/sdt.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import pandas as pd
4 | from scipy.stats import norm
5 | from scipy.signal import detrend
6 |
7 |
8 | def sdt_mle(h, m, cr, fa):
9 |
10 | """Calculates maximum-likelihood estimates of sensitivity and bias.
11 |
12 | Args:
13 | h: Hits
14 | m: Misses
15 | cr: Correct Rejections.
16 | fa: False Alarms
17 |
18 | Returns:
19 | d (d-prime)
20 | c (criterion)
21 |
22 | """
23 |
24 | H, M, CR, FA = h, m, cr, fa
25 |
26 | n0, n1 = float(FA + CR), float(H + M)
27 | if H == 0: H += 0.5
28 | if H == n1: H -= 0.5
29 | if FA == 0: FA += 0.5
30 | if FA == n0: FA -= 0.5
31 |
32 | pH = H / float(n1)
33 | pFA = FA / float(n0)
34 | d = norm.ppf(pH) - norm.ppf(pFA)
35 | c = -0.5 * (norm.ppf(pH) + norm.ppf(pFA))
36 |
37 | return d, c
38 |
39 |
40 |
41 | def analyze_yesno(sdtData):
42 |
43 | hits, misses, cr, fa = sdtData[['H','M','CR','FA']].sum().values
44 |
45 | numSignal = hits + misses
46 | numNoise = cr + fa
47 | signalAcc = hits/numSignal
48 | noiseAcc = cr/numNoise
49 |
50 | d, c = sdt_mle(hits, misses, cr, fa)
51 |
52 | print("Signal Accuracy = {:.0f}%".format(signalAcc*100))
53 | print("\tHits = {}".format(hits))
54 | print("\tMisses = {}\n".format(misses))
55 |
56 | print("Noise Accuracy = {:.0f}%".format(noiseAcc*100))
57 | print("\tCorr. Rej. = {}".format(cr))
58 | print("\tFalse Alarms = {}\n".format(fa))
59 |
60 | print("d-prime (d') = {:.2f}".format(d))
61 | print("criterion (c) = {:.2f}".format(c))
62 |
--------------------------------------------------------------------------------
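A worked example of `sdt_mle` with toy counts (not course data): 40 hits, 10 misses, 35 correct rejections, and 15 false alarms give a hit rate of 0.8 and a false-alarm rate of 0.3.

```python
from ADMCode.sdt import sdt_mle

hits, misses, corr_rej, false_alarms = 40, 10, 35, 15
d, c = sdt_mle(hits, misses, corr_rej, false_alarms)

# d' = z(pH) - z(pFA); c = -(z(pH) + z(pFA)) / 2
print("d-prime = {:.2f}, criterion = {:.2f}".format(d, c))
```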
/ADMCode/snuz/ars/policies.py:
--------------------------------------------------------------------------------
1 | '''
2 | Policy class for computing action from weights and observation vector.
3 | Horia Mania --- hmania@berkeley.edu
4 | Aurelia Guy
5 | Benjamin Recht
6 | '''
7 |
8 | import numpy as np
9 | from ADMCode.snuz.ars.filter import get_filter
10 |
11 |
12 | class Policy(object):
13 | def __init__(self, policy_params):
14 |
15 | self.ob_dim = policy_params['ob_dim']
16 | self.ac_dim = policy_params['ac_dim']
17 | self.weights = np.empty(0)
18 |
19 | # a filter for updating statistics of the observations and normalizing inputs to the policies
20 | self.observation_filter = get_filter(
21 | policy_params['ob_filter'], shape=(self.ob_dim, ))
22 | self.update_filter = True
23 |
24 | def update_weights(self, new_weights):
25 | self.weights[:] = new_weights[:]
26 | return
27 |
28 | def get_weights(self):
29 | return self.weights
30 |
31 | def get_observation_filter(self):
32 | return self.observation_filter
33 |
34 | def act(self, ob):
35 | raise NotImplementedError
36 |
37 | def copy(self):
38 | raise NotImplementedError
39 |
40 |
41 | class LinearPolicy(Policy):
42 | """
43 |     Linear policy class that computes the action as the dot product of the weight matrix and the (filtered) observation.
44 | """
45 |
46 | def __init__(self, policy_params):
47 | Policy.__init__(self, policy_params)
48 | self.weights = np.zeros((self.ac_dim, self.ob_dim), dtype=np.float64)
49 |
50 | def act(self, ob):
51 | ob = self.observation_filter(ob, update=self.update_filter)
52 | return np.dot(self.weights, ob)
53 |
54 | def get_weights_plus_stats(self):
55 |
56 | mu, std = self.observation_filter.get_stats()
57 | aux = np.asarray([self.weights, mu, std])
58 | return aux
59 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ars/lqr_env.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 | import numpy as np
5 | from os import path
6 |
7 | class LQR_Env(gym.Env):
8 |
9 | def __init__(self):
10 |
11 | self.viewer = None
12 |
13 | self.A = np.array([[1.01, 0.01, 0.0],[0.01, 1.01, 0.01], [0., 0.01, 1.01]])
14 | self.B = np.eye(3)
15 |
16 | self.d, self.p = self.B.shape
17 |
18 | self.R = np.eye(self.p)
19 | self.Q = np.eye(self.d) / 1000
20 |
21 | self.time = 0
22 |
23 | self.action_space = spaces.Box(low=-1e+8, high=1e+8, shape=(self.p,))
24 | self.observation_space = spaces.Box(low=-float('inf'), high=float('inf'), shape=(self.d, ))
25 |
26 | self.state = np.random.normal(0,1,size = self.d)
27 |
28 | self._seed()
29 |
30 |
31 | def _seed(self, seed=None):
32 | self.np_random, seed = seeding.np_random(seed)
33 | return [seed]
34 |
35 | def _step(self,u):
36 |
37 | x = self.state
38 |
39 | cost = np.dot(x, np.dot(self.Q, x)) + np.dot(u, np.dot(self.R, u))
40 | new_x = np.dot(self.A, x) + np.dot(self.B, u) + self.np_random.normal(0,1,size = self.d)
41 |
42 | self.state = new_x
43 |
44 | terminated = False
45 | if self.time > 300:
46 | terminated = True
47 |
48 | self.time += 1
49 |
50 | return self._get_obs(), - cost, terminated, {}
51 |
52 | def _reset(self):
53 | self.state = self.np_random.normal(0, 1, size = self.d)
54 | self.last_u = None
55 | self.time = 0
56 |
57 | return self._get_obs()
58 |
59 | def _get_obs(self):
60 | return self.state
61 |
62 | def get_params(self):
63 | return self.A, self.B, self.Q, self.R
64 |
--------------------------------------------------------------------------------
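A rough rollout sketch for the LQR environment. The class follows the older gym API (`_reset`/`_step`), so the underscored methods are called directly here rather than going through `gym.make`; this assumes `gym` (and the other `ADMCode.snuz` dependencies, `torch` and `ray`) is installed.

```python
import numpy as np
from ADMCode.snuz.ars.lqr_env import LQR_Env

env = LQR_Env()
obs = env._reset()

total_cost = 0.0
for _ in range(10):
    u = np.zeros(3)                      # do-nothing controller
    obs, reward, done, _ = env._step(u)  # reward is the negated quadratic cost
    total_cost -= reward

print(total_cost)
```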
/ADMCode/utils.py:
--------------------------------------------------------------------------------
1 | #!usr/bin/env python
2 | from __future__ import division
3 | import pandas as pd
4 | import numpy as np
5 | from future.utils import listvalues
6 | from scipy.stats.stats import sem
7 |
8 | def get_optimal_auc(df, nblocks=25, verbose=False, as_percent=True):
9 | xdf = blockify_trials(df, nblocks=nblocks)
10 | muOptDF = xdf.groupby(['agent', 'block']).mean().reset_index()
11 | auc = pd.pivot_table(muOptDF, values='optimal', index='block').values.sum()
12 | if as_percent:
13 | auc = (auc / nblocks) * 100
14 | if verbose:
15 | print("Optimal Choice: {:.2f}".format(auc))
16 |
17 | return auc
18 |
19 | def analyze_bandits(df, nblocks=25, get_err=False):
20 | xdf = blockify_trials(df, nblocks=nblocks)
21 | optDF = xdf.groupby(['agent', 'block']).mean().reset_index()
22 | muOpt = pd.pivot_table(optDF, values='optimal', index='block').values
23 | #muOpt = pd.pivot_table(optDF, values='optimal', index='block').rolling(window=15)
24 | #rolling_mean = muOpt.mean()
25 | muOpt = np.hstack(muOpt)
26 | if get_err:
27 | errOpt = pd.pivot_table(optDF, values='optimal', index='block', aggfunc=sem).values*1.96
28 | errOpt = np.hstack(errOpt)
29 | else:
30 | errOpt = np.zeros_like(muOpt)
31 | return muOpt, errOpt
32 |
33 |
34 | def blockify_trials(data, nblocks=5, conds=None, groups=['agent']):
35 |
36 | datadf = data.copy()
37 | if conds is not None:
38 | if type(conds) is str:
39 | conds = [conds]
40 | groups = groups + conds
41 |
42 | idxdflist = []
43 | for dfinfo, idxdf in datadf.groupby(groups):
44 | ixblocks = np.array_split(idxdf.trial.values, nblocks)
45 | blocks = np.hstack([[i+1]*arr.size for i, arr in enumerate(ixblocks)])
46 | idxdf = idxdf.copy()
47 | colname = 'block'
48 | idxdf[colname] = blocks
49 | idxdflist.append(idxdf)
50 |
51 | return pd.concat(idxdflist)
52 |
--------------------------------------------------------------------------------
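A toy illustration (synthetic data, not course output) of the dataframe layout these helpers expect: one row per trial with `agent`, `trial`, and a 0/1 `optimal` column marking whether the optimal arm was chosen.

```python
import numpy as np
import pandas as pd
from ADMCode.utils import blockify_trials, get_optimal_auc

ntrials = 100
df = pd.DataFrame({
    'agent': 0,
    'trial': np.arange(1, ntrials + 1),
    'optimal': np.random.binomial(1, 0.7, size=ntrials),
})

blocked = blockify_trials(df, nblocks=25)  # adds a 'block' column
auc = get_optimal_auc(df, nblocks=25)      # mean % optimal across blocks
print(auc)                                 # ~70 for this simulated agent
```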
/ADMCode/snuz/ars/run_policy.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Code to load a policy and generate rollout data. Adapted from https://github.com/berkeleydeeprlcourse.
4 | Example usage:
5 | python run_policy.py ../trained_policies/Humanoid-v1/policy_reward_11600/lin_policy_plus.npz Humanoid-v1 --render \
6 | --num_rollouts 20
7 | """
8 | import numpy as np
9 | import gym
10 |
11 | def main():
12 | import argparse
13 | parser = argparse.ArgumentParser()
14 | parser.add_argument('expert_policy_file', type=str)
15 | parser.add_argument('envname', type=str)
16 | parser.add_argument('--render', action='store_true')
17 | parser.add_argument('--num_rollouts', type=int, default=20,
18 | help='Number of expert rollouts')
19 | args = parser.parse_args()
20 |
21 | print('loading and building expert policy')
22 | lin_policy = np.load(args.expert_policy_file)
23 |     lin_policy = list(lin_policy.items())[0][1]
24 |
25 | M = lin_policy[0]
26 | # mean and std of state vectors estimated online by ARS.
27 | mean = lin_policy[1]
28 | std = lin_policy[2]
29 |
30 | env = gym.make(args.envname)
31 |
32 | returns = []
33 | observations = []
34 | actions = []
35 | for i in range(args.num_rollouts):
36 | print('iter', i)
37 | obs = env.reset()
38 | done = False
39 | totalr = 0.
40 | steps = 0
41 | while not done:
42 | action = np.dot(M, (obs - mean)/std)
43 | observations.append(obs)
44 | actions.append(action)
45 |
46 |
47 | obs, r, done, _ = env.step(action)
48 | totalr += r
49 | steps += 1
50 | if args.render:
51 | env.render()
52 | if steps % 100 == 0: print("%i/%i"%(steps, env.spec.timestep_limit))
53 | if steps >= env.spec.timestep_limit:
54 | break
55 | returns.append(totalr)
56 |
57 | print('returns', returns)
58 | print('mean return', np.mean(returns))
59 | print('std of return', np.std(returns))
60 |
61 | if __name__ == '__main__':
62 | main()
63 |
--------------------------------------------------------------------------------
/notebooks/Lab TEMPLATE.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 117,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from __future__ import division\n",
10 | "import ADMCode\n",
11 | "from ADMCode import visualize as vis\n",
12 | "# from ADMCode import \n",
13 | "\n",
14 | "import numpy as np\n",
15 | "import pandas as pd\n",
16 | "\n",
17 | "# from ipywidgets import interactive\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "import seaborn as sns\n",
20 | "import warnings\n",
21 | "\n",
22 | "warnings.simplefilter('ignore', np.RankWarning)\n",
23 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n",
24 | "warnings.filterwarnings(\"ignore\")\n",
25 | "sns.set(style='white', font_scale=1.3)\n",
26 | "\n",
27 | "%matplotlib inline"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## Section Header"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "## Section Header"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "## Section Header"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": []
57 | }
58 | ],
59 | "metadata": {
60 | "kernelspec": {
61 | "display_name": "Python 3",
62 | "language": "python",
63 | "name": "python3"
64 | },
65 | "language_info": {
66 | "codemirror_mode": {
67 | "name": "ipython",
68 | "version": 3
69 | },
70 | "file_extension": ".py",
71 | "mimetype": "text/x-python",
72 | "name": "python",
73 | "nbconvert_exporter": "python",
74 | "pygments_lexer": "ipython3",
75 | "version": "3.6.5"
76 | },
77 | "latex_envs": {
78 | "LaTeX_envs_menu_present": true,
79 | "autocomplete": true,
80 | "bibliofile": "biblio.bib",
81 | "cite_by": "apalike",
82 | "current_citInitial": 1,
83 | "eqLabelWithNumbers": true,
84 | "eqNumInitial": 1,
85 | "hotkeys": {
86 | "equation": "Ctrl-E",
87 | "itemize": "Ctrl-I"
88 | },
89 | "labels_anchors": false,
90 | "latex_user_defs": false,
91 | "report_style_numbering": false,
92 | "user_envs_cfg": false
93 | }
94 | },
95 | "nbformat": 4,
96 | "nbformat_minor": 2
97 | }
98 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ars/logz.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/berkeleydeeprlcourse
3 |
4 | import json
5 |
6 | """
7 |
8 | Some simple logging functionality, inspired by rllab's logging.
9 | Assumes that each diagnostic gets logged each iteration
10 |
11 | Call logz.configure_output_dir() to start logging to a
12 | tab-separated-values file (some_folder_name/log.txt)
13 |
14 | """
15 |
16 | import os.path as osp, shutil, time, atexit, os, subprocess
17 |
18 | color2num = dict(
19 | gray=30,
20 | red=31,
21 | green=32,
22 | yellow=33,
23 | blue=34,
24 | magenta=35,
25 | cyan=36,
26 | white=37,
27 | crimson=38
28 | )
29 |
30 | def colorize(string, color, bold=False, highlight=False):
31 | attr = []
32 | num = color2num[color]
33 | if highlight: num += 10
34 | attr.append(str(num))
35 | if bold: attr.append('1')
36 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
37 |
38 | class G(object):
39 | output_dir = None
40 | output_file = None
41 | first_row = True
42 | log_headers = []
43 | log_current_row = {}
44 |
45 | def configure_output_dir(d=None):
46 | """
47 | Set output directory to d, or to /tmp/somerandomnumber if d is None
48 | """
49 | G.first_row = True
50 | G.log_headers = []
51 | G.log_current_row = {}
52 |
53 | G.output_dir = d or "/tmp/experiments/%i"%int(time.time())
54 | if not osp.exists(G.output_dir):
55 | os.makedirs(G.output_dir)
56 | G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w')
57 | atexit.register(G.output_file.close)
58 | print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True))
59 |
60 | def log_tabular(key, val):
61 | """
62 | Log a value of some diagnostic
63 | Call this once for each diagnostic quantity, each iteration
64 | """
65 | if G.first_row:
66 | G.log_headers.append(key)
67 | else:
68 | assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key
69 | assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key
70 | G.log_current_row[key] = val
71 |
72 |
73 | def save_params(params):
74 | with open(osp.join(G.output_dir, "params.json"), 'w') as out:
75 | out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True))
76 |
77 |
78 | def dump_tabular():
79 | """
80 | Write all of the diagnostics from the current iteration
81 | """
82 | vals = []
83 | key_lens = [len(key) for key in G.log_headers]
84 | max_key_len = max(15,max(key_lens))
85 | keystr = '%'+'%d'%max_key_len
86 | fmt = "| " + keystr + "s | %15s |"
87 | n_slashes = 22 + max_key_len
88 | print("-"*n_slashes)
89 | for key in G.log_headers:
90 | val = G.log_current_row.get(key, "")
91 | if hasattr(val, "__float__"): valstr = "%8.3g"%val
92 | else: valstr = val
93 | print(fmt%(key, valstr))
94 | vals.append(val)
95 | print("-"*n_slashes)
96 | if G.output_file is not None:
97 | if G.first_row:
98 | G.output_file.write("\t".join(G.log_headers))
99 | G.output_file.write("\n")
100 | G.output_file.write("\t".join(map(str,vals)))
101 | G.output_file.write("\n")
102 | G.output_file.flush()
103 | G.log_current_row.clear()
104 | G.first_row=False
105 |
--------------------------------------------------------------------------------
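A minimal logging sketch built from the functions above; it writes a tab-separated `log.txt` (and a `params.json`) under `/tmp/experiments/<timestamp>` unless you pass a directory. As with the other `ADMCode.snuz` modules, importing it pulls in `gym`, `torch`, and `ray`.

```python
from ADMCode.snuz.ars import logz

logz.configure_output_dir()                  # or pass an explicit directory
logz.save_params({"step_size": 0.02, "n_directions": 8})

for iteration in range(3):
    logz.log_tabular("Iteration", iteration)
    logz.log_tabular("AverageReward", 10.0 * iteration)
    logz.dump_tabular()                      # prints a row and appends to log.txt
```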
/README.md:
--------------------------------------------------------------------------------
1 | # AdaptiveDecisionMaking_2018 (ADM)
2 | Repository for code and lab resources for "Neural and Cognitive Models of Adaptive Decision Making" course (2018)
3 |
4 |
5 | ### Jupyter notebooks [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/CoAxLab/AdaptiveDecisionMaking_2018/master)
6 | Click the Binder badge above to run the Jupyter notebooks for labs and homework, or download the .ipynb files [**here**](https://nbviewer.jupyter.org/github/CoAxLab/AdaptiveDecisionMaking_2018/tree/master/notebooks/) to run them locally.
7 |
8 |
9 | ## Instructions for getting started
10 | #### Install **Anaconda** with **Python 3.6**:
11 | - [**OSX**](https://www.anaconda.com/download/#macos)
12 | - [**Linux**](https://www.anaconda.com/download/#linux)
13 | - [**Windows**](https://www.anaconda.com/download/#windows)
14 |
15 | #### Confirm installs
16 | ```bash
17 | # check that your system is now using Anaconda's python
18 | which python
19 | ```
20 | ```bash
21 | # and that you installed Python 3.6
22 | python -V
23 | ```
24 |
25 |
26 |
27 | ## Install ADMCode package
28 | [**ADMCode**](https://pypi.org/project/ADMCode/) is a Python package with custom code for completing the labs and homework (both delivered as Jupyter notebooks)
29 | ```bash
30 | pip install --upgrade ADMCode
31 | ```
32 |
33 |
34 |
35 | ## Working with `git`
36 | Git is full of weird nonsense terminology. [**This tutorial**](http://rogerdudler.github.io/git-guide/) is a super useful resource for understanding how to use it.
37 |
38 | - If you don't already have a github account, create one [**here**](https://github.com)
39 | - Install git command-line tools (see *setup* section [**here**](http://rogerdudler.github.io/git-guide/))
40 |
41 | #### Clone ADMCode
42 | * Open a terminal and `cd` to a directory where you want to download the ADMCode repo (example: `cd ~/Dropbox/Git/`)
43 | * Next, use `git` to `clone` the *remote* ADMCode repository to create a *local* repo on your machine
44 | ```bash
45 | # make sure you've done steps 1 and 2
46 | # before executing this in your terminal
47 | git clone https://github.com/CoAxLab/AdaptiveDecisionMaking_2018.git
48 | ```
49 |
50 | #### Pull updates
51 | * Use `git pull` to update your local repo with any changes to the *remote* ADMCode repo
52 | * In the command below, `origin` is the default name pointing to the remote repo on Github
53 | * `master` is the `branch` of the remote that you want to sync with
54 | ```bash
55 | # first cd into your local ADMCode repo
56 | # (same directory as step 1 in "Clone ADMCode" ^^^)
57 | git pull origin master
58 | ```
59 |
60 | ## Useful resources
61 | - [**Anaconda distribution**](https://www.anaconda.com/): package management for scientific python (& R)
62 | - [**Jupyter**](http://jupyter.org/): interactive python interpreter in your browser ([tutorial](https://medium.com/codingthesmartway-com-blog/getting-started-with-jupyter-notebook-for-python-4e7082bd5d46))
63 | - [**pandas**](http://pandas.pydata.org/pandas-docs/stable/): tabular dataframe manager ([tutorial](https://medium.com/init27-labs/intro-to-pandas-and-numpy-532a2d5293c8))
64 | - [**numpy**](http://www.numpy.org/): numerical computing library ([tutorial](https://www.machinelearningplus.com/python/101-numpy-exercises-python/))
65 | - [**scikit-learn**](http://scikit-learn.org/stable/): data science and machine learning library ([tutorial](http://ogrisel.github.io/scikit-learn.org/sklearn-tutorial/tutorial/text_analytics/general_concepts.html))
66 | - [**matplotlib**](https://matplotlib.org/index.html): plotting and visualization library ([tutorial](https://www.datacamp.com/community/tutorials/matplotlib-tutorial-python))
67 | - [**seaborn**](https://seaborn.pydata.org/): wrapper for making matplotlib pretty, plays nice w/ pandas ([tutorial](https://elitedatascience.com/python-seaborn-tutorial))
68 | - [**and more...** ](https://docs.anaconda.com/anaconda/packages/pkg-docs/)
69 |
--------------------------------------------------------------------------------
/ADMCode/snuz/ppo/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 |
4 | import numpy as np
5 | from collections import deque
6 |
7 |
8 | # ----------------------------------------------------------------------------
9 | # Code from:
10 | # https://github.com/reinforcement-learning-kr/pg_travel/blob/master/mujoco/utils/utils.py
11 | def get_action(mu, std):
12 | action = torch.normal(mu, std)
13 | action = action.data.numpy()
14 | return action
15 |
16 |
17 | def log_probability(x, mu, std, logstd):
18 | var = std.pow(2)
19 | log_density = (
20 | -(x - mu).pow(2) / (2 * var) - 0.5 * math.log(2 * math.pi) - logstd)
21 | return log_density.sum(1, keepdim=True)
22 |
23 |
24 | def flat_grad(grads):
25 | grad_flatten = []
26 | for grad in grads:
27 | grad_flatten.append(grad.view(-1))
28 | grad_flatten = torch.cat(grad_flatten)
29 | return grad_flatten
30 |
31 |
32 | def flat_hessian(hessians):
33 | hessians_flatten = []
34 | for hessian in hessians:
35 | hessians_flatten.append(hessian.contiguous().view(-1))
36 | hessians_flatten = torch.cat(hessians_flatten).data
37 | return hessians_flatten
38 |
39 |
40 | def flat_params(model):
41 | params = []
42 | for param in model.parameters():
43 | params.append(param.data.view(-1))
44 | params_flatten = torch.cat(params)
45 | return params_flatten
46 |
47 |
48 | def update_model(model, new_params):
49 | index = 0
50 | for params in model.parameters():
51 | params_length = len(params.view(-1))
52 | new_param = new_params[index:index + params_length]
53 | new_param = new_param.view(params.size())
54 | params.data.copy_(new_param)
55 | index += params_length
56 |
57 |
58 | def kl_divergence(new_actor, old_actor, states):
59 | mu, std, logstd = new_actor(torch.Tensor(states))
60 | mu_old, std_old, logstd_old = old_actor(torch.Tensor(states))
61 | mu_old = mu_old.detach()
62 | std_old = std_old.detach()
63 | logstd_old = logstd_old.detach()
64 |
65 | # kl divergence between old policy and new policy : D( pi_old || pi_new )
66 | # pi_old -> mu0, logstd0, std0 / pi_new -> mu, logstd, std
67 | # be careful of calculating KL-divergence. It is not symmetric metric
68 | kl = logstd_old - logstd + (std_old.pow(2) + (mu_old - mu).pow(2)) / \
69 | (2.0 * std.pow(2)) - 0.5
70 | return kl.sum(1, keepdim=True)
71 |
72 |
73 | def save_checkpoint(state, filename='checkpoint.pth.tar'):
74 | torch.save(state, filename)
75 |
76 |
77 | # from https://github.com/joschu/modular_rl
78 | # http://www.johndcook.com/blog/standard_deviation/
79 | class RunningStat(object):
80 | def __init__(self, shape):
81 | self._n = 0
82 | self._M = np.zeros(shape)
83 | self._S = np.zeros(shape)
84 |
85 | def push(self, x):
86 | x = np.asarray(x)
87 | assert x.shape == self._M.shape
88 | self._n += 1
89 | if self._n == 1:
90 | self._M[...] = x
91 | else:
92 | oldM = self._M.copy()
93 | self._M[...] = oldM + (x - oldM) / self._n
94 | self._S[...] = self._S + (x - oldM) * (x - self._M)
95 |
96 | @property
97 | def n(self):
98 | return self._n
99 |
100 | @n.setter
101 | def n(self, n):
102 | self._n = n
103 |
104 | @property
105 | def mean(self):
106 | return self._M
107 |
108 | @mean.setter
109 | def mean(self, M):
110 | self._M = M
111 |
112 | @property
113 | def sum_square(self):
114 | return self._S
115 |
116 | @sum_square.setter
117 | def sum_square(self, S):
118 | self._S = S
119 |
120 | @property
121 | def var(self):
122 | return self._S / (self._n - 1) if self._n > 1 else np.square(self._M)
123 |
124 | @property
125 | def std(self):
126 | return np.sqrt(self.var)
127 |
128 | @property
129 | def shape(self):
130 | return self._M.shape
131 |
132 |
133 | class ZFilter:
134 | """
135 | y = (x-mean)/std
136 | using running estimates of mean,std
137 | """
138 |
139 | def __init__(self, shape, demean=True, destd=True, clip=10.0):
140 | self.demean = demean
141 | self.destd = destd
142 | self.clip = clip
143 |
144 | self.rs = RunningStat(shape)
145 |
146 | def __call__(self, x, update=True):
147 | if update: self.rs.push(x)
148 | if self.demean:
149 | x = x - self.rs.mean
150 | if self.destd:
151 | x = x / (self.rs.std + 1e-8)
152 | if self.clip:
153 | x = np.clip(x, -self.clip, self.clip)
154 | return x
155 |
156 | def output_shape(self, input_space):
157 | return input_space.shape
--------------------------------------------------------------------------------
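A quick check of the observation normalizer (assuming `torch` is installed, since the module imports it): `ZFilter` keeps running mean/std estimates via `RunningStat` and returns a clipped, z-scored copy of each observation it sees.

```python
import numpy as np
from ADMCode.snuz.ppo.utils import ZFilter

zf = ZFilter(shape=(4,), clip=5.0)
for _ in range(200):
    obs = np.random.randn(4) * 3.0 + 1.0   # synthetic observations, mean 1, std 3
    normed = zf(obs)                       # updates the stats, then normalizes

print(zf.rs.mean, zf.rs.std)               # should approach roughly 1.0 and 3.0
```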
/ADMCode/snuz/ppo/agents/ppo_gae.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from ADMCode.snuz.ppo.utils import log_probability
4 |
5 |
6 | class Hyperparameters:
7 | gamma = 0.99
8 | lam = 0.98
9 | actor_hidden1 = 64
10 | actor_hidden2 = 64
11 | actor_hidden3 = 64
12 | critic_hidden1 = 64
13 | critic_lr = 0.0003
14 | actor_lr = 0.0003
15 | batch_size = 64
16 | l2_rate = 0.001
17 | clip_param = 0.2
18 | num_training_epochs = 10
19 | num_episodes = 100
20 | num_memories = 24
21 | num_training_epochs = 10
22 | clip_actions = True
23 | clip_std = 1.0 #0.25
24 | seed_value = None
25 |
26 |
27 | def get_returns(rewards, masks, values, hp):
28 | rewards = torch.Tensor(rewards)
29 | masks = torch.Tensor(masks)
30 | returns = torch.zeros_like(rewards)
31 | advantages = torch.zeros_like(rewards)
32 |
33 | running_returns = 0
34 | previous_value = 0
35 | running_advants = 0
36 |
37 | for t in reversed(range(0, len(rewards))):
38 | running_returns = rewards[t] + hp.gamma * running_returns * masks[t]
39 | running_tderror = (
40 | rewards[t] + hp.gamma * previous_value * masks[t] - values.data[t])
41 | running_advants = (
42 | running_tderror + hp.gamma * hp.lam * running_advants * masks[t])
43 |
44 | returns[t] = running_returns
45 | previous_value = values.data[t]
46 | advantages[t] = running_advants
47 |
48 | advantages = (advantages - advantages.mean()) / advantages.std()
49 | return returns, advantages
50 |
51 |
52 | def surrogate_loss(actor, advantages, states, old_policy, actions, index):
53 | mu, std, logstd = actor(torch.Tensor(states))
54 | new_policy = log_probability(actions, mu, std, logstd)
55 | old_policy = old_policy[index]
56 |
57 | ratio = torch.exp(new_policy - old_policy)
58 | surrogate = ratio * advantages
59 | return surrogate, ratio
60 |
61 |
62 | def train_model(actor,
63 | critic,
64 | memory,
65 | actor_optim,
66 | critic_optim,
67 | hp,
68 | num_training_epochs=10):
69 | memory = np.array(memory)
70 | states = np.vstack(memory[:, 0])
71 | actions = list(memory[:, 1])
72 | rewards = list(memory[:, 2])
73 | masks = list(memory[:, 3])
74 | values = critic(torch.Tensor(states))
75 |
76 | # ----------------------------
77 | # step 1: get returns and GAEs and log probability of old policy
78 | returns, advantages = get_returns(rewards, masks, values, hp)
79 | mu, std, logstd = actor(torch.Tensor(states))
80 | old_policy = log_probability(torch.Tensor(actions), mu, std, logstd)
81 | old_values = critic(torch.Tensor(states))
82 |
83 | criterion = torch.nn.MSELoss()
84 | n = len(states)
85 | arr = np.arange(n)
86 |
87 | # ----------------------------
88 | # step 2: get value loss and actor loss and update actor & critic
89 | for epoch in range(num_training_epochs):
90 | np.random.shuffle(arr)
91 |
92 | for i in range(n // hp.batch_size):
93 | batch_index = arr[hp.batch_size * i:hp.batch_size * (i + 1)]
94 | batch_index = torch.LongTensor(batch_index)
95 | inputs = torch.Tensor(states)[batch_index]
96 | returns_samples = returns.unsqueeze(1)[batch_index]
97 | advantages_samples = advantages.unsqueeze(1)[batch_index]
98 | actions_samples = torch.Tensor(actions)[batch_index]
99 | oldvalue_samples = old_values[batch_index].detach()
100 |
101 | loss, ratio = surrogate_loss(actor, advantages_samples, inputs,
102 | old_policy.detach(), actions_samples,
103 | batch_index)
104 |
105 | values = critic(inputs)
106 | clipped_values = oldvalue_samples + \
107 | torch.clamp(values - oldvalue_samples,
108 | -hp.clip_param,
109 | hp.clip_param)
110 | critic_loss1 = criterion(clipped_values, returns_samples)
111 | critic_loss2 = criterion(values, returns_samples)
112 | critic_loss = torch.max(critic_loss1, critic_loss2).mean()
113 |
114 | clipped_ratio = torch.clamp(ratio, 1.0 - hp.clip_param,
115 | 1.0 + hp.clip_param)
116 | clipped_loss = clipped_ratio * advantages_samples
117 | actor_loss = -torch.min(loss, clipped_loss).mean()
118 |
119 | loss = actor_loss + 0.5 * critic_loss
120 |
121 | critic_optim.zero_grad()
122 | loss.backward(retain_graph=True)
123 | critic_optim.step()
124 |
125 | actor_optim.zero_grad()
126 | loss.backward()
127 | actor_optim.step()
--------------------------------------------------------------------------------
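A tiny worked example (toy numbers, `torch` required) of the return/advantage bookkeeping in `get_returns`: a four-step rollout whose final mask of 0 marks the episode boundary, with critic values supplied as a tensor.

```python
import torch
from ADMCode.snuz.ppo.agents.ppo_gae import get_returns, Hyperparameters

hp = Hyperparameters()                       # gamma = 0.99, lam = 0.98
rewards = [1.0, 0.0, 1.0, 0.0]
masks = [1, 1, 1, 0]                         # 0 = episode ended at this step
values = torch.tensor([0.5, 0.4, 0.6, 0.2])  # critic estimates for each state

returns, advantages = get_returns(rewards, masks, values, hp)
print(returns)     # discounted, episode-aware returns
print(advantages)  # standardized GAE advantages
```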
/ADMCode/snuz/ppo/run_ppo.py:
--------------------------------------------------------------------------------
1 | """Run PPO with generalized advantage estimation (GAE) on continuous-action gym environments."""
2 | import os
3 | import errno
4 |
5 | from collections import deque
6 |
7 | import gym
8 | from gym import wrappers
9 |
10 | import numpy as np
11 | import torch
12 | import torch.optim as optim
13 |
14 | from ADMCode.snuz.ppo.models import Actor3Sigma
15 | from ADMCode.snuz.ppo.models import Critic3
16 | from ADMCode.snuz.ppo.utils import get_action
17 | from ADMCode.snuz.ppo.utils import save_checkpoint
18 | from ADMCode.snuz.ppo.utils import ZFilter
19 |
20 |
21 | def run_ppo(env_name='MountainCarContinuous-v0',
22 | update_every=100,
23 | save=None,
24 | progress=True,
25 | debug=False,
26 | render=False,
27 | **algorithm_hyperparameters):
28 |
29 | # ------------------------------------------------------------------------
30 | from ADMCode.snuz.ppo.agents.ppo_gae import train_model
31 | from ADMCode.snuz.ppo.agents.ppo_gae import Hyperparameters
32 |
33 | # and its hyperparams
34 | hp = Hyperparameters()
35 | for k, v in algorithm_hyperparameters.items():
36 | setattr(hp, k, v)
37 |
38 | # ------------------------------------------------------------------------
39 | # Setup the world
40 | prng = np.random.RandomState(hp.seed_value)
41 |
42 | env = gym.make(env_name)
43 | env.seed(hp.seed_value)
44 |
45 | num_inputs = env.observation_space.shape[0]
46 | running_state = ZFilter((num_inputs, ), clip=5)
47 | num_actions = env.action_space.shape[0]
48 |
49 | # ------------------------------------------------------------------------
50 | # Actor-critic init
51 | actor = Actor3Sigma(num_inputs, num_actions, hp, max_std=hp.clip_std)
52 | critic = Critic3(num_inputs, hp)
53 |
54 | actor_optim = optim.Adam(actor.parameters(), lr=hp.actor_lr)
55 | critic_optim = optim.Adam(
56 | critic.parameters(), lr=hp.critic_lr, weight_decay=hp.l2_rate)
57 |
58 | # ------------------------------------------------------------------------
59 | # Play many games
60 | episode = 0
61 | episodes_scores = []
62 | for n_e in range(hp.num_episodes):
63 | # Re-init
64 | actor.eval()
65 | critic.eval()
66 | memory = deque()
67 |
68 | # -
69 | scores = []
70 | steps = 0
71 | for n_m in range(hp.num_memories):
72 | episode += 1
73 | state = env.reset()
74 | state = running_state(state)
75 |
76 | score = 0
77 | done = False
78 | while not done:
79 | if render:
80 | env.render()
81 |
82 | # Move
83 | steps += 1
84 | mu, std, _ = actor(torch.Tensor(state).unsqueeze(0))
85 | action = get_action(mu, std)[0] # Flattens too
86 | action_std = std.clone().detach().numpy().flatten(
87 | ) # Match action
88 |
89 | if hp.clip_actions:
90 | action = np.clip(action, env.action_space.low,
91 | env.action_space.high)
92 |
93 | next_state, reward, done, _ = env.step(action)
94 | next_state = running_state(next_state)
95 |
96 | # Process outcome
97 | if done:
98 | mask = 0
99 | else:
100 | mask = 1
101 |
102 | # Save/update
103 | memory.append([state, action, reward, mask, action_std])
104 | score += reward
105 | scores.append(score)
106 |
107 | # Shift
108 | state = next_state
109 |
110 | if debug and (n_m % update_every) == 0:
111 | print(">>> Mem. {}".format(n_m))
112 | print(">>> Last score {}".format(score))
113 | print(">>> Mu, Sigma ({}, {})".format(mu.tolist(),
114 | std.tolist()))
115 |
116 | score_avg = np.mean(scores)
117 | if progress:
118 | print(">>> Episode {} avg. score {}".format(n_e, score_avg))
119 | episodes_scores.append(score_avg)
120 |
121 | # --------------------------------------------------------------------
122 | # Learn!
123 | actor.train()
124 | critic.train()
125 | train_model(
126 | actor,
127 | critic,
128 | memory,
129 | actor_optim,
130 | critic_optim,
131 | hp,
132 | num_training_epochs=hp.num_training_epochs)
133 |
134 | # --------------------------------------------------------------------
135 | if (save is not None) and (n_e % update_every) == 0:
136 | save_checkpoint({
137 | 'actor': actor.state_dict(),
138 | 'critic': critic.state_dict(),
139 | 'z_filter_n': running_state.rs.n,
140 | 'z_filter_m': running_state.rs.mean,
141 | 'z_filter_s': running_state.rs.sum_square,
142 | 'score': score_avg
143 | },
144 | filename=save + "_ep_{}.pytorch.tar".format(n_e))
145 |
146 | return list(range(hp.num_episodes)), episodes_scores
147 |
--------------------------------------------------------------------------------
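A minimal sketch of invoking the trainer (assuming the 2018-era `gym`, `torch`, and `ray` dependencies are installed). Any keyword not consumed by `run_ppo` itself is written onto the `Hyperparameters` object, so the `num_episodes` and `num_memories` below override the defaults.

```python
from ADMCode.snuz import run_ppo

episodes, scores = run_ppo(
    env_name='MountainCarContinuous-v0',
    progress=True,
    num_episodes=5,    # hyperparameter override: number of training episodes
    num_memories=4)    # rollouts collected before each policy update

print(scores)          # average score per training episode
```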
/ADMCode/snuz/ppo/storage.py:
--------------------------------------------------------------------------------
1 | """
2 | Modified from
3 | https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/storage.py
4 | """
5 |
6 | import torch
7 | from torch.utils.data.sampler import BatchSampler
8 | from torch.utils.data.sampler import SubsetRandomSampler
9 |
10 |
11 | class RolloutStorage(object):
12 | def __init__(self, num_steps, num_processes, obs_shape, action_space,
13 | state_size):
14 | self.observations = torch.zeros(num_steps + 1, num_processes,
15 | *obs_shape)
16 | self.states = torch.zeros(num_steps + 1, num_processes, state_size)
17 | self.rewards = torch.zeros(num_steps, num_processes, 1)
18 | self.value_preds = torch.zeros(num_steps + 1, num_processes, 1)
19 | self.returns = torch.zeros(num_steps + 1, num_processes, 1)
20 | self.action_log_probs = torch.zeros(num_steps, num_processes, 1)
21 | if action_space.__class__.__name__ == 'Discrete':
22 | action_shape = 1
23 | else:
24 | action_shape = action_space.shape[0]
25 | self.actions = torch.zeros(num_steps, num_processes, action_shape)
26 | if action_space.__class__.__name__ == 'Discrete':
27 | self.actions = self.actions.long()
28 | self.masks = torch.ones(num_steps + 1, num_processes, 1)
29 |
30 | def cuda(self):
31 | self.observations = self.observations.cuda()
32 | self.states = self.states.cuda()
33 | self.rewards = self.rewards.cuda()
34 | self.value_preds = self.value_preds.cuda()
35 | self.returns = self.returns.cuda()
36 | self.action_log_probs = self.action_log_probs.cuda()
37 | self.actions = self.actions.cuda()
38 | self.masks = self.masks.cuda()
39 |
40 | def insert(self, step, current_obs, state, action, action_log_prob,
41 | value_pred, reward, mask):
42 | self.observations[step + 1].copy_(current_obs)
43 | self.states[step + 1].copy_(state)
44 | self.actions[step].copy_(action)
45 | self.action_log_probs[step].copy_(action_log_prob)
46 | self.value_preds[step].copy_(value_pred)
47 | self.rewards[step].copy_(reward)
48 | self.masks[step + 1].copy_(mask)
49 |
50 | def after_update(self):
51 | self.observations[0].copy_(self.observations[-1])
52 | self.states[0].copy_(self.states[-1])
53 | self.masks[0].copy_(self.masks[-1])
54 |
55 | def compute_returns(self, next_value, use_gae, gamma, tau):
56 | if use_gae:
57 | self.value_preds[-1] = next_value
58 | gae = 0
59 | for step in reversed(range(self.rewards.size(0))):
60 |                 delta = (self.rewards[step]
61 |                          + gamma * self.value_preds[step + 1]
62 |                          * self.masks[step + 1]
63 |                          - self.value_preds[step])
64 | gae = delta + gamma * tau * self.masks[step + 1] * gae
65 | self.returns[step] = gae + self.value_preds[step]
66 | else:
67 | self.returns[-1] = next_value
68 | for step in reversed(range(self.rewards.size(0))):
69 | self.returns[step] = self.returns[step + 1] * \
70 | gamma * self.masks[step + 1] + self.rewards[step]
71 |
72 | def feed_forward_generator(self, advantages, num_mini_batch):
73 | num_steps, num_processes = self.rewards.size()[0:2]
74 | batch_size = num_processes * num_steps
75 | assert batch_size >= num_mini_batch, "ppo req batch size to be greater than number of mini batches"
76 | mini_batch_size = batch_size // num_mini_batch
77 | sampler = BatchSampler(
78 | SubsetRandomSampler(range(batch_size)),
79 | mini_batch_size,
80 | drop_last=False)
81 | for indices in sampler:
82 | indices = torch.LongTensor(indices)
83 |
84 | if advantages.is_cuda:
85 | indices = indices.cuda()
86 |
87 | observations_batch = self.observations[:-1].view(
88 | -1,
89 | *self.observations.size()[2:])[indices]
90 | states_batch = self.states[:-1].view(-1,
91 | self.states.size(-1))[indices]
92 | actions_batch = self.actions.view(-1,
93 | self.actions.size(-1))[indices]
94 | return_batch = self.returns[:-1].view(-1, 1)[indices]
95 | masks_batch = self.masks[:-1].view(-1, 1)[indices]
96 | old_action_log_probs_batch = self.action_log_probs.view(-1,
97 | 1)[indices]
98 | adv_targ = advantages.view(-1, 1)[indices]
99 |
100 | yield observations_batch, states_batch, actions_batch, \
101 | return_batch, masks_batch, old_action_log_probs_batch, adv_targ
102 |
103 | def recurrent_generator(self, advantages, num_mini_batch):
104 | num_processes = self.rewards.size(1)
105 | num_envs_per_batch = num_processes // num_mini_batch
106 | perm = torch.randperm(num_processes)
107 | for start_ind in range(0, num_processes, num_envs_per_batch):
108 | observations_batch = []
109 | states_batch = []
110 | actions_batch = []
111 | return_batch = []
112 | masks_batch = []
113 | old_action_log_probs_batch = []
114 | adv_targ = []
115 | #pdb.set_trace()
116 | for offset in range(num_envs_per_batch):
117 | ind = perm[start_ind + offset]
118 | observations_batch.append(self.observations[:-1, ind])
119 | states_batch.append(self.states[:-1, ind])
120 | actions_batch.append(self.actions[:, ind])
121 | return_batch.append(self.returns[:-1, ind])
122 | masks_batch.append(self.masks[:-1, ind])
123 | old_action_log_probs_batch.append(
124 | self.action_log_probs[:, ind])
125 | adv_targ.append(advantages[:, ind])
126 | #pdb.set_trace()
127 | observations_batch = torch.cat(observations_batch, 0)
128 | states_batch = torch.cat(states_batch, 0)
129 | actions_batch = torch.cat(actions_batch, 0)
130 | return_batch = torch.cat(return_batch, 0)
131 | masks_batch = torch.cat(masks_batch, 0)
132 | old_action_log_probs_batch = torch.cat(old_action_log_probs_batch,
133 | 0)
134 | adv_targ = torch.cat(adv_targ, 0)
135 |
136 | yield observations_batch, states_batch, actions_batch, \
137 | return_batch, masks_batch, old_action_log_probs_batch, adv_targ
138 |
--------------------------------------------------------------------------------
/ADMCode/ddm.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import pandas as pd
4 | import numba as nb
5 | from numba import jit
6 | from numpy.random import random_sample
7 | from numba import float64, int64, vectorize, boolean
8 |
9 |
10 | def sim_ddm_trials(parameters, ntrials=500, deadline=1.5):
11 | """ main user interface function for simulating multiple trials
12 | with the DDM (wrapper for numba compiled _sim_ddm_trials_ func)
13 |
14 | :: Arguments ::
15 | parameters: 1d array (Nparams) of DDM parameters
16 | a: boundary height
17 | tr: non-decision time
18 | v: drift-rate
19 | z: starting point (frac of a; 0.0 < z < 1.0)
20 | si: diffusion constant (sigma param in DDM equation from lecture)
21 | dx: step-size of evidence
22 | dt: time step
23 | ntrials : (int) number of trials to simulate
24 |         deadline (max time for accumulation in seconds)
25 |
26 | :: Returns ::
27 | data (pd.DataFrame): pandas DF with rt and choice data
28 | traces (np.ndarray): 2d array (Ntrials x Ntime) of evidence traces
29 | """
30 |
31 | # generate storage objects for data/traces
32 | data, rProb, traces = gen_ddm_storage_objects(parameters, ntrials, deadline)
33 |
34 | # simulate ntrials w/ DDM and fill data & traces array
35 | _sim_ddm_trials_(parameters, data, rProb, traces)
36 |
37 | # filter data/traces and return as pd.DataFrame
38 | df, traces = clean_output(data, traces, deadline=deadline)
39 | return df, traces
40 |
41 |
42 |
43 | @jit(nb.typeof((1.0, 1.0))(float64[:], float64[:], float64[:]), nopython=True)
44 | def sim_ddm(parameters, rProb, trace):
45 | """ single trial simulation of the DDM (discrete time / random walk)
46 |
47 | ::Arguments::
48 | parameters: 1d array (Nparams) of DDM parameters
49 | rProb: 1d array (Ntimesteps) of random floats between 0 and 1
50 | trace: 1d array (Ntimesteps) for storing the evidence trace
51 |
52 | ::Returns::
53 | RT (float): the time that evidence crossed one of the boundaries
54 | choice: 1 if evidence terminated at upper bound, 0 if lower bound
55 | """
56 |
57 | # extract parameters
58 | a, tr, v, z, si, dx, dt = parameters
59 |
60 | # convert drift-rate into a probability,
61 | # & scale by sigma (si) and timestep (dt)
62 | # if v > 0, then 0.5 < vProb < 1.0
63 | # if v < 0, then 0.0 < vProb < 0.5
64 | vProb = .5 * (1 + (v * np.sqrt(dt))/si)
65 |
66 | # define starting point with respect to boundary height
67 | zStart = z * a
68 |
69 | #initialize evidence variable at zStart
70 | evidence = zStart
71 | trace[0] = evidence
72 |
73 | # define deadline (max time allowed for accumulation)
74 | deadline = trace.size
75 |
76 | for nsteps in range(1, deadline):
77 | # sample a random probability (r) and compare w/ vProb
78 | if rProb[nsteps] < vProb:
79 | # if r < vProb, step evidence up (towards a)
80 | evidence += dx
81 | else:
82 | # if r > vProb, step evidence down (towards 0)
83 | evidence -= dx
84 | # store new value of evidence at current timestep
85 | trace[nsteps] = evidence
86 |
87 | # check if new value of evidence crossed bound
88 | if evidence >= a:
89 | # calculate RT (in seconds)
90 | rt = tr + (nsteps * dt)
91 | # set choice to 1.0 (upper bound)
92 | choice = 1.0
93 |
94 | # terminate simulation, return rt & choice
95 | return rt, choice
96 |
97 | elif evidence <= 0:
98 | # calculate RT (in seconds)
99 | rt = tr + (nsteps * dt)
100 | # set choice to 0.0 (lower bound)
101 | choice = 0.0
102 |
103 | # terminate simulation, return rt & choice
104 | return rt, choice
105 |
106 | # return -1.0 for rt and choice so we can filter out
107 | # trials where evidence never crossed 0 or a
108 | return -1.0, -1.0
109 |
110 |
111 |
112 | @jit((float64[:], float64[:,:], float64[:,:], float64[:,:]), nopython=True)
113 | def _sim_ddm_trials_(parameters, data, rProb, traces):
114 | """ called by sim_ddm_trials() func to speed up trial iteraion
115 | """
116 | ntrials = data.shape[0]
117 | for t in range(ntrials):
118 | data[t, :] = sim_ddm(parameters, rProb[t], traces[t])
119 |
120 |
121 | def gen_ddm_storage_objects(parameters, ntrials=200, deadline=1.5):
122 | """ create pandas dataframes from data (numpy array)
123 | and filter data/traces to remove failed decision trials
124 | ::Arguments::
125 | parameters (array): 1d array (Nparams) of DDM parameters
126 | ntrials : (int) number of trials to simulate
127 | deadline (float): (max time for accumualtion in seconds)
128 |
129 | ::Returns::
130 | data (ndarray): ndarray with rt and choice data
131 | rProb (ndarray): 2d array (Ntrials x Ntimesteps) w. random floats (0-1)
132 | traces (ndarray): 2d array (Ntrials x Ntime) of evidence traces
133 | """
134 | dt = parameters[-1]
135 | ntime = int(np.floor(deadline / dt))
136 |
137 | # empty matrix Ntrials x 2 (cols for RT & Choice)
138 | data = np.zeros((ntrials, 2))
139 | # 2d array (Ntrials x Ntimesteps) of random floats between 0 and 1
140 | rProb = random_sample((ntrials, ntime))
141 | # 2d array (Ntrials x Ntimesteps) for storing evidence traces
142 | traces = np.zeros_like(rProb)
143 | return data, rProb, traces
144 |
145 |
146 | def clean_output(data, traces, deadline=1.2, stimulus=None):
147 | """ create pandas dataframes from data (numpy array)
148 | and filter data/traces to remove failed decision trials
149 | ::Arguments::
150 | data (ndarray): ndarray with rt and choice data
151 | traces (ndarray): 2d array (Ntrials x Ntime) of evidence traces
152 | ::Returns::
153 | data (pd.DataFrame): pandas DF with rt and choice data
154 | traces (ndarray): 2d array (Ntrials x Ntime) filtered traces
155 | """
156 | # store RT/choice matrix in a pandas dataframe (DF)
157 | df = pd.DataFrame(data, columns=['rt', 'choice'])
158 |
159 | # add a column for trial number
160 | df.insert(0, 'trial', np.arange(1, 1+df.shape[0]))
161 |
162 | # remove trials with no boundary crossing
163 | df = df[(df.rt>0)&(df.rt<deadline)]
--------------------------------------------------------------------------------
/ADMCode/believer_skeptic.py:
--------------------------------------------------------------------------------
17 | if nblocks>1:
18 | feedback = feedback.append([feedback]*(nblocks-1)).reset_index()
19 |
20 | feedback.rename(columns={'index':'t'}, inplace=True)
21 | self.feedback = feedback
22 |
23 | self.names = np.sort(self.feedback.columns.values)
24 | self.ntrials=self.feedback.shape[0]
25 |
26 | self.choices, self.all_traces = [], []
27 | self.rts={k:[] for k in self.names}
28 |
29 | self.qdict={k:[0] for k in self.names}
30 | self.choice_prob={k:[1/self.names.size] for k in self.names}
31 |
32 |
33 | def get_feedback(self, trial, action_ix):
34 |
35 | choice_name = self.names[action_ix]
36 | return self.feedback.loc[trial, choice_name]
37 |
38 |
39 | def play_IGT(p, feedback, alphaGo=.1, alphaNo=.1, beta=.2, nblocks=2, singleProcess=True):
40 | """
41 | ::Arguments::
42 | p (dict): parameter dictionary for accumulator
43 | feedback (dataframe): IGT card deck values
44 | alphaGo (float): learning rate for vd (direct pathway)
45 | alphaNo (float): learning rate for vi (indirect pathway)
46 | beta (float): inverse temperature parameter
47 | nblocks (int): number of IGT blocks to simulate
48 | singleProcess (bool): if true simulate accumulator with v = v_d - v_i
49 |
50 | ::Returns::
51 | qpDF, trialwise Q/P values for each IGT deck
52 | agentDF, trialwise choice, response time, and drift-rates (vd, vi)
53 | """
54 |
55 | names = np.sort(feedback.columns.values)
56 | nact = len(names)
57 | actions = np.arange(nact)
58 | IGT = IowaGamblingTask(feedback, nblocks=nblocks)
59 | ntrials=IGT.feedback.shape[0]
60 |
61 | Qmatrix = np.ones((ntrials, nact))*.05
62 | Pmatrix=np.zeros_like(Qmatrix)
63 | Qvalues = Qmatrix[0, :]
64 | Pvalues = np.array([1/nact]*nact)
65 |
66 | agent = np.zeros((ntrials, 3 + nact*3))
67 | agent[0, 3:] = np.hstack([p['vd'], p['vi'], p['vd']-p['vi']])
68 |
69 | #driftRates = np.zeros(ntrials, )
70 | for t in range(ntrials):
71 | # select bandit arm (action)
72 | act_i, rt, rt_i = simulate_multirace(p, singleProcess=singleProcess)
73 | agent[t, :3] = act_i, rt, rt_i
74 |
75 | # observe feedback
76 | r = IGT.get_feedback(t, act_i)
77 |
78 | # get expected value
79 | Qexpected = Qvalues[act_i]
80 |
81 | # get prediction error
82 | RPE = r - Qexpected
83 |
84 | # get alpha for Q-value update
85 | alpha = alphaGo
86 | if RPE<0:
87 | alpha = alphaNo
88 |
89 | # update expected value and store in Qvalues array
90 | # update v_d or v_i (depending on RPE sign)
91 | Qvalues[act_i] = update_Qi(Qexpected, RPE, alpha=alpha)
92 |
93 | # update action selection probabilities
94 | Pvalues = update_Pall(Qvalues, beta)
95 |
96 | # store new values in output matrices
97 | Qmatrix[t, :] = Qvalues
98 | Pmatrix[t, :] = Pvalues
99 |
100 | # re-scale drift-rates by change in Softmax probability
101 | deltaP = Pmatrix[t] - Pmatrix[t-1]
102 | p = update_drift(p, deltaP, alphaGo, alphaNo)
103 | agent[t, 3:] = np.hstack([p['vd'], p['vi'], p['vd']-p['vi']])
104 |
105 | return make_output_df(Qmatrix, Pmatrix, agent)
106 |
107 |
108 | def temporal_dynamics(p, t):
109 | return np.cosh(p['xb'] * t)
110 |
111 |
112 | def simulate_multirace(p, dt=.001, si=.1, tb=1.5, singleProcess=False):
113 |
114 | temporal_dynamics = lambda p, t: np.cosh(p['xb'] * t)
115 |
116 | nresp = p['vd'].size
117 | dx = si * np.sqrt(dt)
118 |
119 | nTime = np.ceil((tb-p['tr'])/dt).astype(int)
120 | xtb = temporal_dynamics(p, np.cumsum([dt]*nTime))
121 |
122 | if singleProcess:
123 | Pdelta = .5 * (1 + ((p['vd']-p['vi']) * np.sqrt(dt))/si)
124 | execution = xtb * np.cumsum(np.where((rs((nresp, nTime)).T < Pdelta), dx, -dx).T, axis=1)
125 | else:
126 | Pd = .5 * (1 + (p['vd'] * np.sqrt(dt))/si)
127 | Pi = .5 * (1 + (p['vi'] * np.sqrt(dt))/si)
128 | direct = xtb * np.where((rs((nresp, nTime)).T < Pd),dx,-dx).T
129 | indirect = np.where((rs((nresp, nTime)).T < Pi),dx,-dx).T
130 | execution = np.cumsum(direct-indirect, axis=1)
131 |
132 | act_ix, rt, rt_ix = analyze_multiresponse(execution, p, dt=dt)
133 | return act_ix, rt, rt_ix
134 |
135 |
136 | def analyze_multiresponse(execution, p, dt=.001):
137 | """analyze multi-race execution processes
138 | """
139 | nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1)
140 | rts = p['tr'] + nsteps_to_rt*dt
141 |
142 | # set non responses to 999
143 | rts[rts==p['tr']]=999
144 |
145 | # get accumulator with fastest RT (winner) in each cond
146 | act_ix = np.argmin(rts)
147 | rt = rts[act_ix]
148 | rt_ix = np.ceil((rt-p['tr'])/dt).astype(int)
149 |
150 | return act_ix, rt, rt_ix
151 |
152 |
153 | def update_drift(p, delta_prob, alphaGo=.3, alphaNo=.3):
154 | """ update direct & indirect drift-rates for all IGT actions
155 | """
156 | vd_exp = p['vd']
157 | vi_exp = p['vi']
158 | p['vd'] = vd_exp + (alphaGo * delta_prob)
159 | p['vi'] = vi_exp + (alphaNo * -delta_prob)
160 | return p
161 |
162 |
163 | def update_Qi(Qval, RPE, alpha=.3):
164 | """ update q-value of selected action, given RPE and alpha
165 | """
166 | QUpdate = Qval + alpha*RPE
167 | return QUpdate
168 |
169 |
170 | def update_Pall(Qvector, beta):
171 | """ update vector of action selection probabilities given
172 | associated q-values
173 | """
174 | return np.array([np.exp(beta*Q_i) / np.sum(np.exp(beta * Qvector)) for Q_i in Qvector])
175 |
176 |
177 | def make_output_df(Qmatrix, Pmatrix, agent):
178 | """ generate output dataframe with trialwise Q and P measures
179 | for each "card" in IGT, as well as choice selection, rt, & vd, vi (drift-rates)
180 | ::Arguments::
181 | Qmatrix (ndarray): q-value array with dims [Ntrials x Ncards]
182 | Pmatrix (ndarray): softmax prob array with dims [Ntrials x Ncards]
183 | agent (ndarray): array with behavior and agent vd and vi (drift rates)
184 | ::Returns::
185 | df (DataFrame): pandas df containing Q and SoftmaxP values for each card
186 | agentdf (DataFrame): DF of agent (with non-response trials removed)
187 | """
188 | actions = np.arange(Qmatrix.shape[1])
189 | df = pd.concat([pd.DataFrame(dat) for dat in [Qmatrix, Pmatrix]], axis=1)
190 | cols = [['{}{}'.format(x,c) for c in actions] for x in ['q', 'p']]
191 | df.columns = np.hstack(cols)
192 | df.insert(0, 'trial', np.arange(1, df.shape[0]+1))
193 | vdCols = ['vd{}'.format(i) for i in range(actions.size)]
194 | viCols = ['vi{}'.format(i) for i in range(actions.size)]
195 | vDeltaCols = ['vDelta{}'.format(i) for i in range(actions.size)]
196 | agentdf = pd.DataFrame(agent, columns=['choice', 'rt', 'rt_i']+vdCols+viCols+vDeltaCols)
197 | RT_ix = agentdf.rt[agentdf.rt>1.5].index.values
198 | agentdf.iloc[RT_ix, :] = np.nan
199 | agentdf = agentdf.dropna()
200 | return df, agentdf
201 |
--------------------------------------------------------------------------------
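A minimal usage sketch for the IGT accumulator model above, assuming these routines live in ADMCode/believer_skeptic.py (importable as ADMCode.believer_skeptic) and that the parameter dictionary uses the keys read by simulate_multirace and analyze_multiresponse (vd, vi, a, tr, xb); the numeric values and the CSV column layout are illustrative placeholders, not values from the source.

    # Hypothetical usage sketch; parameter values are placeholders.
    import numpy as np
    import pandas as pd
    from ADMCode import believer_skeptic as bs

    feedback = pd.read_csv('data/IGTCards.csv')   # one column of payoffs per deck
    ndecks = feedback.shape[1]

    p = {'vd': np.ones(ndecks) * 1.2,   # direct-pathway drift rates (one per deck)
         'vi': np.ones(ndecks) * 0.6,   # indirect-pathway drift rates (one per deck)
         'a': 0.4,                      # decision threshold
         'tr': 0.3,                     # non-decision time (s)
         'xb': 0.01}                    # gain on the cosh urgency signal

    qpDF, agentDF = bs.play_IGT(p, feedback, alphaGo=.1, alphaNo=.1,
                                beta=.2, nblocks=1, singleProcess=True)
    print(agentDF[['choice', 'rt']].head())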
/ADMCode/snuz/ppo/models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | # ----------------------------------------------------------------------------
6 | # Code from
7 | # https://github.com/reinforcement-learning-kr/pg_travel/blob/master/unity/model.py
8 |
9 |
10 | class Actor3Linear(nn.Module):
11 | """Three layer MLP."""
12 |
13 | def __init__(self, num_inputs, num_outputs, hp, max_std=1):
14 | self.num_inputs = num_inputs
15 | self.num_outputs = num_outputs
16 | self.max_std = max_std
17 |
18 | super(Actor3Linear, self).__init__()
19 |
20 | # Latent
21 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1)
22 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2)
23 |
24 | # Note: fc3 and fc4 are in parallel!
25 | # Mu
26 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs)
27 | self.fc3.weight.data.mul_(0.1)
28 | self.fc3.bias.data.mul_(0.0)
29 | # Sigma
30 | self.fc4 = nn.Linear(hp.actor_hidden2, num_outputs)
31 | self.fc4.weight.data.mul_(0.1)
32 | self.fc4.bias.data.mul_(0.0)
33 |
34 | def forward(self, x):
35 | x = self.fc1(x)
36 | x = self.fc2(x)
37 | mu = self.fc3(x)
38 |
39 | std = torch.exp(self.fc4(x))
40 | std = torch.clamp(std, 0, self.max_std)
41 | logstd = torch.log(std)
42 |
43 | return mu, std, logstd
44 |
45 |
46 | class Actor3Sigma(nn.Module):
47 | """Three layer MLP."""
48 |
49 | def __init__(self, num_inputs, num_outputs, hp, max_std=1):
50 | self.num_inputs = num_inputs
51 | self.num_outputs = num_outputs
52 | self.max_std = max_std
53 |
54 | super(Actor3Sigma, self).__init__()
55 |
56 | # Latent
57 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1)
58 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2)
59 |
60 | # Note: fc3 and fc4 are in parallel!
61 | # Mu
62 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs)
63 | self.fc3.weight.data.mul_(0.1)
64 | self.fc3.bias.data.mul_(0.0)
65 | # Sigma
66 | self.fc4 = nn.Linear(hp.actor_hidden2, num_outputs)
67 | self.fc4.weight.data.mul_(0.1)
68 | self.fc4.bias.data.mul_(0.0)
69 |
70 | def forward(self, x):
71 | x = F.tanh(self.fc1(x))
72 | x = F.tanh(self.fc2(x))
73 | mu = self.fc3(x)
74 |
75 | std = torch.exp(self.fc4(x))
76 | std = torch.clamp(std, 0, self.max_std)
77 | logstd = torch.log(std)
78 |
79 | return mu, std, logstd
80 |
81 |
82 | class Actor3(nn.Module):
83 | """Three layer MLP."""
84 |
85 | def __init__(self, num_inputs, num_outputs, hp):
86 | self.num_inputs = num_inputs
87 | self.num_outputs = num_outputs
88 |
89 | super(Actor3, self).__init__()
90 |
91 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1)
92 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2)
93 | self.fc3 = nn.Linear(hp.actor_hidden2, num_outputs)
94 | self.fc3.weight.data.mul_(0.1)
95 | self.fc3.bias.data.mul_(0.0)
96 |
97 | def forward(self, x):
98 | x = F.tanh(self.fc1(x))
99 | x = F.tanh(self.fc2(x))
100 | mu = self.fc3(x)
101 | logstd = torch.zeros_like(mu)
102 | std = torch.exp(logstd)
103 | return mu, std, logstd
104 |
105 |
106 | class Actor4(nn.Module):
107 | """Four layer MLP."""
108 |
109 | def __init__(self, num_inputs, num_outputs, hp):
110 | self.num_inputs, self.num_outputs = num_inputs, num_outputs
111 | self.hp = hp  # stored so forward() can read hp.activation
112 | super(Actor4, self).__init__()
113 | self.fc1 = nn.Linear(num_inputs, hp.actor_hidden1)
114 | self.fc2 = nn.Linear(hp.actor_hidden1, hp.actor_hidden2)
115 | self.fc3 = nn.Linear(hp.actor_hidden2, hp.actor_hidden3)
116 | self.fc4 = nn.Linear(hp.actor_hidden3, num_outputs)
117 |
118 | self.fc4.weight.data.mul_(0.1)
119 | self.fc4.bias.data.mul_(0.0)
120 |
121 | def forward(self, x):
122 | if self.hp.activation == 'tanh':
123 | x = F.tanh(self.fc1(x))
124 | x = F.tanh(self.fc2(x))
125 | x = F.tanh(self.fc3(x))
126 | mu = self.fc4(x)
127 | elif self.hp.activation == 'swish':
128 | x = self.fc1(x)
129 | x = x * F.sigmoid(x)
130 | x = self.fc2(x)
131 | x = x * F.sigmoid(x)
132 | x = self.fc3(x)
133 | x = x * F.sigmoid(x)
134 | mu = self.fc4(x)
135 | else:
136 | raise ValueError
137 |
138 | logstd = torch.zeros_like(mu)
139 | std = torch.exp(logstd)
140 | return mu, std, logstd
141 |
142 |
143 | class Critic3Linear(nn.Module):
144 | def __init__(self, num_inputs, hp):
145 | super(Critic3Linear, self).__init__()
146 | self.fc1 = nn.Linear(num_inputs, hp.critic_hidden1)
147 | self.fc2 = nn.Linear(hp.critic_hidden1, hp.critic_hidden1)
148 | self.fc3 = nn.Linear(hp.critic_hidden1, 1)
149 | self.fc3.weight.data.mul_(0.1)
150 | self.fc3.bias.data.mul_(0.0)
151 |
152 | def forward(self, x):
153 | x = self.fc1(x)
154 | x = self.fc2(x)
155 | v = self.fc3(x)
156 | return v
157 |
158 |
159 | class Critic3(nn.Module):
160 | def __init__(self, num_inputs, hp):
161 | super(Critic3, self).__init__()
162 | self.fc1 = nn.Linear(num_inputs, hp.critic_hidden1)
163 | self.fc2 = nn.Linear(hp.critic_hidden1, hp.critic_hidden1)
164 | self.fc3 = nn.Linear(hp.critic_hidden1, 1)
165 | self.fc3.weight.data.mul_(0.1)
166 | self.fc3.bias.data.mul_(0.0)
167 |
168 | def forward(self, x):
169 | x = F.tanh(self.fc1(x))
170 | x = F.tanh(self.fc2(x))
171 | v = self.fc3(x)
172 | return v
173 |
174 |
175 | # ----------------------------------------------------------------------------
176 | # Other models (experimental)
177 | class ActorSigma1(nn.Module):
178 | """A N(mu, sigma) parameterized policy model.
179 |
180 | Note: sigma is learnable; this implementation is shallow."""
181 |
182 | def __init__(self,
183 | in_channels,
184 | action_space,
185 | num_hidden1=128,
186 | gain=1,
187 | sigma=None):
188 | super(ActorSigma1, self).__init__()
189 | self.gain = gain
190 |
191 | # Est sigma?
192 | if sigma is not None:
193 | self.sigma0 = torch.tensor(sigma)
194 | else:
195 | self.sigma0 = sigma
196 |
197 | # Def number of outputs, per param (mu, sigma)
198 | num_outputs = action_space.shape[0]
199 | self.action_space = action_space
200 |
201 | # Def the network
202 | # Shared intial
203 | self.fc1 = nn.Linear(in_channels, num_hidden1)
204 |
205 | # Mu
206 | self.fc_mu = nn.Linear(num_hidden1, num_outputs)
207 | self.fc_mu.bias.data.zero_()
208 |
209 | # Sigma?
210 | if self.sigma0 is None:
211 | self.fc_sigma = nn.Linear(num_hidden1, num_outputs)
212 | self.fc_sigma.bias.data.zero_()
213 |
214 | def forward(self, x):
215 | # Shared nonlin. projection
216 | x = F.softmax(self.fc1(x))
217 |
218 | # Linear mu
219 | mu = self.fc_mu(x)
220 |
221 | # Exp. sigma
222 | if self.sigma0 is None:
223 | sigma = torch.exp(self.fc_sigma(x) - self.gain)
224 | # print(sigma)
225 | else:
226 | sigma = self.sigma0
227 |
228 | return mu, sigma
229 |
230 |
231 | class DiscreteMLPPolicy(nn.Module):
232 | """A discrete-action policy model."""
233 |
234 | def __init__(self, in_channels, num_action=2, num_hidden1=128):
235 | super(DiscreteMLPPolicy, self).__init__()
236 | self.affine1 = nn.Linear(in_channels, num_hidden1)
237 | self.affine2 = nn.Linear(num_hidden1, num_action)
238 |
239 | def forward(self, x):
240 | x = F.relu(self.affine1(x))
241 | # assumed completion: map hidden activations to action probabilities
242 | return F.softmax(self.affine2(x), dim=-1)
--------------------------------------------------------------------------------
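A short sketch of how the actor/critic modules above might be instantiated. The hyperparameter container is an assumption (a plain SimpleNamespace exposing only the attribute names the constructors read: actor_hidden1, actor_hidden2, critic_hidden1); the layer sizes and the 2-dimensional observation / 1-dimensional action shapes (as in MountainCarContinuous-v0) are illustrative.

    from types import SimpleNamespace
    import torch
    from ADMCode.snuz.ppo.models import Actor3, Critic3

    # assumed hyperparameter container; only the attributes read above are set
    hp = SimpleNamespace(actor_hidden1=64, actor_hidden2=64, critic_hidden1=64)

    actor = Actor3(num_inputs=2, num_outputs=1, hp=hp)
    critic = Critic3(num_inputs=2, hp=hp)

    obs = torch.randn(8, 2)              # batch of 8 two-dimensional observations
    mu, std, logstd = actor(obs)         # Gaussian policy parameters, shape (8, 1)
    values = critic(obs)                 # state-value estimates, shape (8, 1)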
/notebooks/Lab 5 - SNUZ.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# SNUZ - white noise to drive _and_ relax you!\n",
8 | "\n",
9 | "> Arrive at your destination safely, comfortably, and well rested. We combine state-of-the-art methods in random search to get you safely to your destination. Using random methods lets us generate efficient routes, and high quality (mandatory) white noise for your journey -- across the town or across the country!\n",
10 | "\n",
11 | "In this experiment an autonomous car will learn to drive up a hill. We'll compare random search ([ARS](https://arxiv.org/abs/1803.07055)) to Proximal Policy Optimization ([PPO](https://blog.openai.com/openai-baselines-ppo/)).\n",
12 | "\n",
13 | "# Aims\n",
14 | "1. Install pytorch, et al\n",
15 | "2. Answer the question: does random search do better than a state of the 'cart' RL method in ...one of the simplest continuous control tasks?\n",
16 | "3. _Acquirehire_.\n",
17 | "\n",
18 | "\n",
19 | "# Install\n",
20 | "Before doing anything else, we need to install some libraries.\n",
21 | "\n",
22 | "From the command line, run:\n",
23 | "\n",
24 | "`pip install gym`\n",
25 | "\n",
26 | "`pip install ray`\n",
27 | "\n",
28 | "`pip install opencv-python`\n",
29 | " \n",
30 | "Then for your OS, do:\n",
31 | "\n",
32 | "## Mac\n",
33 | "`conda install pytorch torchvision -c pytorch`\n",
34 | "## Linux\n",
35 | "`conda install pytorch torchvision -c pytorch`\n",
36 | "## Windows\n",
37 | "`conda install pytorch -c pytorch`\n",
38 | "\n",
39 | "`pip3 install torchvision`"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "from ADMCode import visualize as vis\n",
49 | "from ADMCode.snuz import run_ppo\n",
50 | "from ADMCode.snuz import run_ars\n",
51 | "import numpy as np\n",
52 | "import pandas as pd\n",
53 | "\n",
54 | "import matplotlib.pyplot as plt\n",
55 | "import seaborn as sns\n",
56 | "import warnings\n",
57 | "\n",
58 | "warnings.simplefilter('ignore', np.RankWarning)\n",
59 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n",
60 | "warnings.filterwarnings(\"ignore\")\n",
61 | "sns.set(style='white', font_scale=1.3)\n",
62 | "\n",
63 | "%matplotlib inline\n",
64 | "%config InlineBackend.figure_format = 'png'\n",
65 | "%config InlineBackend.savefig.dpi = 150"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "# Task\n",
73 | "\n",
74 | "We're going to teaching a car to drive up a hill! This is the `MountainCarContinuous-v0` from the OpenAI [gym].(https://gym.openai.com)\n",
75 | "\n",
76 | ""
77 | ]
78 | },
79 | {
80 | "cell_type": "markdown",
81 | "metadata": {},
82 | "source": [
83 | "# Vrooooom!\n",
84 | "Let's get driving, uphill! First let's try PPO.\n",
85 | "\n",
86 | "\n",
87 | "## PPO\n",
88 | "\n",
89 | "The default hyperparameters are:\n",
90 | "\n",
91 | " gamma = 0.99 # Try me?\n",
92 | " lam = 0.98 # Try me?\n",
93 | " actor_hidden1 = 64 # Try me?\n",
94 | " actor_hidden2 = 64 # Try me?\n",
95 | " actor_hidden3 = 64 # Try me?\n",
96 | " critic_hidden1 = 64 # Try me?\n",
97 | " critic_lr = 0.0003 # Try me? (small changes)\n",
98 | " actor_lr = 0.0003 # Try me? (small changes)\n",
99 | " batch_size = 64 # Leave me be\n",
100 | " l2_rate = 0.001 # Leave me be\n",
101 | " clip_param = 0.2 # Leave me be\n",
102 | " num_training_epochs = 10 # Try me?\n",
103 | " num_episodes = 10 # Try me?\n",
104 | " num_memories = 24 # Try me?\n",
105 | " num_training_epochs = 10 # Try me?\n",
106 | " clip_actions = True # Leave me be\n",
107 | " clip_std = 1.0 # Leave me be\n",
108 | " seed_value = None # Try me (with int only)\n",
109 | " \n",
110 | "Parameters can be changed by passing to `run_ppo`. For example `run_ppo(num_episodes=20, actor_lr=0.0006`) doubles the train time and the learning rate of the PPO."
111 | ]
112 | },
113 | {
114 | "cell_type": "code",
115 | "execution_count": 6,
116 | "metadata": {},
117 | "outputs": [
118 | {
119 | "ename": "NameError",
120 | "evalue": "name 'run_ppo' is not defined",
121 | "output_type": "error",
122 | "traceback": [
123 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
124 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
125 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mepisodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_ppo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_episodes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
126 | "\u001b[0;31mNameError\u001b[0m: name 'run_ppo' is not defined"
127 | ]
128 | }
129 | ],
130 | "source": [
131 | "episodes, scores = run_ppo(render=True, num_episodes=10)"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "Plot the average reward / episode."
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "plt.plot(episodes, scores)\n",
148 | "plt.xlabel(\"Episode\")\n",
149 | "plt.xlabel(\"Reward\")"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "Compare, say, 10 episodes of PPO to 10 of...\n",
157 | "\n",
158 | "\n",
159 | "## ARS\n",
160 | "\n",
161 | "The [ARS](https://arxiv.org/abs/1803.07055) code was modified from Recht's [original source](https://github.com/modestyachts/ARS). \n",
162 | "\n",
163 | "\n",
164 | "The default hyperparameters are:\n",
165 | "\n",
166 | " num_episodes = 10 # Try me?\n",
167 | " n_directions = 8 # Try me?\n",
168 | " deltas_used = 8 # Try me?\n",
169 | " step_size = 0.02 # Try me?\n",
170 | " delta_std = 0.03 # Try me?\n",
171 | " n_workers = 1 # Leave me be\n",
172 | " rollout_length = 240 # Try me?\n",
173 | " shift = 0 # Leave me be (all below)\n",
174 | " seed = 237\n",
175 | " policy_type = 'linear'\n",
176 | " dir_path = 'data'\n",
177 | " filter = 'MeanStdFilter' # Leave me be\n",
178 | " \n",
179 | " _Note_: Due to the way the backend of ARS works (it uses a [ray](https://ray.readthedocs.io/en/latest/), a dist. job system) we can't render exps here. Sorry. :("
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "episodes, scores = run_ars(num_episodes=10)"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": null,
194 | "metadata": {},
195 | "outputs": [],
196 | "source": [
197 | "plt.plot(episodes, scores)\n",
198 | "plt.xlabel(\"Episode\")\n",
199 | "plt.xlabel(\"Reward\")"
200 | ]
201 | }
202 | ],
203 | "metadata": {
204 | "kernelspec": {
205 | "display_name": "Python 3",
206 | "language": "python",
207 | "name": "python3"
208 | },
209 | "language_info": {
210 | "codemirror_mode": {
211 | "name": "ipython",
212 | "version": 3
213 | },
214 | "file_extension": ".py",
215 | "mimetype": "text/x-python",
216 | "name": "python",
217 | "nbconvert_exporter": "python",
218 | "pygments_lexer": "ipython3",
219 | "version": "3.7.0"
220 | }
221 | },
222 | "nbformat": 4,
223 | "nbformat_minor": 2
224 | }
225 |
--------------------------------------------------------------------------------
/ADMCode/qlearn.py:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/env python
2 | from __future__ import division
3 | import numpy as np
4 | from numpy import array
5 | from numpy.random import sample as rs
6 | from numpy import newaxis as na
7 | import pandas as pd
8 | from scipy.stats import sem
9 | import seaborn as sns
10 | import string
11 | import matplotlib.pyplot as plt
12 |
13 |
14 |
15 | def update_Qi(Qval, reward, alpha):
16 | """ update q-value of selected action, given reward and alpha
17 | """
18 | return Qval + alpha * (reward - Qval)
19 |
20 |
21 | def update_Pall(Qvector, beta):
22 | """ update vector of action selection probabilities given
23 | associated q-values
24 | """
25 | return np.array([np.exp(beta*Q_i) / np.sum(np.exp(beta * Qvector)) for Q_i in Qvector])
26 |
27 |
28 |
29 | class IowaGamblingTask(object):
30 | """ defines a multi-armed bandit task
31 |
32 | ::Arguments::
33 | preward (list): 1xN vector of reward probaiblities for each of N bandits
34 | rvalues (list): 1xN vector of payout values for each of N bandits
35 | """
36 | def __init__(self, feedback, nblocks=1):
37 |
38 | if nblocks>1:
39 | feedback = feedback.append([feedback]*(nblocks-1)).reset_index()
40 |
41 | feedback.rename(columns={'index':'t'}, inplace=True)
42 | self.feedback = feedback
43 |
44 | self.names = np.sort(self.feedback.columns.values)
45 | self.ntrials=self.feedback.shape[0]
46 |
47 | self.choices, self.all_traces = [], []
48 | self.rts={k:[] for k in self.names}
49 |
50 | self.qdict={k:[0] for k in self.names}
51 | self.choice_prob={k:[1/self.names.size] for k in self.names}
52 |
53 |
54 | def get_feedback(self, trial, action_ix):
55 |
56 | choice_name = self.names[action_ix]
57 | return self.feedback.loc[trial, choice_name]
58 |
59 | #new_col = self.feedback[choice_name].shift(-1)
60 | #new_col.set_value(new_col.index[-1], oldval)
61 | #self.feedback.loc[:, choice_name] = new_col
62 | #return self.feedback.loc[:, choice_name] = new_col
63 |
64 |
65 |
66 |
67 | class MultiArmedBandit(object):
68 | """ defines a multi-armed bandit task
69 |
70 | ::Arguments::
71 | preward (list): 1xN vector of reward probabilities for each of N bandits
72 | rvalues (list): 1xN vector of payout values for each of N bandits
73 | """
74 | def __init__(self, preward=[.9, .8, .7], rvalues=[1, 1, 1]):
75 | self.preward = preward
76 | self.rvalues = rvalues
77 | try:
78 | assert(len(self.rvalues)==len(self.preward))
79 | except AssertionError:
80 | self.rvalues = np.ones(len(self.preward))
81 |
82 | def set_params(self, **kwargs):
83 | error_msg = """preward and rvalues must be same size
84 | setting all rvalues to 1"""
85 | kw_keys = list(kwargs)
86 | if 'preward' in kw_keys:
87 | self.preward = kwargs['preward']
88 | if 'rvalues' not in kw_keys:
89 | try:
90 | assert(len(self.rvalues)==len(self.preward))
91 | except AssertionError:
92 | self.rvalues = np.ones(len(self.preward))
93 |
94 | if 'rvalues' in kw_keys:
95 | self.rvalues = kwargs['rvalues']
96 | try:
97 | assert(len(self.rvalues)==len(self.preward))
98 | except AssertionError:
99 | raise AssertionError(error_msg)
100 |
101 |
102 | def get_feedback(self, action_ix):
103 | pOutcomes = np.array([self.preward[action_ix], 1-self.preward[action_ix]])
104 | Outcomes = np.array([self.rvalues[action_ix], 0])
105 | feedback = np.random.choice(Outcomes, p=pOutcomes)
106 | return feedback
107 |
108 |
109 |
110 |
111 | class Qagent(object):
112 | """ defines the learning parameters of single q-learning agent
113 | in a multi-armed bandit task
114 |
115 | ::Arguments::
116 | alpha (float): learning rate
117 | beta (float): inverse temperature parameter
118 | preward (list): 1xN vector of reward probabilities for each of N bandits
119 | rvalues (list): 1xN vector of payout values for each of N bandits
120 | IF rvalues is None, all values set to 1
121 |
122 | """
123 | def __init__(self, alpha=.04, beta=3.5, epsilon=.1, preward=[.9, .8, .7], rvalues=None):
124 | if rvalues is None:
125 | rvalues = np.ones(len(preward))
126 | self.bandits = MultiArmedBandit(preward=preward, rvalues=rvalues)
127 | self.updateQ = lambda Qval, r, alpha: Qval + alpha*(r - Qval)
128 | self.updateP = lambda Qvector, act_i, beta: np.exp(beta*Qvector[act_i])/np.sum(np.exp(beta*Qvector))
129 | self.set_params(alpha=alpha, beta=beta, epsilon=epsilon)
130 |
131 |
132 | def set_params(self, **kwargs):
133 | """ update learning rate, inv. temperature, and/or
134 | epsilon parameters of q-learning agent
135 | """
136 |
137 | kw_keys = list(kwargs)
138 |
139 | if 'alpha' in kw_keys:
140 | self.alpha = kwargs['alpha']
141 |
142 | if 'beta' in kw_keys:
143 | self.beta = kwargs['beta']
144 |
145 | if 'epsilon' in kw_keys:
146 | self.epsilon = kwargs['epsilon']
147 |
148 | if 'preward' in kw_keys:
149 | self.bandits.set_params(preward=kwargs['preward'])
150 |
151 | if 'rvalues' in kw_keys:
152 | self.bandits.set_params(rvalues=kwargs['rvalues'])
153 |
154 | self.nact = len(self.bandits.preward)
155 | self.actions = np.arange(self.nact)
156 |
157 |
158 | def play_bandits(self, ntrials=1000, get_output=True):
159 | """ simulates agent performance on a multi-armed bandit task
160 |
161 | ::Arguments::
162 | ntrials (int): number of trials to play bandits
163 | get_output (bool): returns output DF if True (default)
164 |
165 | ::Returns::
166 | DataFrame (Ntrials x Nbandits) with trialwise Q and P
167 | values for each bandit
168 | """
169 | pdata = np.zeros((ntrials+1, self.nact))
170 | pdata[0, :] = np.array([1/self.nact]*self.nact)
171 | qdata = np.zeros_like(pdata)
172 | self.choices = []
173 | self.feedback = []
174 |
175 | for t in range(ntrials):
176 |
177 | # select bandit arm (action)
178 | act_i = np.random.choice(self.actions, p=pdata[t, :])
179 |
180 | # observe feedback
181 | r = self.bandits.get_feedback(act_i)
182 |
183 | # update value of selected action
184 | qdata[t+1, act_i] = update_Qi(qdata[t, act_i], r, self.alpha)
185 |
186 | # broadcast old q-values for unchosen actions
187 | for act_j in self.actions[np.where(self.actions!=act_i)]:
188 | qdata[t+1, act_j] = qdata[t, act_j]
189 |
190 | # update action selection probabilities and store data
191 | pdata[t+1, :] = update_Pall(qdata[t+1, :], self.beta)
192 | self.choices.append(act_i)
193 | self.feedback.append(r)
194 |
195 | self.pdata = pdata[1:, :]
196 | self.qdata = qdata[1:, :]
197 | self.make_output_df()
198 |
199 | if get_output:
200 | return self.data.copy()
201 |
202 |
203 | def make_output_df(self):
204 | """ generate output dataframe with trialwise Q and P measures for each bandit,
205 | as well as choice selection, and feedback
206 | """
207 | df = pd.concat([pd.DataFrame(dat) for dat in [self.qdata, self.pdata]], axis=1)
208 | columns = np.hstack(([['{}{}'.format(x, c) for c in self.actions] for x in ['q', 'p']]))
209 | df.columns = columns
210 | df.insert(0, 'trial', np.arange(1, df.shape[0]+1))
211 | df['choice'] = self.choices
212 | df['feedback'] = self.feedback
213 | r = np.array(self.bandits.rvalues)
214 | p = np.array(self.bandits.preward)
215 | df['optimal'] = np.where(df['choice']==np.argmax(p * r), 1, 0)
216 | df.insert(0, 'agent', 1)
217 | self.data = df.copy()
218 |
219 |
220 | def simulate_multiple(self, nsims=10, ntrials=1000):
221 | """ simulates multiple identical agents on multi-armed bandit task
222 | """
223 | dflist = []
224 | for i in range(nsims):
225 | data_i = self.play_bandits(ntrials=ntrials, get_output=True)
226 | data_i['agent'] += i
227 | dflist.append(data_i)
228 | return pd.concat(dflist)
229 |
--------------------------------------------------------------------------------
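A minimal sketch of running the softmax Q-learning agent defined above on a three-armed bandit; the reward probabilities and learning parameters simply mirror the class defaults.

    from ADMCode.qlearn import Qagent

    agent = Qagent(alpha=.04, beta=3.5, preward=[.9, .8, .7])
    data = agent.play_bandits(ntrials=500, get_output=True)

    # trialwise Q/P values plus choice, feedback, and optimal-arm indicator
    print(data[['trial', 'choice', 'feedback', 'optimal']].tail())
    print('optimal-arm rate (last 100 trials):', data['optimal'].tail(100).mean())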
/ADMCode/snuz/ars/filter.py:
--------------------------------------------------------------------------------
1 | # Code in this file is copied and adapted from
2 | # https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py
3 |
4 |
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import numpy as np
10 |
11 |
12 | class Filter(object):
13 | """Processes input, possibly statefully."""
14 |
15 | def update(self, other, *args, **kwargs):
16 | """Updates self with "new state" from other filter."""
17 | raise NotImplementedError
18 |
19 | def copy(self):
20 | """Creates a new object with same state as self.
21 |
22 | Returns:
23 | copy (Filter): Copy of self"""
24 | raise NotImplementedError
25 |
26 | def sync(self, other):
27 | """Copies all state from other filter to self."""
28 | raise NotImplementedError
29 |
30 |
31 | class NoFilter(Filter):
32 | def __init__(self, *args):
33 | pass
34 |
35 | def __call__(self, x, update=True):
36 | return np.asarray(x, dtype = np.float64)
37 |
38 | def update(self, other, *args, **kwargs):
39 | pass
40 |
41 | def copy(self):
42 | return self
43 |
44 | def sync(self, other):
45 | pass
46 |
47 | def stats_increment(self):
48 | pass
49 |
50 | def clear_buffer(self):
51 | pass
52 |
53 | def get_stats(self):
54 | return 0, 1
55 |
56 | @property
57 | def mean(self):
58 | return 0
59 |
60 | @property
61 | def var(self):
62 | return 1
63 |
64 | @property
65 | def std(self):
66 | return 1
67 |
68 |
69 |
70 | # http://www.johndcook.com/blog/standard_deviation/
71 | class RunningStat(object):
72 |
73 | def __init__(self, shape=None):
74 | self._n = 0
75 | self._M = np.zeros(shape, dtype = np.float64)
76 | self._S = np.zeros(shape, dtype = np.float64)
77 | self._M2 = np.zeros(shape, dtype = np.float64)
78 |
79 | def copy(self):
80 | other = RunningStat()
81 | other._n = self._n
82 | other._M = np.copy(self._M)
83 | other._S = np.copy(self._S)
84 | return other
85 |
86 | def push(self, x):
87 | x = np.asarray(x)
88 | # Unvectorized update of the running statistics.
89 | assert x.shape == self._M.shape, ("x.shape = {}, self.shape = {}"
90 | .format(x.shape, self._M.shape))
91 | n1 = self._n
92 | self._n += 1
93 | if self._n == 1:
94 | self._M[...] = x
95 | else:
96 | delta = x - self._M
97 | deltaM2 = np.square(x) - self._M2
98 | self._M[...] += delta / self._n
99 | self._S[...] += delta * delta * n1 / self._n
100 |
101 |
102 | def update(self, other):
103 | n1 = self._n
104 | n2 = other._n
105 | n = n1 + n2
106 | delta = self._M - other._M
107 | delta2 = delta * delta
108 | M = (n1 * self._M + n2 * other._M) / n
109 | S = self._S + other._S + delta2 * n1 * n2 / n
110 | self._n = n
111 | self._M = M
112 | self._S = S
113 |
114 | def __repr__(self):
115 | return '(n={}, mean_mean={}, mean_std={})'.format(
116 | self.n, np.mean(self.mean), np.mean(self.std))
117 |
118 | @property
119 | def n(self):
120 | return self._n
121 |
122 | @property
123 | def mean(self):
124 | return self._M
125 |
126 | @property
127 | def var(self):
128 | return self._S / (self._n - 1) if self._n > 1 else np.square(self._M)
129 |
130 | @property
131 | def std(self):
132 | return np.sqrt(self.var)
133 |
134 | @property
135 | def shape(self):
136 | return self._M.shape
137 |
138 |
139 | class MeanStdFilter(Filter):
140 | """Keeps track of a running mean for seen states"""
141 |
142 | def __init__(self, shape, demean=True, destd=True):
143 | self.shape = shape
144 | self.demean = demean
145 | self.destd = destd
146 | self.rs = RunningStat(shape)
147 | # In distributed rollouts, each worker sees different states.
148 | # The buffer is used to keep track of deltas amongst all the
149 | # observation filters.
150 |
151 | self.buffer = RunningStat(shape)
152 |
153 | self.mean = np.zeros(shape, dtype = np.float64)
154 | self.std = np.ones(shape, dtype = np.float64)
155 |
156 | def clear_buffer(self):
157 | self.buffer = RunningStat(self.shape)
158 | return
159 |
160 | def update(self, other, copy_buffer=False):
161 | """Takes another filter and only applies the information from the
162 | buffer.
163 |
164 | Using notation `F(state, buffer)`
165 | Given `Filter1(x1, y1)` and `Filter2(x2, y2)`,
166 | `update` modifies `Filter1` to `Filter1(x1 + y2, y1)`
167 | If `copy_buffer`, then `Filter1` is modified to
168 | `Filter1(x1 + y2, y2)`.
169 | """
170 | self.rs.update(other.buffer)
171 | if copy_buffer:
172 | self.buffer = other.buffer.copy()
173 | return
174 |
175 | def copy(self):
176 | """Returns a copy of Filter."""
177 | other = MeanStdFilter(self.shape)
178 | other.demean = self.demean
179 | other.destd = self.destd
180 | other.rs = self.rs.copy()
181 | other.buffer = self.buffer.copy()
182 | return other
183 |
184 | def sync(self, other):
185 | """Syncs all fields together from other filter.
186 |
187 | Using notation `F(state, buffer)`
188 | Given `Filter1(x1, y1)` and `Filter2(x2, y2)`,
189 | `sync` modifies `Filter1` to `Filter1(x2, y2)`
190 | """
191 | assert other.shape == self.shape, "Shapes don't match!"
192 | self.demean = other.demean
193 | self.destd = other.destd
194 | self.rs = other.rs.copy()
195 | self.buffer = other.buffer.copy()
196 | return
197 |
198 | def __call__(self, x, update=True):
199 | x = np.asarray(x, dtype = np.float64)
200 | if update:
201 | if len(x.shape) == len(self.rs.shape) + 1:
202 | # The vectorized case.
203 | for i in range(x.shape[0]):
204 | self.rs.push(x[i])
205 | self.buffer.push(x[i])
206 | else:
207 | # The unvectorized case.
208 | self.rs.push(x)
209 | self.buffer.push(x)
210 | if self.demean:
211 | x = x - self.mean
212 | if self.destd:
213 | x = x / (self.std + 1e-8)
214 | return x
215 |
216 | def stats_increment(self):
217 | self.mean = self.rs.mean
218 | self.std = self.rs.std
219 |
220 | # Set values for std less than 1e-7 to +inf to avoid
221 | # dividing by zero. State elements with zero variance
222 | # are set to zero as a result.
223 | self.std[self.std < 1e-7] = float("inf")
224 | return
225 |
226 | def get_stats(self):
227 | return self.rs.mean, (self.rs.std + 1e-8)
228 |
229 | def __repr__(self):
230 | return 'MeanStdFilter({}, {}, {}, {})'.format(
231 | self.shape, self.demean,
232 | self.rs, self.buffer)
233 |
234 |
235 | def get_filter(filter_config, shape = None):
236 | if filter_config == "MeanStdFilter":
237 | return MeanStdFilter(shape)
238 | elif filter_config == "NoFilter":
239 | return NoFilter()
240 | else:
241 | raise Exception("Unknown observation_filter: " +
242 | str(filter_config))
243 |
244 |
245 | def test_running_stat():
246 | for shp in ((), (3,), (3, 4)):
247 | li = []
248 | rs = RunningStat(shp)
249 | for _ in range(5):
250 | val = np.random.randn(*shp)
251 | rs.push(val)
252 | li.append(val)
253 | m = np.mean(li, axis=0)
254 | assert np.allclose(rs.mean, m)
255 | v = np.square(m) if (len(li) == 1) else np.var(li, ddof=1, axis=0)
256 | assert np.allclose(rs.var, v)
257 |
258 |
259 | def test_combining_stat():
260 | for shape in [(), (3,), (3, 4)]:
261 | li = []
262 | rs1 = RunningStat(shape)
263 | rs2 = RunningStat(shape)
264 | rs = RunningStat(shape)
265 | for _ in range(5):
266 | val = np.random.randn(*shape)
267 | rs1.push(val)
268 | rs.push(val)
269 | li.append(val)
270 | for _ in range(9):
271 | rs2.push(val)
272 | rs.push(val)
273 | li.append(val)
274 | rs1.update(rs2)
275 | assert np.allclose(rs.mean, rs1.mean)
276 | assert np.allclose(rs.std, rs1.std)
277 |
278 |
279 | test_running_stat()
280 | test_combining_stat()
281 |
--------------------------------------------------------------------------------
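A small sketch of the observation filter above: push a batch of observations into the running statistics, refresh the normalization constants, and then whiten a new observation without updating. The data are random placeholders.

    import numpy as np
    from ADMCode.snuz.ars.filter import get_filter

    obs_filter = get_filter("MeanStdFilter", shape=(2,))

    for _ in range(100):
        obs_filter(np.random.randn(2))        # push raw observations into the running stats

    obs_filter.stats_increment()              # copy the running mean/std into the normalizer
    x = obs_filter(np.array([0.5, -0.5]), update=False)

    print(obs_filter.get_stats())             # (running mean, running std + 1e-8)
    print(x)                                  # observation demeaned and scaled by the running std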
/ADMCode/visualize.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import pandas as pd
4 | import seaborn as sns
5 | from scipy.stats import norm
6 | from mpl_toolkits.axes_grid1 import make_axes_locatable
7 | from ADMCode import sdt, utils
8 | import matplotlib.pyplot as plt
10 | from scipy.stats import sem
10 |
11 | def convert_params(parameters, maxtime=1.5):
12 | a, tr, v, z, si, dx, dt = parameters
13 | zStart = z * a
14 | trSteps = int(tr/dt)
15 | deadline = (maxtime / dt) * 1.1
16 | return a, trSteps, v, zStart, si, dx, dt, deadline
17 |
18 |
19 | def build_ddm_axis(parameters, maxtime=1.5):
20 |
21 | sns.set(style='white')
22 | f, ax = plt.subplots(1, figsize=(8.5, 7), sharex=True)
23 |
24 | a, tr, v, z, si, dx, dt, deadline = convert_params(parameters, maxtime)
25 | w = deadline
26 | xmin=tr - 100
27 |
28 | plt.setp(ax, xlim=(xmin - 51, w + 1), ylim=(0 - (.01 * a), a + (.01 * a)))
29 | ax.hlines(y=a, xmin=xmin, xmax=w, color='#3572C6', linewidth=4)
30 | ax.hlines(y=0, xmin=xmin, xmax=w, color='#e5344a', linewidth=4)
31 | ax.hlines(y=z, xmin=xmin, xmax=w, color='k', alpha=.4, linestyles='--', linewidth=3)
32 | ax.vlines(x=xmin-50, ymin=-.1, ymax=a+.1, color='k', alpha=.15, linewidth=5)
33 | ax.hlines(y=z, xmin=xmin, xmax=tr, color='k', linewidth=4)
34 |
35 | ax.set_xticklabels([])
36 | ax.set_yticklabels([])
37 | ax.set_xticks([])
38 | ax.set_yticks([])
39 | sns.despine(top=True, right=True, bottom=True, left=True, ax=ax)
40 |
41 | divider = make_axes_locatable(ax)
42 | axx1 = divider.append_axes("top", size=1.2, pad=0.0, sharex=ax)
43 | axx2 = divider.append_axes("bottom", size=1.2, pad=0.0, sharex=ax)
44 | plt.setp(axx1, xlim=(xmin - 51, w + 1),ylim=(0 - (.01 * a), a + (.01 * a)))
45 | plt.setp(axx2, xlim=(xmin - 51, w + 1),ylim=(0 - (.01 * a), a + (.01 * a)))
46 | axx2.invert_yaxis()
47 | axx1.hist([0], density=False, bins=np.linspace(200, w, num=9), alpha=1., color='White')
48 | axx2.hist([0], density=False, bins=np.linspace(200, w, num=9), alpha=1., color='White')
49 |
50 | for axx in [axx1, axx2]:
51 | for spine in ['top', 'left', 'bottom', 'right']:
52 | axx.spines[spine].set_visible(False)
53 | axx.set_xticklabels([])
54 | axx.set_yticklabels([])
55 | return f, [ax, axx1, axx2]
56 |
57 |
58 |
59 | def plot_ddm_sims(df, parameters, traces=None, plot_v=False, fig=None, colors=None, vcolor='k', kdeplot=True):
60 |
61 | maxtime = df.rt.max()
62 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters, maxtime)
63 |
64 | if colors is None:
65 | colors = ['#3572C6', '#e5344a']
66 | if fig is None:
67 | f, axes = build_ddm_axis(parameters, maxtime)
68 | else:
69 | f = fig; axes = fig.axes
70 |
71 | plot_bound_rts(df, parameters, f=f, colors=colors, kdeplot=kdeplot)
72 |
73 | if traces is not None:
74 | plot_traces(df, parameters, traces, f=f, colors=colors)
75 |
76 | if plot_v:
77 | plot_drift_line(df, parameters, color=vcolor, ax=f.axes[0])
78 |
79 | return f
80 |
81 |
82 | def compare_drift_effects(df, param_list):
83 |
84 | sDF = df[df.stim=='signal']
85 | nDF = df[df.stim=='noise']
86 | colors = [['#009e07','#009e07'], ["#e5344a", "#e5344a"]]
87 |
88 | maxtime = df.rt.max()
89 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(param_list[0], maxtime)
90 | f=None
91 | for i, dfi in enumerate([sDF, nDF]):
92 | clrs = colors[i]
93 | f = plot_ddm_sims(dfi, param_list[i], colors=clrs, plot_v=True, fig=f, vcolor=clrs[0], kdeplot=True)#False)
94 |
95 | ax, axx1, axx2 = f.axes
96 | xmin = trSteps-100
97 | ax.hlines(y=a, xmin=xmin, xmax=deadline, color='k', linewidth=4)
98 | ax.hlines(y=0, xmin=xmin, xmax=deadline, color='k', linewidth=4)
99 |
100 | if sDF.shape[0] > nDF.shape[0]:
101 | ymax, ymin = axx1.get_ylim()[::-1]
102 | axx2.set_ylim(ymax, ymin)
103 | else:
104 | ymax, ymin = axx2.get_ylim()[::-1]
105 | axx1.set_ylim(ymax, ymin)
106 | return ax
107 |
108 |
109 | def plot_bound_rts(df, parameters, f, colors=None, kdeplot=True):
110 |
111 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters)
112 | rt1 = df[df.choice==1].rt.values / dt
113 | rt0 = df[df.choice==0].rt.values / dt
114 |
115 | if colors is None:
116 | colors = ['#3572C6', '#e5344a']
117 | ax, axx1, axx2 = f.axes
118 | clip = (df.rt.min()/dt, deadline/dt)
119 |
120 | if kdeplot:
121 | #print('kdeplot')
122 | sns.kdeplot(rt1, alpha=.5, linewidth=0, color=colors[0], ax=axx1, shade=True, clip=clip)#, bw=15)
123 | sns.kdeplot(rt0, alpha=.5, linewidth=0, color=colors[1], ax=axx2, shade=True, clip=clip)#, bw=15)
124 |
125 | ymax = (.005, .01)
126 | if rt1.size < rt0.size:
127 | ymax = (.01, .005)
128 | axx1.set_ylim(0, ymax[0])
129 | axx2.set_ylim(ymax[1], 0.0)
130 | # axx2.invert_yaxis()
131 |
132 | else:
133 | #print('not_kdeplot')
134 | #print(repr(rt1))
135 | sns.histplot(rt1, color=colors[0], ax=axx1, kde=False)#, norm_hist=False)
136 | sns.histplot(rt0, color=colors[1], ax=axx2, kde=False)#, norm_hist=False)
137 |
138 |
139 | def plot_traces(df, parameters, traces, f, colors):
140 |
141 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters)
142 | ax = f.axes[0]
143 | ntrials = int(traces.shape[0])
144 | for i in range(ntrials):
145 | trace = traces[i]
146 | c = colors[0]
147 | nsteps = np.argmax(trace[trace<=a]) + 2
148 | if df.iloc[i]['choice']==0:
149 | # lower-bound crossing: use the lower-bound color
150 | c = colors[1]
151 | nsteps = np.argmin(trace[trace>=0]) + 2
152 | ax.plot(np.arange(trSteps, trSteps + nsteps), traces[i, :nsteps], color=c, alpha=.1)
153 |
154 |
155 | def plot_drift_line(df, parameters, color='k', ax=None):
156 |
157 | a, trSteps, v, zStart, si, dx, dt, deadline = convert_params(parameters)
158 | rt = np.mean(df[df.choice==1].rt.values / dt)
159 | if v<0:
160 | rt = np.mean(df[df.choice==0].rt.values / dt)
161 | accum_x = np.arange(rt)*.001
162 | driftRate = zStart + (accum_x * v)
163 | x = np.linspace(trSteps, rt, accum_x.size)
164 | ax.plot(x, driftRate, color=color, linewidth=3)
165 |
166 |
167 | def sdt_interact(pH=.80, pFA=.10):
168 |
169 | plt.figure(2)
170 | ax = plt.gca()
171 |
172 | #n0, n1 = float(FA + CR), float(Hits + Misses)
173 | n0 = 100; n1 = 100
174 | if pH == 0: pH += 0.01
175 | if pH == 1: pH -= 0.01
176 | if pFA == 0: pFA += 0.01
177 | if pFA == 1: pFA -= 0.01
178 |
179 | Hits = pH * n1
180 | Misses = n1 - Hits
181 | FA = pFA * n0
182 | CR = n0 - FA
183 |
184 | d, c = sdt.sdt_mle(Hits, Misses, CR, FA)
185 | cLine = norm.ppf(1-pFA)
186 | dstr = "$d'={:.2f}$".format(d)
187 | cstr = "$c={:.2f}$".format(c)
188 |
189 | x = np.linspace(-4, 7, 1000)
190 | noiseDist = norm.pdf(loc=0, scale=1, x=x)
191 | signalDist = norm.pdf(loc=d, scale=1, x=x)
192 |
193 | plt.plot(x, noiseDist, color='k', alpha=.4)
194 | plt.plot(x, signalDist, color='k')
195 |
196 | yupper = ax.get_ylim()[-1]
197 | ax.vlines(cLine, 0, yupper, linestyles='-', linewidth=1.5)
198 | ax.set_ylim(0, yupper)
199 | ax.set_xlim(-4, 7)
200 | ax.set_yticklabels([])
201 | sns.despine(left=True, right=True, top=True)
202 |
203 | ax.text(4, yupper*.9, dstr, fontsize=14)
204 | ax.text(4, yupper*.8, cstr, fontsize=14)
205 |
206 | plt.show()
207 |
208 |
209 | def plot_qlearning(data, nblocks=25, analyze=True):
210 |
211 | if analyze:
212 | auc = utils.get_optimal_auc(data, nblocks, verbose=True)
213 |
214 | sns.set(style='white', font_scale=1.3)
215 | clrs = ['#3778bf', '#feb308', '#9b59b6', '#2ecc71', '#e74c3c',
216 | '#3498db', '#fd7f23', '#694098', '#319455', '#f266db',
217 | '#13579d', '#fa8d67', '#a38ff1', '#3caca4', '#c24f54']
218 |
219 | f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12,3.5))
220 | df = data.copy()
221 | nactions = int(df.columns[-4].split('p')[-1])+1
222 | actions = np.arange(nactions)
223 |
224 | mudf = df.groupby('trial').mean().reset_index()
225 | errdf = df.groupby('trial').sem().reset_index()*1.96
226 | x = mudf.trial.values
227 |
228 | plot_err = True
229 | if np.isnan(errdf.loc[1, 'q0']):
230 | plot_err = False
231 |
232 | x3 = np.arange(1, nblocks+1)
233 | chance = 1/nactions
234 | mu3, err3 = utils.analyze_bandits(df, nblocks=nblocks, get_err=plot_err)
235 | ax3.plot(x3, mu3, color='k')
236 | ax3.hlines(chance, 1, x3[-1], color='k', linestyles='--', label='chance')
237 |
238 | for i, act in enumerate(actions):
239 | muQ = mudf['q{}'.format(act)].values
240 | muP = mudf['p{}'.format(act)].values
241 | ax1.plot(x, muQ, label='$arm_{}$'.format(i), color=clrs[i])
242 | ax2.plot(x, muP, color=clrs[i])
243 |
244 | if plot_err:
245 | errQ = errdf['q{}'.format(act)].values
246 | errP = errdf['p{}'.format(act)].values
247 | ax1.fill_between(x, muQ-errQ, muQ+errQ, color=clrs[i], alpha=.2)
248 | ax2.fill_between(x, muP-errP, muP+errP, color=clrs[i], alpha=.2)
249 | if i==0:
250 | ax3.fill_between(x3, mu3-err3, mu3+err3, color='k', alpha=.15)
251 | else:
252 | ychance = np.ones(mu3.size) * chance
253 | mu3A = np.copy(mu3)
254 | mu3B = np.copy(mu3)
255 | mu3A[np.where(mu3<=chance)] = chance
256 | mu3B[np.where(mu3>=chance)] = chance
257 | ax3.fill_between(x3, ychance, mu3A, color='#2ecc71', alpha=.15)
258 | ax3.fill_between(x3, ychance, mu3B, color='#e74c3c', alpha=.15)
259 |
260 | ax1.legend(loc=4)
261 | ax1.set_ylabel('$Q(arm)$')
262 | ax1.set_title('Value')
263 |
264 | ax2.set_ylabel('$P(arm)$')
265 | ax2.set_ylim(0,1)
266 | ax2.set_title('Softmax Prob.')
267 |
268 | ax3.set_ylim(0,1)
269 | ax3.set_ylabel('% Optimal Arm')
270 | ax3.set_xticks([1, nblocks+1])
271 | ax3.set_xticklabels([1, df.trial.max()])
272 | ax3.legend(loc=4)
273 |
274 | for ax in f.axes:
275 | ax.set_xlabel('Trials')
276 | plt.tight_layout()
277 | sns.despine()
278 |
--------------------------------------------------------------------------------
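A brief sketch tying plot_ddm_sims above to the simulator in ADMCode/ddm.py. The parameter vector follows the (a, tr, v, z, si, dx, dt) ordering documented in sim_ddm_trials; the numeric values (and the dx = si*sqrt(dt) step size) are illustrative choices, not values taken from the source.

    import numpy as np
    from ADMCode import ddm, visualize as vis

    dt, si = .001, .1
    dx = si * np.sqrt(dt)                       # evidence step scaled by noise and dt (assumed)
    # a, tr, v, z, si, dx, dt
    parameters = np.array([.15, .25, 1.0, .5, si, dx, dt])

    df, traces = ddm.sim_ddm_trials(parameters, ntrials=500, deadline=1.5)
    fig = vis.plot_ddm_sims(df, parameters, traces=traces, plot_v=True)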
/ADMCode/neural.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import numpy as np
3 | import pandas as pd
4 | import random
5 | import matplotlib.pyplot as plt
6 | from numpy.random import sample as rs
7 | from numpy import hstack as hs
8 | from numpy import newaxis as na
9 | from scipy.stats.distributions import norm, uniform
10 | from mpl_toolkits.axes_grid1 import make_axes_locatable
11 | import matplotlib as mpl
12 | import seaborn as sns
13 |
14 | sns.set(style='white', font_scale=1.8)
15 | clrs = ['#3778bf', '#e74c3c', '#9b59b6', '#319455', '#feb308', '#fd7f23']
16 |
17 | def LCA_Model(I1=10, I2=8, I0=2, k=5, B=5, si=1., Z=1, dt=.01, tau=.1, tmax=1.5):
18 |
19 | timepoints = np.arange(0, tmax, dt)
20 | ntime = timepoints.size
21 |
22 | y1 = np.zeros(ntime)
23 | y2 = np.zeros(ntime)
24 | dx=np.sqrt(si*dt/tau)
25 |
26 | E1=si*np.sqrt(dt/tau)*rs(ntime)
27 | E2=si*np.sqrt(dt/tau)*rs(ntime)
28 |
29 | onset=100
30 | for i in range(onset, ntime):
31 | y1[i] = y1[i-1] + (I1 + -k*y1[i-1] + -B*y2[i-1]) * dt/tau + E1[i]
32 | y2[i] = y2[i-1] + (I2 + -k*y2[i-1] + -B*y1[i-1]) * dt/tau + E2[i]
33 | y_t = np.array([y1[i], y2[i]])
34 |
35 | if np.any(y_t>=Z):
36 | rt = i; act = np.argmax(y_t)
37 | return y1[:i], y2[:i], rt, act
38 | return y1[:i], y2[:i], np.nan, np.nan
39 |
40 |
41 | def attractor_network(I1=6, I2=3, I0=2, k=.85, B=.28, si=.3, rmax=50, b=30, g=9, Z=20, dt=.001, tau=.05, tmax=1.5):
42 |
43 | timepoints = np.arange(0, tmax, dt)
44 | ntime = timepoints.size
45 |
46 | r1 = np.zeros(ntime)
47 | r2 = np.zeros(ntime)
48 | dv = np.zeros(ntime)
49 |
50 | NInput = lambda x, r: rmax/(1+np.exp(-(x-b)/g))-r
51 | dspace = lambda r1, r2: (r1-r2)/np.sqrt(2)
52 |
53 | E1=si*np.sqrt(dt/tau)*rs(ntime)
54 | E2=si*np.sqrt(dt/tau)*rs(ntime)
55 |
56 | onset=100
57 | r1[:onset], r2[:onset] = [v[0][:onset] + I0+v[1][:onset] for v in [[r1,E1],[r2,E2]]]
58 |
59 | subZ=True
60 | for i in range(onset, ntime):
61 | r1[i] = r1[i-1] + dt/tau * (NInput(I1 + I0 + k*r1[i-1] + -B*r2[i-1], r1[i-1])) + E1[i]
62 | r2[i] = r2[i-1] + dt/tau * (NInput(I2 + I0 + k*r2[i-1] + -B*r1[i-1], r2[i-1])) + E2[i]
63 | dv[i] = (r1[i]-r2[i])/np.sqrt(2)
64 | if np.abs(dv[i])>=Z:
65 | rt = i+1
66 | return r1[:i+1], r2[:i+1], dv[:i+1], rt
67 | rt = i+1
68 | return r1[:i], r2[:i], dv[:i], rt
69 |
70 |
71 |
72 | def simulate_attractor_competition(Imax=12, I0=0.05, k=1.15, B=.6, g=15, b=30, rmax=100, si=6.5, dt=.002, tau=.075, Z=100, ntrials=250):
73 |
74 | sns.set(style='white', font_scale=1.8)
75 | f, ax = plt.subplots(1, figsize=(8,7))
76 | cmap = mpl.colors.ListedColormap(sns.blend_palette([clrs[1], clrs[0]], n_colors=ntrials))
77 | Iscale = np.hstack(np.tile(np.linspace(.5*Imax, Imax, ntrials//2)[::-1], 2))
78 | Ivector=np.linspace(-1,1,len(Iscale))
79 | norm = mpl.colors.Normalize(
80 | vmin=np.min(Ivector),
81 | vmax=np.max(Ivector))
82 | sm = mpl.cm.ScalarMappable(cmap=cmap, norm=norm)
83 | sm.set_array([])
84 |
85 | for i, I_t in enumerate(Iscale):
86 | if i < (ntrials/2.):
87 | I1 = Imax; I2 = I_t
88 | else:
89 | I1=I_t; I2 = Imax
90 | r1, r2, dv, rt = attractor_network(I1=I1, I2=I2, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z)
91 | ax.plot(r1, r2, color=sm.to_rgba(Ivector[i]), alpha=.5)
92 |
93 | c_ax = plt.colorbar(sm, ax=plt.gca())
94 | c_ax.set_ticks([-1, 1])
95 | c_ax.set_ticklabels(['$I_1<>I_2$'])
96 | ax.plot([0,rmax], [0,rmax], color='k', alpha=.5, linestyle='-', lw=3.5)
97 | _=plt.setp(ax, ylim=[0,rmax], xlim=[0,rmax], xticks=[0,rmax], xticklabels=[0,rmax],
98 | yticks=[0,rmax],yticklabels=[0,rmax], ylabel='$r_1$ (Hz)', xlabel='$r_2$ (Hz)')
99 |
100 |
101 | def simulate_attractor_behavior(I1=12, I2=9, I0=0.05, k=1.15, B=1., g=12, b=35, rmax=100, si=5., dt=.001, tau=.075, Z=30, ntrials=250):
102 |
103 | behavior = np.zeros((ntrials, 3))
104 | for t in range(ntrials):
105 | r1, r2, dv, rt = attractor_network(I1=I1, I2=I2, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z)
106 |
107 | choice=0
108 | acc=0
109 | if dv[-1]>=Z:
110 | choice=1
111 | acc=0
112 | if I1>I2: acc=1
113 | elif dv[-1]<=-Z:
114 | choice=2
115 | if I2>I1: acc=1
116 | elif I2==I1:
117 | acc=.5
118 |
119 |
120 | behavior[t, :] = choice, acc, rt
121 |
122 | return pd.DataFrame(behavior, columns=['choice', 'accuracy', 'rt'], index=np.arange(ntrials))
123 |
124 |
125 | def SAT_experiment(dfa, dfb):
126 |
127 | f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
128 | dfa['cond']='Control'
129 | dfb['cond'] = 'Test'
130 | dfx = pd.concat([dfa,dfb])
131 | dfacc = dfx[dfx.accuracy==1]
132 |
133 | accY=dfx.groupby('cond').mean()['accuracy'].values
134 | ax1.scatter([0], accY[0], s=205, color='k', alpha=1.)
135 | ax1.scatter([1], accY[1], s=205, color=clrs[0], alpha=1.)
136 | ax1.plot([0,1], accY, color='k', alpha=.3, linewidth=3.5)
137 |
138 | sns.kdeplot(dfacc[dfacc.cond=='Control'].rt.values, ax=ax2, shade=True, color='k', alpha=.15, lw=0)
139 | sns.kdeplot(dfacc[dfacc.cond=='Test'].rt.values, ax=ax2, shade=True, color=clrs[0], linewidth=0)
140 |
141 | rtmu = dfacc.groupby('cond').mean()['rt'].values
142 | xmax = ax2.get_ylim()[-1]
143 | ax2.vlines(rtmu[0], 0, xmax, color='k', linestyles='--', linewidth=2, label='Control')
144 | ax2.vlines(rtmu[1], 0, xmax, color=clrs[0], linewidth=2, label='Test')
145 | ax2.set_yticklabels([])
146 | ax1.set_ylim(0,1)
147 | ax1.set_xlim(-.5,1.5)
148 | ax1.set_xticks([0,1])
149 | ax1.set_xticklabels(['Control','Test'])
150 | ax1.set_ylabel('% Correct')
151 | ax1.set_xlabel('Condition')
152 | ax2.set_xlabel('RT (ms)')
153 | ax2.legend()
154 | sns.despine()
155 |
156 | def noisy_attractor_endpoints(I=12, I0=0.05, k=1.15, B=1.15, g=25, b=50, rmax=100, si=6.5, dt=.002, tau=.05, Z=100, ntrials=250):
157 |
158 | f, axes = plt.subplots(1, 4, figsize=(12,3.5))
159 |
160 | for i in range(4):
161 | attractor_endpoints(I=I[i], I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z, ntrials=ntrials, ax=axes[i])
162 | if i>0:
163 | axes[i].set_yticklabels([])
164 | axes[i].set_ylabel('')
165 | else:
166 | axes[i].set_yticks([0, 120])
167 | axes[i].set_yticklabels([0, 120])
168 | axes[i].set_ylim(0,120)
169 | plt.tight_layout()
170 |
171 |
172 | def attractor_endpoints(I=12, I0=0.05, k=1.15, B=1.15, g=25, b=50, rmax=100, si=6.5, dt=.002, tau=.05, Z=100, ntrials=250, ax=None):
173 |
174 | sns.set(style='white', font_scale=1.8)
175 | if ax is None:
176 | f, ax = plt.subplots(1, figsize=(4,4))
177 |
178 | r1d1,r2d1,r1d2,r2d2 = [],[],[],[]
179 | for i in range(ntrials):
180 | r1, r2, dv, rt = attractor_network(I1=I, I2=I, I0=I0, k=k, B=B, g=g, b=b, rmax=rmax, si=si, dt=dt, tau=tau, Z=Z)
181 | if r1[-1]>r2[-1]:
182 | r1d1.append(r1[-1])
183 | r2d1.append(r2[-1])
184 | if r2[-1]>r1[-1]:
185 | r1d2.append(r1[-1])
186 | r2d2.append(r2[-1])
187 |
188 | ax.scatter(r2d1, r1d1, s=30, color=clrs[0], marker='o', alpha=.1)
189 | ax.scatter(r2d2, r1d2, s=30, color=clrs[1], marker='o', alpha=.1)
190 |
191 | #xymax = np.max(np.hstack([r1d1, r2d1]))
192 | #xymax = np.max(np.hstack([r1d2, r2d2]))
193 | xymax=120
194 | rmax=int(xymax)
195 | ax.plot([0,xymax], [0,xymax], color='k', alpha=.5, linestyle='-', lw=3.5)
196 | _ = plt.setp(ax, ylim=[0,xymax], xlim=[0,xymax], xticks=[0,xymax], xticklabels=[0,rmax], yticks=[0,xymax],yticklabels=[0,rmax], ylabel='$r_1$ (Hz)', xlabel='$r_2$ (Hz)')
197 |
198 |
199 | def plot_sigmoid_response(b=50, g=20, rmax=1):
200 |
201 | x = np.linspace(0,100,100)
202 | y = rmax/(1+np.exp(-(x-b)/g))
203 |
204 | plt.vlines(b, 0, y[b], color='r', label='b')
205 | plt.hlines(.5, 0, b, color='k', linestyles='--')
206 | plt.fill_between(x[:b+1], 0, y[:b+1], alpha=.05, color='k')
207 | plt.text(b+2, .045, 'b', color='r')
208 |
209 | # plot g slope
210 | w_lo = int(x[b])
211 | w_hi = int(x[b+10])
212 | plt.plot([w_lo, w_hi], [y[w_lo]+.03, y[w_hi]+.03], color='b')
213 | plt.text(b, y[b+5]+.04, 'g', color='b')
214 |
215 | # plot f-i curve
216 | plt.plot(x, y, color='k')
217 |
218 | ax = plt.gca()
219 | ax.set_xlabel('Input Current')
220 | ax.set_ylabel('Neural Response')
221 | ax.set_xticks([0,100])
222 | ax.set_xlim(0,100)
223 | ax.set_xticklabels([0,100])
224 | ax.set_ylim(0, rmax*1.05)
225 | sns.despine()
226 |
227 |
228 | def plot_decision_dynamics(r1, r2, dv, Z=20, axes=None, alpha=.7, label=None, xlim=None):
229 |
230 | if axes is None:
231 | f, axes = plt.subplots(2, 1, figsize=(6,9))
232 | ax2, ax1 = axes
233 | rt=len(dv)-1
234 |
235 |     l1, l2, l3 = [None]*3
236 |     ylabel = 'Activation'
237 |     if label:
238 |         l1, l2, l3 = '$y_1$', '$y_2$', r'$\Delta y$'
239 | 
240 | ax1.plot(r1, color=clrs[0], label=l1, linewidth=2.5, alpha=alpha)
241 | ax1.plot(r2, color=clrs[1], label=l2, linewidth=2.5, alpha=alpha)
242 | ax1.vlines(rt, ymin=r2[rt], ymax=r1[rt], color=clrs[0], linestyles='--', alpha=alpha)
243 | ax2.plot(dv, color=clrs[2], label=l3, linewidth=2.5, alpha=alpha)
244 |
245 | if xlim is None:
246 | xlim = ax1.get_xlim()
247 |
248 | xmin, xmax = xlim
249 | for ax in [ax1,ax2]:
250 | ax.set_xlim(xmin, xmax)
251 | ax.legend(loc=2)
252 | ax2.set_yticklabels([])
253 | ax1.set_xlabel('Time (ms)')
254 | ax1.set_ylabel(ylabel)
255 | ax2.set_ylabel('Decision State')
256 | ax2.set_ylim(-Z, Z)
257 | ax2.hlines(0, xmin=0, xmax=xmax, color='k', linestyles='--', alpha=.5)
258 | ax2.hlines(Z-.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4)
259 | ax2.hlines(-Z+.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4)
260 | ax2.set_xticklabels([])
261 | sns.despine(ax=ax1)
262 |
263 | sns.despine(ax=ax2, right=True, top=True, bottom=True)
264 |
265 | def plot_rt_distributions(ax1, ax2, rts, xlim=None, alpha=.8):
266 |
267 | divider = make_axes_locatable(ax2)
268 | axx = divider.append_axes("top", size=1.6, pad=0, sharex=ax2)
269 | for rt in rts:
270 | sns.kdeplot(rt, ax=axx, shade=True, color=clrs[2], alpha=alpha)
271 | alpha=alpha-.5
272 |
273 | for spine in ['top', 'left', 'bottom', 'right']:
274 | axx.spines[spine].set_visible(False)
275 |
276 | axx.set_xticklabels([])
277 | axx.set_yticklabels([])
278 | ax2.set_yticklabels([])
279 | xmin, xmax = ax1.get_xlim()
280 | ax1.set_xlim(0, xmax)
281 | Z = ax2.get_ylim()[-1]
282 |
283 | ax2.hlines(0, xmin=0, xmax=xmax, color='k', linestyles='--', alpha=.5)
284 | ax2.hlines(Z-.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4)
285 | ax2.hlines(-Z+.25, 0, xmax, color=clrs[2], alpha=1., linestyles='-', lw=4)
286 |
--------------------------------------------------------------------------------
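
A minimal usage sketch for the plotting helpers above (an editor's illustration, not a file in the repo). It assumes the module-level `clrs` palette, the `attractor_network` function defined earlier in this module, and the usual `numpy`/`pandas`/`matplotlib`/`seaborn` imports are in scope:

```python
# Hedged sketch: simulate two conditions of the attractor model and compare them.
# Lowering the decision threshold Z is assumed here to trade accuracy for speed.
df_control = simulate_attractor_behavior(I1=12, I2=9, Z=30, ntrials=250)
df_test = simulate_attractor_behavior(I1=12, I2=9, Z=20, ntrials=250)

# Plots mean accuracy per condition and the correct-trial RT densities side by side.
SAT_experiment(df_control, df_test)
```
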
/ADMCode/snuz/ars/ars.py:
--------------------------------------------------------------------------------
1 | '''
2 | Parallel implementation of the Augmented Random Search method.
3 | Horia Mania --- hmania@berkeley.edu
4 | Aurelia Guy
5 | Benjamin Recht
6 | '''
7 |
8 | # import parser
9 | import time
10 | import os
11 | import numpy as np
12 | import gym
13 | from ADMCode.snuz.ars import logz
14 | import ray
15 | from ADMCode.snuz.ars import utils
16 | from ADMCode.snuz.ars import optimizers
17 | from ADMCode.snuz.ars.policies import *
18 | import socket
19 | from ADMCode.snuz.ars.shared_noise import *
20 |
21 |
22 | @ray.remote
23 | class Worker(object):
24 | """
25 | Object class for parallel rollout generation.
26 | """
27 |
28 | def __init__(self,
29 | env_seed,
30 | env_name='',
31 | policy_params=None,
32 | deltas=None,
33 | rollout_length=1000,
34 | delta_std=0.02):
35 |
36 | # initialize OpenAI environment for each worker
37 | self.env = gym.make(env_name)
38 | self.env.seed(env_seed)
39 |
40 | # each worker gets access to the shared noise table
41 | # with independent random streams for sampling
42 | # from the shared noise table.
43 | self.deltas = SharedNoiseTable(deltas, env_seed + 7)
44 | self.policy_params = policy_params
45 | if policy_params['type'] == 'linear':
46 | self.policy = LinearPolicy(policy_params)
47 | else:
48 | raise NotImplementedError
49 |
50 | self.delta_std = delta_std
51 | self.rollout_length = rollout_length
52 |
53 | def get_weights_plus_stats(self):
54 | """
55 | Get current policy weights and current statistics of past states.
56 | """
57 | assert self.policy_params['type'] == 'linear'
58 | return self.policy.get_weights_plus_stats()
59 |
60 | def rollout(self, shift=0., rollout_length=None):
61 | """
62 | Performs one rollout of maximum length rollout_length.
63 |         At each time-step it subtracts shift from the reward.
64 | """
65 |
66 | if rollout_length is None:
67 | rollout_length = self.rollout_length
68 |
69 | total_reward = 0.
70 | steps = 0
71 |
72 | ob = self.env.reset()
73 | for i in range(rollout_length):
74 | action = self.policy.act(ob)
75 | ob, reward, done, _ = self.env.step(action)
76 | steps += 1
77 | total_reward += (reward - shift)
78 | if done:
79 | break
80 |
81 | return total_reward, steps
82 |
83 | def do_rollouts(self, w_policy, num_rollouts=1, shift=1, evaluate=False):
84 | """
85 | Generate multiple rollouts with a policy parametrized by w_policy.
86 | """
87 |
88 | rollout_rewards, deltas_idx = [], []
89 | steps = 0
90 |
91 | for i in range(num_rollouts):
92 |
93 | if evaluate:
94 | self.policy.update_weights(w_policy)
95 | deltas_idx.append(-1)
96 |
97 | # set to false so that evaluation rollouts are not used for updating state statistics
98 | self.policy.update_filter = False
99 |
100 | # for evaluation we do not shift the rewards (shift = 0) and we use the
101 | # default rollout length (1000 for the MuJoCo locomotion tasks)
102 | reward, r_steps = self.rollout(
103 | shift=0., rollout_length=self.env.spec.timestep_limit)
104 | rollout_rewards.append(reward)
105 |
106 | else:
107 | idx, delta = self.deltas.get_delta(w_policy.size)
108 |
109 | delta = (self.delta_std * delta).reshape(w_policy.shape)
110 | deltas_idx.append(idx)
111 |
112 | # set to true so that state statistics are updated
113 | self.policy.update_filter = True
114 |
115 | # compute reward and number of timesteps used for positive perturbation rollout
116 | self.policy.update_weights(w_policy + delta)
117 | pos_reward, pos_steps = self.rollout(shift=shift)
118 |
119 |             # compute reward and number of timesteps used for negative perturbation rollout
120 | self.policy.update_weights(w_policy - delta)
121 | neg_reward, neg_steps = self.rollout(shift=shift)
122 | steps += pos_steps + neg_steps
123 |
124 | rollout_rewards.append([pos_reward, neg_reward])
125 |
126 | return {
127 | 'deltas_idx': deltas_idx,
128 | 'rollout_rewards': rollout_rewards,
129 | "steps": steps
130 | }
131 |
132 | def stats_increment(self):
133 | self.policy.observation_filter.stats_increment()
134 | return
135 |
136 | def get_weights(self):
137 | return self.policy.get_weights()
138 |
139 | def get_filter(self):
140 | return self.policy.observation_filter
141 |
142 | def sync_filter(self, other):
143 | self.policy.observation_filter.sync(other)
144 | return
145 |
146 |
147 | class ARSLearner(object):
148 | """
149 | Object class implementing the ARS algorithm.
150 | """
151 |
152 | def __init__(self,
153 | env_name='HalfCheetah-v1',
154 | policy_params=None,
155 | num_workers=32,
156 | num_deltas=320,
157 | deltas_used=320,
158 | delta_std=0.02,
159 | logdir=None,
160 | rollout_length=1000,
161 | step_size=0.01,
162 | shift='constant zero',
163 | params=None,
164 | seed=123):
165 |
166 | logz.configure_output_dir(logdir)
167 |
168 | env = gym.make(env_name)
169 |
170 | self.timesteps = 0
171 | self.action_size = env.action_space.shape[0]
172 | self.ob_size = env.observation_space.shape[0]
173 | self.num_deltas = num_deltas
174 | self.deltas_used = deltas_used
175 | self.rollout_length = rollout_length
176 | self.step_size = step_size
177 | self.delta_std = delta_std
178 | self.logdir = logdir
179 | self.shift = shift
180 | self.max_past_avg_reward = float('-inf')
181 | self.num_episodes_used = float('inf')
182 |
183 | # create shared table for storing noise
184 | # print("Creating deltas table.")
185 | deltas_id = create_shared_noise.remote()
186 | self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
187 | # print('Created deltas table.')
188 |
189 | # initialize workers with different random seeds
190 | print('Initializing workers.')
191 | self.num_workers = num_workers
192 | self.workers = [
193 | Worker.remote(
194 | seed + 7 * i,
195 | env_name=env_name,
196 | policy_params=policy_params,
197 | deltas=deltas_id,
198 | rollout_length=rollout_length,
199 | delta_std=delta_std) for i in range(num_workers)
200 | ]
201 |
202 | # initialize policy
203 | if policy_params['type'] == 'linear':
204 | self.policy = LinearPolicy(policy_params)
205 | self.w_policy = self.policy.get_weights()
206 | else:
207 | raise NotImplementedError
208 |
209 | # initialize optimization algorithm
210 | self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
211 | # print("Initialization of ARS complete.")
212 |
213 | def aggregate_rollouts(self, num_rollouts=None, evaluate=False):
214 | """
215 | Aggregate update step from rollouts generated in parallel.
216 | """
217 |
218 | if num_rollouts is None:
219 | num_deltas = self.num_deltas
220 | else:
221 | num_deltas = num_rollouts
222 |
223 | # put policy weights in the object store
224 | policy_id = ray.put(self.w_policy)
225 |
226 | t1 = time.time()
227 | num_rollouts = int(num_deltas / self.num_workers)
228 |
229 | # parallel generation of rollouts
230 | rollout_ids_one = [
231 | worker.do_rollouts.remote(
232 | policy_id,
233 | num_rollouts=num_rollouts,
234 | shift=self.shift,
235 | evaluate=evaluate) for worker in self.workers
236 | ]
237 |
238 | rollout_ids_two = [
239 | worker.do_rollouts.remote(
240 | policy_id, num_rollouts=1, shift=self.shift, evaluate=evaluate)
241 | for worker in self.workers[:(num_deltas % self.num_workers)]
242 | ]
243 |
244 | # gather results
245 | results_one = ray.get(rollout_ids_one)
246 | results_two = ray.get(rollout_ids_two)
247 |
248 | rollout_rewards, deltas_idx = [], []
249 |
250 | for result in results_one:
251 | if not evaluate:
252 | self.timesteps += result["steps"]
253 | deltas_idx += result['deltas_idx']
254 | rollout_rewards += result['rollout_rewards']
255 |
256 | for result in results_two:
257 | if not evaluate:
258 | self.timesteps += result["steps"]
259 | deltas_idx += result['deltas_idx']
260 | rollout_rewards += result['rollout_rewards']
261 |
262 | deltas_idx = np.array(deltas_idx)
263 | rollout_rewards = np.array(rollout_rewards, dtype=np.float64)
264 |
265 | # print('Maximum reward of collected rollouts:', rollout_rewards.max())
266 | # t2 = time.time()
267 |
268 | # print('Time to generate rollouts:', t2 - t1)
269 |
270 | if evaluate:
271 | return rollout_rewards
272 |
273 | # select top performing directions if deltas_used < num_deltas
274 | max_rewards = np.max(rollout_rewards, axis=1)
275 | if self.deltas_used > self.num_deltas:
276 | self.deltas_used = self.num_deltas
277 |
278 | idx = np.arange(max_rewards.size)[max_rewards >= np.percentile(
279 | max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))]
280 | deltas_idx = deltas_idx[idx]
281 | rollout_rewards = rollout_rewards[idx, :]
282 |
283 | # normalize rewards by their standard deviation
284 | rollout_rewards /= np.std(rollout_rewards)
285 |
286 | # t1 = time.time()
287 | # aggregate rollouts to form g_hat, the gradient used to compute SGD step
288 | g_hat, count = utils.batched_weighted_sum(
289 | rollout_rewards[:, 0] - rollout_rewards[:, 1],
290 | (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx),
291 | batch_size=500)
292 | g_hat /= deltas_idx.size
293 | # t2 = time.time()
294 | # print('time to aggregate rollouts', t2 - t1)
295 | return g_hat
296 |
297 | def train_step(self):
298 | """
299 | Perform one update step of the policy weights.
300 | """
301 |
302 | g_hat = self.aggregate_rollouts()
303 | # print("Euclidean norm of update step:", np.linalg.norm(g_hat))
304 | self.w_policy -= self.optimizer._compute_step(g_hat).reshape(
305 | self.w_policy.shape)
306 | return
307 |
308 | def train(self, num_iter):
309 |
310 | start = time.time()
311 | iter_scores = []
312 | for i in range(num_iter):
313 |
314 | t1 = time.time()
315 | self.train_step()
316 | t2 = time.time()
317 | # print('total time of one step', t2 - t1)
318 | # print('iter ', i, ' done')
319 |
320 | # record statistics every 10 iterations
321 | rewards = self.aggregate_rollouts(num_rollouts=100, evaluate=True)
322 | iter_scores.append(np.mean(rewards))
323 |
324 | if ((i + 1) % 10 == 0):
325 | w = ray.get(self.workers[0].get_weights_plus_stats.remote())
326 | # np.savez(self.logdir + "/lin_policy_plus", w)
327 |
328 | logz.log_tabular("Time", time.time() - start)
329 | logz.log_tabular("Iteration", i + 1)
330 | logz.log_tabular("AverageReward", np.mean(rewards))
331 | logz.log_tabular("StdRewards", np.std(rewards))
332 | logz.log_tabular("MaxRewardRollout", np.max(rewards))
333 | logz.log_tabular("MinRewardRollout", np.min(rewards))
334 | logz.log_tabular("timesteps", self.timesteps)
335 | logz.dump_tabular()
336 |
337 | t1 = time.time()
338 | # get statistics from all workers
339 | for j in range(self.num_workers):
340 | self.policy.observation_filter.update(
341 | ray.get(self.workers[j].get_filter.remote()))
342 | self.policy.observation_filter.stats_increment()
343 |
344 |         # make sure master filter buffer is clear
345 | self.policy.observation_filter.clear_buffer()
346 | # sync all workers
347 | filter_id = ray.put(self.policy.observation_filter)
348 | setting_filters_ids = [
349 | worker.sync_filter.remote(filter_id) for worker in self.workers
350 | ]
351 | # waiting for sync of all workers
352 | ray.get(setting_filters_ids)
353 |
354 | increment_filters_ids = [
355 | worker.stats_increment.remote() for worker in self.workers
356 | ]
357 | # waiting for increment of all workers
358 | ray.get(increment_filters_ids)
359 | t2 = time.time()
360 | # print('Time to sync statistics:', t2 - t1)
361 |
362 | return list(range(num_iter)), iter_scores
363 |
364 |
365 | class Hyperparameters:
366 | num_episodes = 10
367 | n_directions = 8
368 | deltas_used = 8
369 | step_size = 0.02
370 | delta_std = 0.03
371 | n_workers = 1
372 | rollout_length = 240
373 | shift = 0
374 | seed = 237
375 | policy_type = 'linear'
376 | dir_path = 'data'
377 | filter = 'MeanStdFilter'
378 |
379 |
380 | def run_ars(env_name='MountainCarContinuous-v0',
381 | logdir='data',
382 | **algorithm_hyperparameters):
383 |
384 | # Ray init?
385 | if not ray.is_initialized():
386 | # local_ip = socket.gethostbyname(socket.gethostname())
387 | # ray.init(redis_address=local_ip + ':6379', local_mode=True)
388 | ray.init(local_mode=True)
389 |
390 | # Params
391 | hp = Hyperparameters()
392 | for k, v in algorithm_hyperparameters.items():
393 | setattr(hp, k, v)
394 |
395 | dir_path = hp.dir_path
396 |
397 | if not (os.path.exists(dir_path)):
398 | os.makedirs(dir_path)
399 | logdir = dir_path
400 | if not (os.path.exists(logdir)):
401 | os.makedirs(logdir)
402 |
403 | env = gym.make(env_name)
404 | ob_dim = env.observation_space.shape[0]
405 | ac_dim = env.action_space.shape[0]
406 |
407 | # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1.
408 | policy_params = {
409 | 'type': 'linear',
410 | 'ob_filter': hp.filter,
411 | 'ob_dim': ob_dim,
412 | 'ac_dim': ac_dim
413 | }
414 |
415 | ARS = ARSLearner(
416 | env_name=env_name,
417 | policy_params=policy_params,
418 | num_workers=hp.n_workers,
419 | num_deltas=hp.n_directions,
420 | deltas_used=hp.deltas_used,
421 | step_size=hp.step_size,
422 | delta_std=hp.delta_std,
423 | logdir=logdir,
424 | rollout_length=hp.rollout_length,
425 | shift=hp.shift,
426 | params=hp,
427 | seed=hp.seed)
428 |
429 | return ARS.train(hp.num_episodes)
430 |
--------------------------------------------------------------------------------
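
For orientation, a hedged sketch (an editor's addition, not part of the repo) of how `run_ars` above might be invoked. Keyword arguments are applied to the `Hyperparameters` defaults via `setattr`, and `gym` plus `ray` need to be installed:

```python
from ADMCode.snuz.ars.ars import run_ars

# Each iteration perturbs the linear policy along n_directions random deltas,
# combines the reward differences into the gradient estimate g_hat, and takes an SGD step.
episodes, scores = run_ars(
    env_name='MountainCarContinuous-v0',
    num_episodes=5,        # number of ARS training iterations
    n_directions=8,        # perturbation directions per iteration (num_deltas)
    n_workers=1,           # ray workers generating rollouts
    rollout_length=240)

print(scores)              # mean evaluation reward recorded after each iteration
```
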
/notebooks/Homework 4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Homework 4: Believer-Skeptic Model"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 5,
13 | "metadata": {
14 | "slideshow": {
15 | "slide_type": "skip"
16 | }
17 | },
18 | "outputs": [],
19 | "source": [
20 | "from __future__ import division\n",
21 | "import ADMCode\n",
22 | "from ADMCode import visualize as vis\n",
23 | "from ADMCode import believer_skeptic\n",
24 | "\n",
25 | "import numpy as np\n",
26 | "from numpy.random import sample as rs\n",
27 | "import pandas as pd\n",
28 | "import sys\n",
29 | "import os\n",
30 | "\n",
31 | "# from ipywidgets import interactive\n",
32 | "import matplotlib.pyplot as plt\n",
33 | "import seaborn as sns\n",
34 | "import warnings\n",
35 | "\n",
36 |     "# Temporary workaround until the package update is pushed to PyPI \n",
37 | "#sys.path.insert(0,'../ADMCode')\n",
38 | "#import believer_skeptic\n",
39 | "\n",
40 | "warnings.simplefilter('ignore', np.RankWarning)\n",
41 | "warnings.filterwarnings(\"ignore\", module=\"matplotlib\")\n",
42 | "warnings.filterwarnings(\"ignore\")\n",
43 | "sns.set(style='white', font_scale=1.3)\n",
44 | "\n",
45 | "%matplotlib inline"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "## **Question 1:** "
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "slideshow": {
59 | "slide_type": "slide"
60 | }
61 | },
62 | "source": [
63 | "**Answer the following questions about the relationship between the system of equations below.** See the Lab 4 notebook for definition of terms. "
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "slideshow": {
70 | "slide_type": "fragment"
71 | }
72 | },
73 | "source": [
74 | "\n",
75 | "* **Eq. 1**: Go process. $$G_{j,t}(\\tau) = G_{j,t}(\\tau - \\Delta \\tau) + \\upsilon ^G _{j,t} \\Delta \\tau + \\epsilon^G_j (\\tau)$$\n",
76 | "\n",
77 | "* **Eq. 2**: No go process. $$N_{j,t}(\\tau) = N_{j,t}(\\tau - \\Delta \\tau) + \\upsilon ^N _{j,t} \\Delta \\tau + \\epsilon^N_j (\\tau)$$\n",
78 | "\n",
79 | "* **Eq. 3**: Execution process. $$\\theta_{j,t}(\\tau) = [G_{j,t}(\\tau) - N_{j,t}(\\tau)] \\cdot cosh(\\gamma \\cdot \\tau)$$\n",
80 | "\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {
86 | "slideshow": {
87 | "slide_type": "slide"
88 | }
89 | },
90 | "source": [
91 | "\n",
92 |     "**1a:** Describe the three components of Eqs. 1 & 2 in layman's terms.\n",
93 | "\n",
94 | "* **Answer 1a:** \n",
95 | "\n",
96 | "\n",
97 | "\n",
98 | "**1b:** As time ($\\tau$) progresses, how does the exponential term in Eq. 3 ($\\cosh (\\gamma \\cdot \\tau)$) influence the nature of the competition between channels?\n",
99 | "\n",
100 | "* **Answer 1b:** \n",
101 | "\n"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "## **Question 2:** "
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "**Answer the following questions about the relationship between the system of equations below.**"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {},
121 | "source": [
122 | "* **Eq. 4**: Action value. $$q_j(t+1) = q_j(t) + \\alpha \\cdot [r(t) - q_j(t)]$$\n",
123 | "\n",
124 | "* **Eq. 5**: Greediness. $$p_j(t) = \\frac{\\exp{\\beta \\cdot q_j(t)}}{\\Sigma^n_i \\exp{\\beta \\cdot q_i(t)}}$$\n",
125 | "\n",
126 | "* **Eq. 6**: (Reward) prediction error. $$\\delta_j(t) = p_j(t) - p_j(t-1)$$\n",
127 | "\n",
128 | "* **Eq. 7**: Update rule: $$\\upsilon^{G/N}_{j,t+1} = \\upsilon^{G/N}_{j,t} + \\alpha^{G/N} \\cdot \\delta_j(t)$$"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "metadata": {},
134 | "source": [
135 | "**2a:** How is the estimation of the prediction error (Eq. 6) different than the normative form of the update rule in q-learning?\n",
136 | "* **Answer 2a:** \n",
137 | "\n",
138 | "**2b:** In the Believer-Skeptic model, the Go & NoGo processes have different learning rates (i.e., $\\alpha^G$ & $\\alpha^N$). What biological justification is there for these two pathways having different forms of learning?\n",
139 | "* **Answer 2b:** \n",
140 | "\n"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 |     "## **Question 3:**"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 6,
153 | "metadata": {
154 | "slideshow": {
155 | "slide_type": "fragment"
156 | }
157 | },
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "0.62884\n"
164 | ]
165 | },
166 | {
167 | "data": {
168 | "text/plain": [
169 | "0.0 18\n",
170 | "1.0 13\n",
171 | "2.0 33\n",
172 | "3.0 36\n",
173 | "Name: choice, dtype: int64"
174 | ]
175 | },
176 | "execution_count": 6,
177 | "metadata": {},
178 | "output_type": "execute_result"
179 | }
180 | ],
181 | "source": [
182 | "# Define the DDM parameters as an object to pass\n",
183 | "p={'vd':np.asarray([.7]*4), 'vi':np.asarray([.25]*4), 'a':.25, 'tr':.3, 'xb':.00005}\n",
184 | "\n",
185 | "# Learning rates on the Go (direct) and NoGo (indirect) pathways\n",
186 | "aGo=.1\n",
187 | "aNo=.1\n",
188 | "\n",
189 | "# Run one simulation\n",
190 | "igtData = pd.read_csv(\"https://github.com/CoAxLab/AdaptiveDecisionMaking_2018/blob/master/data/IGTCards.csv?raw=true\")\n",
191 | "\n",
192 | "outdf, agentdf = believer_skeptic.play_IGT(p, feedback=igtData, beta=.09, nblocks=2, \n",
193 | " alphaGo=aGo, alphaNo=aNo, singleProcess=0)\n",
194 | "\n",
195 | "print(agentdf.rt.mean())\n",
196 | "agentdf.iloc[:, :].choice.value_counts().sort_index()"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "The Iowa Gambling task has two general metrics for estimating performance of the agent.\n",
204 | "\n",
205 |     "\n",
206 | "\n",
207 |     "**Payoff (P)** is the degree to which the agent chooses the High Value decks over the Low Value decks. ",
208 |     "This is a measure of efficient value-based decision-making.\n",
209 | "\n",
210 | "P = $\\Sigma (C + D) - \\Sigma (A + B)$\n",
211 | "\n",
212 |     "**Sensitivity (Q)** is the degree to which the agent prefers High Frequency rewards over Low Frequency rewards.\n",
213 | "\n",
214 | "Q = $\\Sigma (B + D) - \\Sigma (A + C)$\n",
215 | "\n",
216 | "(In the simulations above Deck A is choice 0, Deck B is choice 1, Deck C is choice 2, and Deck D is choice 3)."
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {
222 | "slideshow": {
223 | "slide_type": "slide"
224 | }
225 | },
226 | "source": [
227 | "**Q3:** From the agent dataframe (agentdf) run in the code cell above, calculate P & Q."
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 7,
233 | "metadata": {
234 | "scrolled": true,
235 | "slideshow": {
236 | "slide_type": "fragment"
237 | }
238 | },
239 | "outputs": [],
240 | "source": [
241 | "# CODE FOR ANSWERING Q3"
242 | ]
243 | },
244 | {
245 | "cell_type": "markdown",
246 | "metadata": {},
247 | "source": [
248 |     "## **Question 4:**"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 8,
254 | "metadata": {},
255 | "outputs": [],
256 | "source": [
257 | "# Learning rates on the Go (direct) and NoGo (indirect) pathways\n",
258 | "aGo=.1\n",
259 | "aNo=.1\n",
260 | "\n",
261 | "outdf, agentdf = believer_skeptic.play_IGT(p, feedback=igtData, beta=.09, nblocks=2, \n",
262 | " alphaGo=aGo, alphaNo=aNo, singleProcess=0)\n",
263 | "\n",
264 | "## INSERT CALCULATION CODE FOR PAYOFF & SENSITIVITY FROM QUESTION 3 HERE\n",
265 | "## TO ANSWER THE QUESTIONS BELOW"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 |     "(To answer the questions below, you may need to repeat several runs of the code above in order to see stability in Payoff & Sensitivity scores). "
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "**4a:** Change $\\alpha^N$ (i.e., aNo) above to 0.025, while keeping $\\alpha^G$ (i.e., aGo) at 0.1. How does this impact the Payoff and Sensitivity scores?\n",
280 | "* **Answer 4a:** \n",
281 | "\n",
282 | "\n",
283 |     "**4b:** Put $\\alpha^N$ (i.e., aNo) back to 0.1, while reducing $\\alpha^G$ (i.e., aGo) to 0.05. How does this impact the Payoff and Sensitivity scores?\n",
284 | "* **Answer 4b:** \n"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {
290 | "slideshow": {
291 | "slide_type": "slide"
292 | }
293 | },
294 | "source": [
295 | "## **Bonus Problems** \n",
296 | "\n",
297 | "Full credit is only given if the instructor can run your modified code below.\n",
298 | "\n",
299 | "**BP a:** Use the process simulation code below to visualize how varying the drift rate of the Go ($v_d$) and NoGo ($v_i$) processes impacts the dynamics of the four choices. \n",
300 | "\n",
301 | "* **Bonus Answer a:** *copy/paste your modified code into a code cell below* \n",
302 | "\n",
303 |     "**BP b:** Write a set of nested for-loops to simulate a set of agent runs with $\\alpha^N$ values ranging from 0.025 to 0.15 (in increments of 0.005), keeping $\\alpha^G$ fixed at 0.1. Simulate 100 runs per value of $\\alpha^N$ and report (or visualize) the average Payoff & Sensitivity score. Report how these values are impacted by different levels of $\\alpha^N$. \n",
304 | "* **Bonus Answer b:** *copy/paste your modified code into a code cell below* \n",
305 | "\n",
306 | "\n",
307 | "**BP c:** Repeat the simulations from Bonus Problem b above but now increase $v_i$ to 0.5. How does this change the results?\n",
308 | "* **Bonus Answer c:** *copy/paste your modified code into a code cell below* "
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {
314 | "slideshow": {
315 | "slide_type": "slide"
316 | }
317 | },
318 | "source": [
319 |     "## **Process Code**"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 9,
325 | "metadata": {
326 | "slideshow": {
327 | "slide_type": "fragment"
328 | }
329 | },
330 | "outputs": [
331 | {
332 | "name": "stdout",
333 | "output_type": "stream",
334 | "text": [
335 | "2\n"
336 | ]
337 | },
338 | {
339 | "data": {
340 |        "image/png": "(base64-encoded PNG data omitted; the cell output shows the winning accumulator trace in blue and the losing accumulators in red, plotted up to the RT index)",
341 | "text/plain": [
342 | ""
343 | ]
344 | },
345 | "metadata": {},
346 | "output_type": "display_data"
347 | }
348 | ],
349 | "source": [
350 | "# This is a snippet from believer_skeptic.py, specifically the \n",
351 | "# simulate_multirace function.\n",
352 | "single_process=0 \n",
353 | "si=.1\n",
354 | "tb=1.0\n",
355 | "dt=.001\n",
356 | "nresp = p['vd'].size\n",
357 | "dx = si * np.sqrt(dt)\n",
358 | "nTime = np.ceil((tb-p['tr'])/dt).astype(int)\n",
359 | "xtb = believer_skeptic.temporal_dynamics(p, np.cumsum([dt]*nTime))\n",
360 | "\n",
361 | "# Run the process model\n",
362 | "Pd = .5 * (1 + (p['vd'] * np.sqrt(dt))/si)\n",
363 | "Pi = .5 * (1 + (p['vi'] * np.sqrt(dt))/si)\n",
364 | "direct = xtb * np.where((rs((nresp, nTime)).T < Pd),dx,-dx).T\n",
365 | "indirect = np.where((rs((nresp, nTime)).T < Pi),dx,-dx).T\n",
366 | "execution = np.cumsum(direct-indirect, axis=1)\n",
367 | "\n",
368 | "act_ix, rt, rt_ix = believer_skeptic.analyze_multiresponse(execution, p) \n",
369 | "\n",
370 | "nsteps_to_rt = np.argmax((execution.T>=p['a']).T, axis=1)\n",
371 | "rts = p['tr'] + nsteps_to_rt*dt\n",
372 | "\n",
373 | "# set non responses to 999\n",
374 | "rts[rts==p['tr']]=999\n",
375 | "\n",
376 | "# get accumulator with fastest RT (winner) in each cond\n",
377 | "act_ix = np.argmin(rts)\n",
378 | "winner, rt=act_ix, rts[act_ix]\n",
379 | "rt_ix = np.ceil((rt-p['tr'])/dt).astype(int)\n",
380 | "actions = np.arange(nresp)\n",
381 | "losers = actions[actions!=act_ix]\n",
382 | "print(act_ix)\n",
383 | "plt.plot(execution[act_ix][:rt_ix], color='b')\n",
384 | "for l in losers:\n",
385 | " plt.plot(execution[l][:rt_ix], color='r', alpha=.3)\n",
386 | "sns.despine()"
387 | ]
388 | }
389 | ],
390 | "metadata": {
391 | "kernelspec": {
392 | "display_name": "Python 3",
393 | "language": "python",
394 | "name": "python3"
395 | },
396 | "language_info": {
397 | "codemirror_mode": {
398 | "name": "ipython",
399 | "version": 3
400 | },
401 | "file_extension": ".py",
402 | "mimetype": "text/x-python",
403 | "name": "python",
404 | "nbconvert_exporter": "python",
405 | "pygments_lexer": "ipython3",
406 | "version": "3.6.2"
407 | },
408 | "latex_envs": {
409 | "LaTeX_envs_menu_present": true,
410 | "autocomplete": true,
411 | "bibliofile": "biblio.bib",
412 | "cite_by": "apalike",
413 | "current_citInitial": 1,
414 | "eqLabelWithNumbers": true,
415 | "eqNumInitial": 1,
416 | "hotkeys": {
417 | "equation": "Ctrl-E",
418 | "itemize": "Ctrl-I"
419 | },
420 | "labels_anchors": false,
421 | "latex_user_defs": false,
422 | "report_style_numbering": false,
423 | "user_envs_cfg": false
424 | }
425 | },
426 | "nbformat": 4,
427 | "nbformat_minor": 2
428 | }
429 |
--------------------------------------------------------------------------------
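
A hedged sketch (an editor's illustration, not part of the assignment files) of how the Payoff and Sensitivity formulas from Homework 4 could be turned into code, assuming `agentdf` is the dataframe returned by `believer_skeptic.play_IGT` and that choices 0-3 correspond to Decks A-D as stated in the notebook:

```python
# Count how often each deck (0=A, 1=B, 2=C, 3=D) was selected; `choice` is stored as a float.
n = {k: int((agentdf.choice == k).sum()) for k in range(4)}

P = (n[2] + n[3]) - (n[0] + n[1])   # Payoff: high-value (C+D) minus low-value (A+B) picks
Q = (n[1] + n[3]) - (n[0] + n[2])   # Sensitivity: high-frequency (B+D) minus low-frequency (A+C)
print(P, Q)
```
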