├── EFE_Precision_Updating.m
├── EFE_learning_novelty_term.m
├── Estimate_parameters.m
├── Message_passing_example.m
├── Pencil_and_paper_exercise_solutions.m
├── Prediction_error_example.m
├── README.md
├── Simplified_simulation_script.m
├── Step_by_Step_AI_Guide.m
├── Step_by_Step_Hierarchical_Model.m
├── VFE_calculation_example.m
├── spm_MDP_VB_ERP_tutorial.m
├── spm_MDP_VB_X_tutorial.m
└── spm_MDP_VB_game_tutorial.m


/EFE_Precision_Updating.m:
--------------------------------------------------------------------------------
  1 | %% Example code for simulated expected free energy precision (beta/gamma) updates
  2 | % (associated with dopamine in the neural process theory)
  3 | 
  4 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  5 | % Application to Empirical Data
  6 | 
  7 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  8 | 
  9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 10 | 
 11 | clear all
 12 | close all
 13 | 
 14 | % This script will reproduce the simulation results in Figure 9
 15 | 
 16 | % Here you can set the number of policies and the distributions that
 17 | % contribute to prior and posterior policy precision
 18 | % (E, G and F are column vectors and must all have one entry per policy)
 19 | E = [1 1 1 1 1]';                             % Set a fixed-form prior distribution 
 20 |                                               % over policies (habits)
 21 | 
 22 | G = [12.505 9.51 12.5034 12.505 12.505]';     % Set an example expected 
 23 |                                               % free energy distribution over policies
 24 | 
 25 | F = [17.0207 1.7321 1.7321 17.0387 17.0387]'; % Set an example variational 
 26 |                                               % free energy distribution over 
 27 |                                               % policies after a new observation
 28 | 
 29 | 
 30 | gamma_0 = 1;                 % Starting expected free energy precision value
 31 | gamma = gamma_0;             % Initial expected free energy precision to be updated
 32 | beta_prior = 1/gamma;        % Initial prior on beta (beta is the inverse of the precision gamma)
 33 | beta_posterior = beta_prior; % Initial posterior on beta (starts out equal to the prior)
 34 | psi = 2;                     % Step size parameter (promotes stable convergence) 
 35 | 
 36 | for ni = 1:16 % number of variational updates (16)
 37 | 
 38 |     % calculate prior and posterior over policies (see main text for 
 39 |     % explanation of equations) 
 40 |     % (each is exp(.) divided by its own sum, so both vectors sum to 1)
 41 |     pi_0 = exp(log(E) - gamma*G)/sum(exp(log(E) - gamma*G)); % prior over policies
 42 | 
 43 |     pi_posterior = exp(log(E) - gamma*G - F)/sum(exp(log(E) - gamma*G - F)); % posterior 
 44 |                                                                              % over policies
 45 |     % calculate expected free energy precision 
 46 | 
 47 |     G_error = (pi_posterior - pi_0)'*-G; % expected free energy prediction error
 48 |     % (scalar: change from prior to posterior over policies, projected onto -G)
 49 |     beta_update = beta_posterior - beta_prior + G_error; % change in beta:  
 50 |                                                          % gradient of F with respect to gamma 
 51 |                                                          % (recall gamma = 1/beta)
 52 |     
 53 |     beta_posterior = beta_posterior - beta_update/psi; % update posterior precision 
 54 |                                                    % estimate (with step size of psi = 2, which reduces 
 55 |                                                    % the magnitude of each update and can promote 
 56 |                                                    % stable convergence)
 57 | 
 58 |     gamma = 1/beta_posterior; % update expected free energy precision
 59 | 
 60 |     % simulate dopamine responses
 61 | 
 62 |     n = ni; % storage index (same value as the loop counter)
 63 | 
 64 |     gamma_dopamine(n,1) = gamma; % simulated neural encoding of precision
 65 |                                  % (beta_posterior^-1) at each iteration of 
 66 |                                  % variational updating                                 
 67 | 
 68 |     policies_neural(:,n) = pi_posterior; % neural encoding of posterior over policies at 
 69 |                                          % each iteration of variational updating
 70 | end 
 71 | 
 72 | %% Show Results
 73 | % NOTE: pi_0 and pi_posterior below were computed at the start of the last iteration, i.e. before the final gamma update
 74 | disp(' ');
 75 | disp('Final Policy Prior:');
 76 | disp(pi_0);
 77 | disp(' ');
 78 | disp('Final Policy Posterior:');
 79 | disp(pi_posterior);
 80 | disp(' ');
 81 | disp('Final Policy Difference Vector:');
 82 | disp(pi_posterior-pi_0);
 83 | disp(' ');
 84 | disp('Negative Expected Free Energy:');
 85 | disp(-G);
 86 | disp(' ');
 87 | disp('Prior G Precision (Prior Gamma):');
 88 | disp(gamma_0);
 89 | disp(' ');
 90 | disp('Posterior G Precision (Gamma):');
 91 | disp(gamma);
 92 | disp(' ');
 93 | 
 94 | gamma_dopamine_plot = [gamma_0;gamma_0;gamma_0;gamma_dopamine]; % Prepend the prior value (three times) so the trace starts at gamma_0
 95 | 
 96 | figure
 97 | plot(gamma_dopamine_plot);
 98 | ylim([min(gamma_dopamine_plot)-.05 max(gamma_dopamine_plot)+.05])
 99 | title('Expected Free Energy Precision (Tonic Dopamine)');
100 | xlabel('Updates');
101 | ylabel('\gamma');
102 | 
103 | figure
104 | plot([gradient(gamma_dopamine_plot)],'r'); % numerical gradient of the gamma trace
105 | ylim([min(gradient(gamma_dopamine_plot))-.01 max(gradient(gamma_dopamine_plot))+.01])
106 | title('Rate of Change in Precision (Phasic Dopamine)');
107 | xlabel('Updates');
108 | ylabel('\gamma gradient');
109 | 
110 | % uncomment if you want to display/plot firing rates encoding beliefs about each
111 | % policy (columns = policies, rows = updates over time)
112 | 
113 | % plot(policies_neural);
114 | % disp('Firing rates encoding beliefs over policies:');
115 | % disp(policies_neural');
116 | % disp(' ');
117 | 


--------------------------------------------------------------------------------
/EFE_learning_novelty_term.m:
--------------------------------------------------------------------------------
  1 | %% Calculating novelty term in expected free energy when learning 'A' matrix concentration parameters
  2 | % (which drives parameter exploration)
  3 | 
  4 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  5 | % Application to Empirical Data
  6 | 
  7 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  9 | 
 10 | clear 
 11 | close all
 12 | 
 13 | %-- 'a' = concentration parameters for likelihood matrix 'A'
 14 | % (a2 = 10*a1 and a3 = 100*a1, so all three normalize to the same 'A')
 15 | % small concentration parameter values 
 16 | a1 = [.25  1;  
 17 |       .75  1]; 
 18 |   
 19 | % intermediate concentration parameter values 
 20 | a2 = [2.5  10;
 21 |       7.5  10]; 
 22 |   
 23 | % large concentration parameter values  
 24 | a3 = [25  100;
 25 |       75  100]; 
 26 |   
 27 | % normalize columns in 'a' to get likelihood matrix 'A' (see col_norm
 28 | % function at the end of script)
 29 | A1 = col_norm(a1);
 30 | A2 = col_norm(a2);
 31 | A3 = col_norm(a3);
 32 |   
 33 | % calculate 'a_sum' (the column sums of 'a', repeated in both rows)
 34 | a1_sum = [a1(1,1)+a1(2,1)  a1(1,2)+a1(2,2);
 35 |           a1(1,1)+a1(2,1)  a1(1,2)+a1(2,2)]; 
 36 |   
 37 | a2_sum = [a2(1,1)+a2(2,1)  a2(1,2)+a2(2,2);
 38 |           a2(1,1)+a2(2,1)  a2(1,2)+a2(2,2)];
 39 |       
 40 | a3_sum = [a3(1,1)+a3(2,1)  a3(1,2)+a3(2,2);
 41 |           a3(1,1)+a3(2,1)  a3(1,2)+a3(2,2)];
 42 | 
 43 | % element wise inverse for 'a' and 'a_sum'
 44 | inv_a1 =  [1/a1(1,1)  1/a1(1,2);
 45 |            1/a1(2,1)  1/a1(2,2)];
 46 |        
 47 | inv_a2 =  [1/a2(1,1)  1/a2(1,2);
 48 |            1/a2(2,1)  1/a2(2,2)];
 49 |        
 50 | inv_a3 =  [1/a3(1,1)  1/a3(1,2);
 51 |            1/a3(2,1)  1/a3(2,2)];
 52 |        
 53 | inv_a1_sum =  [1/a1_sum(1,1)  1/a1_sum(1,2);
 54 |                1/a1_sum(2,1)  1/a1_sum(2,2)];
 55 |        
 56 | inv_a2_sum =  [1/a2_sum(1,1)  1/a2_sum(1,2);
 57 |                1/a2_sum(2,1)  1/a2_sum(2,2)];
 58 |        
 59 | inv_a3_sum =  [1/a3_sum(1,1)  1/a3_sum(1,2);
 60 |                1/a3_sum(2,1)  1/a3_sum(2,2)];
 61 |       
 62 | % 'W' term for 'a' matrix: W = 1/2 * (1./a - 1./a_sum), element-wise
 63 | W1 = .5*(inv_a1-inv_a1_sum);
 64 | W2 = .5*(inv_a2-inv_a2_sum);
 65 | W3 = .5*(inv_a3-inv_a3_sum);
 66 | 
 67 | % beliefs over states under a policy at a time point
 68 | s_pi_tau = [.9 .1]';
 69 | 
 70 | % predictive posterior over outcomes (A*s_pi_tau = predicted o_pi_tau)
 71 | A1s = A1*s_pi_tau;
 72 | A2s = A2*s_pi_tau;
 73 | A3s = A3*s_pi_tau;
 74 | 
 75 | % W term multiplied by beliefs over states under a policy at a time point
 76 | W1s = W1*s_pi_tau;
 77 | W2s = W2*s_pi_tau;
 78 | W3s = W3*s_pi_tau;
 79 | 
 80 | % compute novelty using dot product function
 81 | Novelty_smallCP = dot((A1s),(W1s));
 82 | Novelty_intermediateCP = dot((A2s),(W2s));
 83 | Novelty_largeCP = dot((A3s),(W3s));
 84 | % (A is identical in all three cases while W scales with 1./a, so novelty
 85 | % drops by a factor of 10 each time the concentration parameters grow tenfold)
 86 | % show results
 87 | disp(' ');
 88 | disp('Novelty term for small concentration parameter values:');
 89 | disp(Novelty_smallCP);
 90 | disp(' ');
 91 | disp('Novelty term for intermediate concentration parameter values:');
 92 | disp(Novelty_intermediateCP);
 93 | disp(' ');
 94 | disp('Novelty term for large concentration parameter values:');
 95 | disp(Novelty_largeCP);
 96 | disp(' ');
 97 | 
 98 | 
 99 | %% function for normalizing 'a' to get likelihood matrix 'A'
100 | function A_normed = col_norm(A_norm)
101 | % Rescale every column of A_norm so that it sums to one.
102 | column_totals = sum(A_norm,1);    % 1-by-N row holding the sum of each column
103 | A_normed = A_norm./column_totals; % each column is divided by its own total
104 | % (the output has the same size as the input)
105 | end 
106 | 


--------------------------------------------------------------------------------
/Estimate_parameters.m:
--------------------------------------------------------------------------------
  1 | function [DCM] = Estimate_parameters(DCM)
  2 | 
  3 | % MDP inversion using Variational Bayes
  4 | % FORMAT [DCM] = Estimate_parameters(DCM)   (adapted from spm_dcm_mdp)
  5 | %
  6 | % Expects:
  7 | %--------------------------------------------------------------------------
  8 | % DCM.MDP   % MDP structure specifying a generative model
  9 | % DCM.field % parameter (field) names to optimise
 10 | % DCM.U     % cell array of outcomes (stimuli)
 11 | % DCM.Y     % cell array of responses (action)
 12 | %
 13 | % Returns:
 14 | %--------------------------------------------------------------------------
 15 | % DCM.M     % generative model (DCM)
 16 | % DCM.Ep    % Conditional means (structure)
 17 | % DCM.Cp    % Conditional covariances
 18 | % DCM.F     % (negative) Free-energy bound on log evidence
 19 | % 
 20 | % This routine inverts (cell arrays of) trials specified in terms of the
 21 | % stimuli or outcomes and subsequent choices or responses. It first
 22 | % computes the prior expectations (and covariances) of the free parameters
 23 | % specified by DCM.field. These parameters are log scaling parameters that
 24 | % are applied to the fields of DCM.MDP. 
 25 | %
 26 | % If there is no learning implicit in multi-trial games, only unique trials
 27 | % (as specified by the stimuli), are used to generate (subjective)
 28 | % posteriors over choice or action. Otherwise, all trials are used in the
 29 | % order specified. The ensuing posterior probabilities over choices are
 30 | % used with the specified choices or actions to evaluate their log
 31 | % probability. This is used to optimise the MDP (hyper) parameters in
 32 | % DCM.field using variational Laplace (with numerical evaluation of the
 33 | % curvature).
 34 | % NOTE(review): the log-likelihood sub-function in this file runs every trial in DCM.U, in order.
 35 | %__________________________________________________________________________
 36 | % Copyright (C) 2005 Wellcome Trust Centre for Neuroimaging
 37 | 
 38 | % Karl Friston
 39 | % $Id: spm_dcm_mdp.m 7120 2017-06-20 11:30:30Z spm $
 40 | 
 41 | % OPTIONS
 42 | %--------------------------------------------------------------------------
 43 | ALL = false; % if true: zero prior means and diag(param) as prior covariance for every field
 44 | 
 45 | % Here we specify prior expectations (for parameter means and variances)
 46 | %--------------------------------------------------------------------------
 47 | prior_variance = 1/4; % smaller values will lead to a greater complexity 
 48 |                       % penalty (posteriors will remain closer to priors)
 49 | 
 50 | for i = 1:length(DCM.field)
 51 |     field = DCM.field{i};
 52 |     try % param is only used in the ALL branch below
 53 |         param = DCM.MDP.(field);
 54 |         param = double(~~param); % 1 where the MDP field is non-zero, 0 elsewhere
 55 |     catch % e.g. the field is not present in DCM.MDP
 56 |         param = 1;
 57 |     end
 58 |     if ALL
 59 |         pE.(field) = zeros(size(param));
 60 |         pC{i,i}    = diag(param);
 61 |     else
 62 |         if strcmp(field,'alpha')
 63 |             pE.(field) = log(16);          % in log-space (to keep positive)
 64 |             pC{i,i}    = prior_variance;
 65 |         elseif strcmp(field,'beta')
 66 |             pE.(field) = log(1);           % in log-space (to keep positive)
 67 |             pC{i,i}    = prior_variance;
 68 |         elseif strcmp(field,'la')
 69 |             pE.(field) = log(1);           % in log-space (to keep positive)
 70 |             pC{i,i}    = prior_variance;
 71 |         elseif strcmp(field,'rs')
 72 |             pE.(field) = log(5);           % in log-space (to keep positive)
 73 |             pC{i,i}    = prior_variance;
 74 |         elseif strcmp(field,'eta')
 75 |             pE.(field) = log(0.5/(1-0.5)); % in logit-space - bounded between 0 and 1
 76 |             pC{i,i}    = prior_variance;
 77 |         elseif strcmp(field,'omega')
 78 |             pE.(field) = log(0.5/(1-0.5)); % in logit-space - bounded between 0 and 1
 79 |             pC{i,i}    = prior_variance;
 80 |         else
 81 |             pE.(field) = 0;                % if it can take any negative or positive value
 82 |             pC{i,i}    = prior_variance;
 83 |         end
 84 |     end
 85 | end
 86 | 
 87 | pC      = spm_cat(pC); % presumably assembles the diagonal cell blocks into one matrix - confirm against spm_cat
 88 | 
 89 | % model specification
 90 | %--------------------------------------------------------------------------
 91 | M.L     = @(P,M,U,Y)spm_mdp_L(P,M,U,Y);  % log-likelihood function
 92 | M.pE    = pE;                            % prior means (parameters)
 93 | M.pC    = pC;                            % prior variance (parameters)
 94 | M.mdp   = DCM.MDP;                       % MDP structure
 95 | 
 96 | % Variational Laplace
 97 | %--------------------------------------------------------------------------
 98 | [Ep,Cp,F] = spm_nlsi_Newton(M,DCM.U,DCM.Y); % This is the actual fitting routine
 99 | 
100 | % Store posterior distributions and log evidence (free energy)
101 | %--------------------------------------------------------------------------
102 | DCM.M   = M;  % Generative model
103 | DCM.Ep  = Ep; % Posterior parameter estimates
104 | DCM.Cp  = Cp; % Posterior variances and covariances
105 | DCM.F   = F;  % Free energy of model fit
106 | 
107 | return
108 | 
109 | function L = spm_mdp_L(P,M,U,Y)
110 | % log-likelihood function
111 | % FORMAT L = spm_mdp_L(P,M,U,Y)
112 | % P    - parameter structure
113 | % M    - generative model
114 | % U    - inputs
115 | % Y    - observed responses
116 | %
117 | % This function runs the generative model with a given set of parameter
118 | % values, after adding in the observations and actions on each trial
119 | % from (real or simulated) participant data. It then sums the
120 | % (log-)probabilities (log-likelihood) of the participant's actions under the model when it
121 | % includes that set of parameter values. The variational Bayes fitting
122 | % routine above uses this function to find the set of parameter values that maximize
123 | % the probability of the participant's actions under the model (while also
124 | % penalizing models with parameter values that move farther away from prior
125 | % values).
126 | %__________________________________________________________________________
127 | 
128 | if ~isstruct(P); P = spm_unvec(P,M.pE); end
129 | 
130 | % Here we re-transform parameter values out of log- or logit-space when 
131 | % inserting them into the model to compute the log-likelihood
132 | %--------------------------------------------------------------------------
133 | mdp   = M.mdp;
134 | field = fieldnames(M.pE);
135 | for i = 1:length(field)
136 |     if strcmp(field{i},'alpha')
137 |         mdp.(field{i}) = exp(P.(field{i}));
138 |     elseif strcmp(field{i},'beta')
139 |         mdp.(field{i}) = exp(P.(field{i}));
140 |     elseif strcmp(field{i},'la')
141 |         mdp.(field{i}) = exp(P.(field{i}));
142 |     elseif strcmp(field{i},'rs')
143 |         mdp.(field{i}) = exp(P.(field{i}));
144 |     elseif strcmp(field{i},'eta')
145 |         mdp.(field{i}) = 1/(1+exp(-P.(field{i})));
146 |     elseif strcmp(field{i},'omega')
147 |         mdp.(field{i}) = 1/(1+exp(-P.(field{i})));
148 |     else % NOTE(review): any other field is also exponentiated, i.e. forced to be positive
149 |         mdp.(field{i}) = exp(P.(field{i}));
150 |     end
151 | end
152 | 
153 | % place MDP in trial structure
154 | %--------------------------------------------------------------------------
155 | la = mdp.la_true;  % true level of loss aversion
156 | rs = mdp.rs_true;  % true preference magnitude for winning (higher = more risk-seeking)
157 | 
158 | if isfield(M.pE,'la')&&isfield(M.pE,'rs')
159 |     mdp.C{2} = [0  0       0   ;      % Null
160 |                 0 -mdp.la -mdp.la  ;  % Loss
161 |                 0  mdp.rs  mdp.rs/2]; % win
162 | elseif isfield(M.pE,'la')
163 |     mdp.C{2} = [0  0       0   ;      % Null
164 |                 0 -mdp.la -mdp.la  ;  % Loss
165 |                 0  rs      rs/2];     % win
166 | elseif isfield(M.pE,'rs')
167 |     mdp.C{2} = [0  0       0   ;      % Null
168 |                 0 -la     -la  ;      % Loss
169 |                 0  mdp.rs  mdp.rs/2]; % win
170 | else
171 |     mdp.C{2} = [0  0   0   ;  % Null
172 |                 0 -la -la  ;  % Loss
173 |                 0  rs  rs/2]; % win
174 | end
175 | 
176 | j = 1:numel(U); % observations for each trial
177 | n = numel(j);   % number of trials
178 | 
179 | [MDP(1:n)] = deal(mdp);  % Create MDP with number of specified trials
180 | [MDP.o]    = deal(U{j}); % Add observations in each trial
181 | 
182 | % solve MDP and accumulate log-likelihood
183 | %--------------------------------------------------------------------------
184 | MDP   = spm_MDP_VB_X_tutorial(MDP); % run model with possible parameter values
185 | 
186 | L     = 0; % start (log) probability of actions given the model at 0
187 | 
188 | for i = 1:numel(Y) % Get probability of true actions for each trial
189 |     for j = 1:size(Y{i},2) % one column of Y{i} per action time step (was: row count of Y{1})
190 |         % row 2 of Y{i}: only the second (controllable) state factor is scored
191 |         L = L + log(MDP(i).P(:,Y{i}(2,j),j)+ eps); % sum the (log) probabilities of each action
192 |                                                    % given a set of possible parameter values
193 |     end
194 | end 
195 | 
196 | clear('MDP')
197 | 
198 | fprintf('LL: %f \n',L)
199 | 


--------------------------------------------------------------------------------
/Message_passing_example.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %-- Message Passing Examples--%
  3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | 
  5 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  6 | % Application to Empirical Data
  7 | 
  8 | % By: Ryan Smith and Christopher J. Whyte
  9 | % We also acknowledge Samuel Taylor for contributing to this example code
 10 | 
 11 | % This script provides two examples of (marginal) message passing, based on
 12 | % the steps described in the main text. Each of the two examples (sections)
 13 | % need to be run separately. The first example fixes all observed
 14 | % variables immediately and does not include variables associated with the
 15 | % neural process theory. The second example provides observations
 16 | % sequentially and also adds in the neural process theory variables. To
 17 | % remind the reader, the message passing steps in the main text are:
 18 | 
 19 | %   1. Initialize the values of the approximate posteriors q(s_{\pi,\tau}) 
 20 | %      for all hidden variables (i.e., all edges) in the graph. 
 21 | %   2. Fix the value of observed variables (here, o_\tau).
 22 | %   3. Choose an edge (V) corresponding to the hidden variable you want to 
 23 | %      infer (here, s_{\pi,\tau}).
 24 | %   4. Calculate the messages, \mu(s_{\pi,\tau}), which take on values sent by 
 25 | %      each factor node connected to V.
 26 | %   5. Pass a message from each connected factor node N to V (often written 
 27 | %      as \mu_{N \to V}). 
 28 | %   6. Update the approximate posterior represented by V according to the 
 29 | %      following rule: q(s_{\pi,\tau}) \propto \overrightarrow{\mu}(s_{\pi,\tau}) \cdot \overleftarrow{\mu}(s_{\pi,\tau}). The arrow 
 30 | %      notation here indicates messages from two different factors arriving 
 31 | %      at the same edge. 
 32 | %       6A. Normalize the product of these messages so that q(s_{\pi,\tau}) 
 33 | %           corresponds to a proper probability distribution. 
 34 | %       6B. Use this new q(s_{\pi,\tau}) to update the messages sent by 
 35 | %           connected factors (i.e., for the next round of message passing).
 36 | % 	7. Repeat steps 4-6 sequentially for each edge.
 37 | % 	8. Steps 3-7 are then repeated until the difference between updates 
 38 | %      converges to some acceptably low value (i.e., resulting in stable 
 39 | %      posterior beliefs for all edges). 
 40 | 
 41 | %% Example 1: Fixed observations and message passing steps
 42 | 
 43 | % This section carries out marginal message passing on a graph with beliefs
 44 | % about states at two time points. In this first example, both observations 
 45 | % are fixed from the start (i.e., there are no ts as in full active inference
 46 | % models with sequentially presented observations) to provide the simplest
 47 | % example possible. We also highlight where each of the message passing
 48 | % steps described in the main text are carried out.
 49 | 
 50 | % Note that some steps (7 and 8) appear out of order when they involve loops that
 51 | % repeat earlier steps
 52 | 
 53 | % Specify generative model and initialize variables
 54 | 
 55 | rng('shuffle')
 56 | % (NOTE: no random numbers are drawn anywhere in this example)
 57 | clear
 58 | close all
 59 | 
 60 | % priors
 61 | D = [.5 .5]';
 62 | 
 63 | % likelihood mapping
 64 | A = [.9 .1;
 65 |      .1 .9];
 66 |  
 67 | % transitions
 68 |  B = [1 0;
 69 |       0 1];
 70 | 
 71 | % number of timesteps
 72 | T = 2;
 73 | 
 74 | % number of iterations of message passing
 75 | NumIterations = 16;
 76 | 
 77 | % initialize posterior (Step 1)
 78 | for t = 1:T 
 79 |     Qs(:,t) = [.5 .5]';
 80 | end 
 81 | 
 82 | % fix observations (Step 2)
 83 | o{1} = [1 0]';
 84 | o{2} = [1 0]';
 85 | 
 86 | % iterate a set number of times (alternatively, until convergence) (Step 8)
 87 | for Ni = 1:NumIterations
 88 |     % For each edge (hidden state) (Step 7)
 89 |     for tau = 1:T
 90 |         % choose an edge (Step 3)
 91 |         q = nat_log(Qs(:,tau)); % (q is overwritten below before it is used)
 92 |         
 93 |         % compute messages sent by D and B (Steps 4) using the posterior
 94 |         % computed in Step 6B
 95 |         if tau == 1 % first time point
 96 |             lnD = nat_log(D);                % Message 1
 97 |             lnBs = nat_log(B'*Qs(:,tau+1));  % Message 2
 98 |         elseif tau == T % last time point
 99 |             lnBs = nat_log(B*Qs(:,tau-1));  % Message 1
100 |         end 
101 |         % (only tau == 1 and tau == T are handled, which is every tau when T = 2)
102 |         % likelihood (Message 3)
103 |         lnAo = nat_log(A'*o{tau});
104 |         
105 |         % Steps 5-6 (Pass messages and update the posterior)
106 |         % Since all terms are in log space, this is addition instead of
107 |         % multiplication. This corresponds to equation 16 in the main
108 |         % text (within the softmax)
109 |         if tau == 1
110 |             q = .5*lnD + .5*lnBs + lnAo;
111 |         elseif tau == T
112 |             q = .5*lnBs + lnAo;
113 |         end
114 |         
115 |         % normalize using a softmax function to find posterior (Step 6A)
116 |         Qs(:,tau) = (exp(q)/sum(exp(q))); 
117 |         qs(Ni,:,tau) = Qs(:,tau); % store value for each iteration
118 |     end % Repeat for remaining edges (Step 7)
119 | end % Repeat until convergence/for fixed number of iterations (Step 8)
120 | 
121 | Qs; % final posterior beliefs over states (output suppressed; displayed below)
122 | 
123 | disp(' ');
124 | disp('Posterior over states q(s) in example 1:');
125 | disp(' ');
126 | disp(Qs);
127 | 
128 | figure
129 | 
130 | % firing rates (traces): one column per state and time point, one row per iteration
131 | qs_plot = [D' D';qs(:,:,1) qs(:,:,2)]; % add prior to starting value
132 | plot(qs_plot)
133 | title('Example 1: Approximate Posteriors (1 per edge per time point)')
134 | ylabel('q(s_t_a_u)','FontSize',12)
135 | xlabel('Message passing iterations','FontSize',12)
136 | 
137 | 
138 | %% Example 2: Sequential observations and simulation of firing rates and ERPs
139 | 
140 | % This script performs state estimation using the message passing 
141 | % algorithm introduced in Parr, Markovic, Kiebel, & Friston (2019).
142 | % This script can be thought of as the full message passing solution to 
143 | % problem 2 in the pencil and paper exercises. It also generates
144 | % simulated firing rates and ERPs in the same manner as those shown in
145 | % figs. 8, 10, 11, 14, 15, and 16. Unlike example 1, observations are
146 | % presented sequentially (i.e., two ts and two taus).
147 | 
148 | % Specify generative model and initialise variables
149 | 
150 | rng('shuffle')
151 | 
152 | clear
153 | 
154 | % priors
155 | D = [.5 .5]';
156 | 
157 | % likelihood mapping
158 | A = [.9 .1;
159 |      .1 .9];
160 |  
161 | % transitions
162 |  B = [1 0;
163 |       0 1];
164 | 
165 | % number of timesteps
166 | T = 2;
167 | 
168 | % number of iterations of message passing
169 | NumIterations = 16;
170 | 
171 | % initialize posterior (Step 1)
172 | for t = 1:T 
173 |     Qs(:,t) = [.5 .5]';
174 | end 
175 | 
176 | % fix observations sequentially (Step 2)
177 | o{1,1} = [1 0]';
178 | o{1,2} = [0 0]';
179 | o{2,1} = [1 0]';
180 | o{2,2} = [1 0]';
181 | % (o{t,tau}; the all-zero o{1,2} gives a flat likelihood message for tau = 2 at t = 1)
182 | % Message Passing
183 | 
184 | for t = 1:T 
185 |     for Ni = 1:NumIterations % (Step 8 loop of VMP)
186 |         for tau = 1:T % (Step 7 loop of VMP)
187 |             
188 |             % initialise depolarization variable: v = ln(s)
189 |             % choose an edge (Step 3 of VMP): the belief about time point tau
190 |             v = nat_log(Qs(:,tau));
191 |             
192 |             % get correct D and B for each time point (Steps 4-5 of VMP)
193 |             % using the posterior computed in Step 6B
194 |             if tau == 1 % first time point
195 |                 % past (Message 1)
196 |                 lnD = nat_log(D);
197 |                 
198 |                 % future (Message 2)
199 |                 lnBs = nat_log(B'*Qs(:,tau+1));
200 |             elseif tau == T % last time point
201 |                 % no contribution from future (only Message 1)
202 |                 lnBs  = nat_log(B*Qs(:,tau-1));
203 |             end 
204 |             % likelihood (Message 3)
205 |             lnAo = nat_log(A'*o{t,tau});
206 |             
207 |             % calculate state prediction error: equation 24
208 |             if tau == 1
209 |                 epsilon(:,Ni,t,tau) = .5*lnD + .5*lnBs + lnAo - v;
210 |             elseif tau == T
211 |                 epsilon(:,Ni,t,tau) = .5*lnBs + lnAo - v;
212 |             end 
213 |             
214 |             % (Step 6 of VMP)
215 |             % update depolarization variable: equation 25
216 |             v = v + epsilon(:,Ni,t,tau); 
217 |             % normalize using a softmax function to find posterior:
218 |             % equation 26 (Step 6A of VMP)
219 |             Qs(:,tau) = (exp(v)/sum(exp(v)));
220 |             % store Qs for firing rate plots
221 |             xn(Ni,:,tau,t) = Qs(:,tau);
222 |         end % Repeat for remaining edges (Step 7 of VMP)
223 |     end % Repeat until convergence/for number of iterations (Step 8 of VMP)
224 | end
225 | 
226 | Qs; % final posterior beliefs over states
227 | 
228 | disp(' ');
229 | disp('Posterior over states q(s) in example 2:');
230 | disp(' ');
231 | disp(Qs);
232 | 
233 | % plots
234 |     
235 | % get firing rates into usable format
236 | num_states = 2;
237 | num_epochs = 2;
238 | time_tau = [1 2 1 2;  % row 1: state index (2nd dimension of xn)
239 |             1 1 2 2]; % row 2: tau (3rd dimension of xn)
240 | for t_tau = 1:size(time_tau,2)
241 |     for epoch = 1:num_epochs
242 |         % firing rate 
243 |         firing_rate{epoch,t_tau} = xn(:,time_tau(1,t_tau),time_tau(2,t_tau),epoch);
244 |         ERP{epoch,t_tau} = gradient(firing_rate{epoch,t_tau}')';
245 |    end
246 | end
247 | 
248 | % convert cells to matrices
249 | firing_rate = spm_cat(firing_rate)';
250 | firing_rate = [zeros(length(D)*T,1)+[D; D] full(firing_rate)]; % add prior for starting value
251 | ERP = spm_cat(ERP);
252 | ERP = [zeros(length(D)*T,1)'; ERP]; % add 0 for starting value
253 | 
254 | figure
255 | 
256 | % firing rates (x axis = column index of firing_rate, matching the trace plot below)
257 | imagesc(1:size(firing_rate,2),1:(num_states*num_epochs),64*(1 - firing_rate))
258 | cmap = gray(256);
259 | colormap(cmap)
260 | title('Example 2: Firing rates (Darker = higher value)')
261 | ylabel('Firing rate','FontSize',12)
262 | xlabel('Message passing iterations','FontSize',12)
263 | 
264 | figure
265 | 
266 | % firing rates (traces)
267 | plot(firing_rate')
268 | title('Example 2: Firing rates (traces)')
269 | ylabel('Firing rate','FontSize',12)
270 | xlabel('Message passing iterations','FontSize',12)
271 | 
272 | figure
273 | 
274 | % ERPs/LFPs
275 | plot(ERP)
276 | title('Example 2: Event-related potentials')
277 | ylabel('Response','FontSize',12)
278 | xlabel('Message passing iterations','FontSize',12)
279 | 
280 | %% functions
281 | 
282 | % natural log that replaces zero values with very small values for numerical reasons.
283 | function y = nat_log(x)
284 | tiny = exp(-16); y = log(tiny + x); % the same small offset is added to every element of x
285 | end 
286 | 


--------------------------------------------------------------------------------
/Pencil_and_paper_exercise_solutions.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %-- Code/solutions for pencil and paper exercises --%
  3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | 
  5 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  6 | % Application to Empirical Data
  7 | 
  8 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  9 | 
 10 | % Note to readers: be sure to run sections individually
 11 | 
 12 | 
 13 | %% Static perception
 14 | % Computes the posterior over two hidden states from a prior D and one observation o
 15 | clear
 16 | close all
 17 | rng('default')
 18 | 
 19 | % priors
 20 | D = [.75 .25]';
 21 | 
 22 | % likelihood mapping
 23 | A = [.8 .2;
 24 |      .2 .8];
 25 | 
 26 | % observations
 27 | o = [1 0]';
 28 | 
 29 | % express generative model in terms of update equations
 30 | lns = nat_log(D) + nat_log(A'*o);
 31 | 
 32 | % normalize using a softmax function to find posterior
 33 | s = (exp(lns)/sum(exp(lns)));
 34 | 
 35 | disp('Posterior over states q(s):');
 36 | disp(' ');
 37 | disp(s);
 38 | 
 39 | % Note: Because the natural log of 0 is undefined, for numerical reasons 
 40 | % the nat_log function here adds a small constant to its input (see the
 41 | % end of this file). This means that the answers generated by this function
 42 | % will vary slightly from the exact solutions shown in the text.
 43 | % The return below stops execution here if the whole file is run at once.
 44 | return
 45 | 
 46 | %% Dynamic perception
 47 | % Posterior over states at two time points (tau), with observations arriving over two steps (t)
 48 | clear
 49 | close all
 50 | rng('default')
 51 | 
 52 | % priors
 53 | D = [.5 .5]';
 54 | 
 55 | % likelihood mapping
 56 | A = [.9 .1;
 57 |      .1 .9];
 58 |  
 59 | % transitions
 60 |  B = [1 0;
 61 |       0 1];
 62 | 
 63 | % observations
 64 | o{1,1} = [1 0]';
 65 | o{1,2} = [0 0]';
 66 | o{2,1} = [1 0]';
 67 | o{2,2} = [1 0]';
 68 | 
 69 | % number of timesteps
 70 | T = 2;
 71 | 
 72 | % initialise posterior 
 73 | for t = 1:T 
 74 |     Qs(:,t) = [.5 .5]';
 75 | end 
 76 | 
 77 | for t = 1:T 
 78 |     for tau = 1:T
 79 |         % get correct D and B for each time point
 80 |         if tau == 1 % first time point
 81 |             lnD = nat_log(D);% past
 82 |             lnBs = nat_log(B'*Qs(:,tau+1));% future (backward message uses B')
 83 |         elseif tau == T % last time point
 84 |             lnBs  = nat_log(B*Qs(:,tau-1));% past only (forward message uses B, not B')
 85 |         end 
 86 |         % likelihood
 87 |         lnAo = nat_log(A'*o{t,tau});
 88 |         % update equation
 89 |         if tau == 1
 90 |             lns = .5*lnD + .5*lnBs + lnAo;
 91 |         elseif tau == T
 92 |             lns = .5*lnBs + lnAo;
 93 |         end 
 94 |         % normalize using a softmax function to find posterior
 95 |         Qs(:,tau) = (exp(lns)/sum(exp(lns)))
 96 |     end 
 97 | end
 98 | 
 99 | Qs % final posterior beliefs over states
100 | 
101 | disp('Posterior over states q(s):');
102 | disp(' ');
103 | disp(Qs);
104 | 
104 | 
105 | %% functions
106 | 
107 | % natural log with a very small constant, exp(-16), added to the input so that log(0) is never evaluated.
108 | function y = nat_log(x)
109 | y = log(x+exp(-16));
110 | end 
111 | 


--------------------------------------------------------------------------------
/Prediction_error_example.m:
--------------------------------------------------------------------------------
  1 | %% Example code for simulating state and outcome prediction errors
  2 | 
  3 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  4 | % Application to Empirical Data
  5 | 
  6 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  7 | 
  8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  9 | clear
 10 | close all
 11 | %% set up model to calculate state prediction errors
 12 | % This minimizes variational free energy (keeps posterior beliefs accurate 
 13 | % while also keeping them as close as possible to prior beliefs)
 14 | 
 15 | A = [.8 .4;       
 16 |      .2 .6];         % Likelihood
 17 | 
 18 | B_t1 = [.9 .2; 
 19 |         .1 .8];      % Transition prior from previous timestep
 20 |     
 21 | B_t2 = [.2 .3; 
 22 |         .8 .7];      % Transition prior from current timestep
 23 |     
 24 | o = [1 0]';          % Observation
 25 | 
 26 | s_pi_tau = [.5 .5]'; % Prior distribution over states. Note that we here
 27 |                      % use the same value for s_pi_tau-1, s_pi_tau, and 
 28 |                      % s_pi_tau+1. But this need not be the case.
 29 |                      
 30 | s_pi_tau_minus_1 = [.5 .5]';
 31 | 
 32 | s_pi_tau_plus_1 = [.5 .5]';
 33 | 
 34 | v_0 = log(s_pi_tau);      % Depolarization term (initial value)
 35 | 
 36 | B_t2_cross_intermediate = B_t2';  % Transpose B_t2
 37 | % Divide each column by its sum (a softmax would exponentiate the entries first, which is not a plain normalization)
 38 | B_t2_cross = bsxfun(@rdivide,B_t2_cross_intermediate,sum(B_t2_cross_intermediate,1)); % Normalize columns in transposed B_t2
 39 |                                             
 40 | %% Calculate state prediction error (single iteration)
 41 | 
 42 | state_error = 1/2*(log(B_t1*s_pi_tau_minus_1)+log(B_t2_cross*s_pi_tau_plus_1))...
 43 |               +log(A'*o)-log(s_pi_tau); % state prediction error
 44 | 
 45 | v = v_0 + state_error;      % Depolarization
 46 | 
 47 | s = (exp(v)/sum(exp(v)));        % Updated distribution over states (softmax of v)
 48 | 
 49 | 
 50 | disp(' ');
 51 | disp('Prior Distribution over States:');
 52 | disp(s_pi_tau);
 53 | disp(' ');
 54 | disp('State Prediction Error:');
 55 | disp(state_error);
 56 | disp(' ');
 57 | disp('Depolarization:');
 58 | disp(v);
 59 | disp(' ');
 60 | disp('Posterior Distribution over States:');
 61 | disp(s);
 62 | disp(' ');
 63 | 
 64 | return % stop here so that the next section is not run when the whole script is executed
 65 | %% set up model to calculate outcome prediction errors 
 66 | % This minimizes expected free energy (maximizes reward and
 67 | % information-gain)
 68 | 
 69 | clear
 70 | close all
 71 | 
 72 | % Calculate risk (reward-seeking) term under two policies
 73 | % risk = sum over outcomes of o.*(log(o) - log(C+z)), computed below with dot()
 74 | A = [.9 .1;
 75 |      .1 .9];   % Likelihood
 76 |  
 77 | S1 = [.9 .1]'; % States under policy 1
 78 | S2 = [.5 .5]'; % States under policy 2
 79 | 
 80 | C = [1 0]';    % Preferred outcomes
 81 | 
 82 | o_1 = A*S1;    % Predicted outcomes under policy 1
 83 | o_2 = A*S2;    % Predicted outcomes under policy 2
 84 | z = exp(-16);  % Small number added to preference distribution to avoid log(0)
 85 | 
 86 | risk_1 = dot(o_1,log(o_1) - log(C+z)); % Risk under policy 1
 87 | 
 88 | risk_2 = dot(o_2,log(o_2) - log(C+z)); % Risk under policy 2 
 89 | 
 90 | disp(' ');
 91 | disp('Risk Under Policy 1:');
 92 | disp(risk_1);
 93 | disp(' ');
 94 | disp('Risk Under Policy 2:');
 95 | disp(risk_2);
 96 | disp(' ');
 97 | 
 98 | 
 99 | % Calculate ambiguity (information-seeking) term under two policies
100 | % (A is redefined here; diag(A'*log(A)) holds sum(A.*log(A)) for each column of A)
101 | A = [.4 .2;
102 |      .6 .8];   % Likelihood
103 |  
104 | s1 = [.9 .1]'; % States under policy 1
105 | s2 = [.1 .9]'; % States under policy 2
106 | 
107 | % weight the per-column values by the state beliefs and flip the sign
108 | ambiguity_1 = -dot(diag(A'*log(A)),s1); % Ambiguity under policy 1
109 | 
110 | ambiguity_2 = -dot(diag(A'*log(A)),s2); % Ambiguity under policy 2
111 | 
112 | disp(' ');
113 | disp('Ambiguity Under Policy 1:');
114 | disp(ambiguity_1);
115 | disp(' ');
116 | disp('Ambiguity Under Policy 2:');
117 | disp(ambiguity_2);
118 | disp(' ');
119 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Active-Inference-Tutorial-Scripts
 2 | 
 3 | Supplementary scripts for Step-by-step active inference modelling tutorial
 4 | 
 5 | By Ryan Smith and Christopher Whyte
 6 | 
 7 | Step_by_Step_AI_Guide.m: 
 8 | 
 9 | This is the main tutorial script. It illustrates how to build a partially observable Markov decision process (POMDP) model within the active inference framework, using  a simple explore-exploit task as an example. It shows how to run single-trial and multi-trial simulations including perception, decision-making, and learning. It also shows how to generate simulated neuronal responses. It further illustrates how to fit task models to empirical data for behavioral studies and do subsequent Bayesian group analyses. 
10 | NOTE: This code was updated on 8/28/24 to improve the way forgetting rates are implemented. Unlike in the original published tutorial, this updated version specifies that greater omega values promote greater forgetting. Initial values for concentration parameters also now act as a floor, preventing these parameters from evolving toward implausibly low values over time. 
11 | 
12 | Step_by_Step_Hierarchical_Model:
13 | 
14 | Separate script illustrating how to build a hierarchical (deep temporal) model, using a commonly used oddball task paradigm as an example. This also shows how to simulate predicted neuronal responses (event-related potentials) observed using this task in empirical studies.
15 | 
16 | EFE_Precision_Updating:
17 | 
18 | Separate script that allows the reader to simulate updates in the expected free energy precision (gamma) through updates in its prior (beta). At the top of the script you can choose values for the prior over policies, expected free energy over policies, and variational free energy over policies after a new observation, as well as the initial prior on expected precision. The script will then simulate 16 iterative updates and plot the resulting changes in gamma. By changing the initial values of the priors and free energies, you can get more of an intuition about the dynamics of these updates and how they depend on the relationship between the initial values that are chosen.
19 | 
20 | VFE_calculation_example:
21 | 
22 | Separate script that allows the reader to calculate variational free energy for approximate posterior beliefs given a new observation. The reader can specify a generative model (priors and likelihood matrix) and an observation, and then experiment with how variational free energy is reduced as approximate posterior beliefs approach the true posteriors.
23 | 
24 | Prediction_error_example:
25 | 
26 | Separate script that allows the reader to calculate state and outcome prediction errors. These minimize variational and expected free energy, respectively. Minimizing state prediction errors maintains accurate beliefs (while also changing beliefs as little as possible). Minimizing outcome prediction errors maximizes reward and information gain.
27 | 
28 | Message_passing_example:
29 | 
30 | Separate script that allows the reader to perform (marginal) message passing. In the first example, the code follows the message passing steps described in the main text (section 2) one by one. In the second example, this is extended to also calculate firing rates and ERPs associated with message passing in the neural process theory associated with active inference.
31 | 
32 | EFE_learning_novelty_term:
33 | 
34 | Separate script that allows the reader to calculate the novelty term that is added to the expected free energy when learning the Dirichlet concentration parameters (a) for the likelihood matrix (A). Small concentration parameters lead to a larger value for the novelty term, which is subtracted from the total EFE value for a policy. Therefore, less confidence in beliefs about state-outcome mappings in the A matrix leads the agent to select policies that will increase confidence in those beliefs ('parameter exploration').
35 | 
36 | Pencil_and_paper_exercise_solutions:
37 | 
38 | Solutions to 'pencil and paper' exercises provided in the tutorial paper. These are provided to aid the reader in developing intuitions for the equations used in active inference.
39 | 
40 | spm_MDP_VB_X_tutorial:
41 | 
42 | Tutorial version of the standard routine for running active inference (POMDP) models.
43 | NOTE: This code was updated on 8/28/24 to improve the way forgetting rates are implemented. Unlike in the original published tutorial, this updated version specifies that greater omega values promote greater forgetting. Initial values for concentration parameters also now act as a floor, preventing these parameters from evolving toward implausibly low values over time.
44 | 
45 | Simplified_simulation_script:
46 | 
47 | Simplified and heavily commented version of the spm_MDP_VB_X_tutorial script. This is provided to make it easier for the reader to understand how the standard simulation routines work.
48 | NOTE: This code was updated on 8/28/24 to improve the way forgetting rates are implemented. Unlike in the original published tutorial, this updated version specifies that greater omega values promote greater forgetting. Initial values for concentration parameters also now act as a floor, preventing these parameters from evolving toward implausibly low values over time.
49 | 
50 | Estimate_parameters: 
51 | 
52 | Script called by the main tutorial script for estimating parameters on (simulated) behavioral data.
53 | 
54 | NOTE: Additional scripts are secondary functions called by the main scripts for plotting simulation outputs.
55 | 


--------------------------------------------------------------------------------
/Simplified_simulation_script.m:
--------------------------------------------------------------------------------
   1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2 | %-- Simplified Simulation Script --%
   3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   4 | 
   5 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
   6 | % Application to Empirical Data
   7 | 
   8 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
   9 | % UPDATED: 8/28/2024 (modified forgetting rate implementation)
  10 | rng('shuffle')
  11 | close all
  12 | clear
  13 | 
  14 | % This code simulates a single trial of the explore-exploit task introduced 
  15 | % in the active inference tutorial using a stripped down version of the model
  16 | % inversion scheme implemented in the spm_MDP_VB_X.m script. 
  17 | 
  18 | % Note that this implementation uses the marginal message passing scheme
  19 | % described in (Parr et al., 2019), and will return very slightly 
  20 | % (negligibly) different values than the spm_MDP_VB_X.m script in 
  21 | % simulation results.
  22 | 
  23 | % Parr, T., Markovic, D., Kiebel, S., & Friston, K. J. (2019). Neuronal 
  24 | % message passing using Mean-field, Bethe, and Marginal approximations. 
  25 | % Scientific Reports, 9, 1889.
  26 | 
  27 | %% Simulation Settings
  28 | 
  29 | % To simulate the task when prior beliefs (d) are separated from the 
  30 | % generative process, set the 'Gen_model' variable directly
  31 | % below to 1. To do so for priors (d), likelihoods (a), and habits (e), 
  32 | % set the 'Gen_model' variable to 2:
  33 | 
  34 | Gen_model = 1; % as in the main tutorial code, many parameters can be adjusted
  35 |                % in the model setup, within the explore_exploit_model
  36 |                % function starting on line 810. This includes, among
  37 |                % others (similar to in the main tutorial script):
  38 | 
  39 | % prior beliefs about context (d): alter line 876
  40 | 
  41 | % beliefs about hint accuracy in the likelihood (a): alter lines 996-998
  42 | 
  43 | % to adjust habits (e), alter line 1155
  44 | 
  45 | %% Specify Generative Model
  46 | 
  47 | MDP = explore_exploit_model(Gen_model);
  48 | 
  49 | % Model specification is reproduced at the bottom of this script (starting 
  50 | % on line 810), but see main tutorial script for more complete walk-through
  51 | 
  52 | %% Model Inversion to Simulate Behavior
  53 | %==========================================================================
  54 | 
  55 | % Normalize generative process and generative model
  56 | %--------------------------------------------------------------------------
  57 | 
  58 | % before sampling from the generative process and inverting the generative 
  59 | % model we need to normalize the columns of the matrices so that they can 
  60 | % be treated as probability distributions
  61 | 
  62 | % generative process
  63 | A = MDP.A;         % Likelihood matrices
  64 | B = MDP.B;         % Transition matrices
  65 | C = MDP.C;         % Preferences over outcomes
  66 | D = MDP.D;         % Priors over initial states    
  67 | T = MDP.T;         % Time points per trial
  68 | V = MDP.V;         % Policies
  69 | beta = MDP.beta;   % Expected free energy precision
  70 | alpha = MDP.alpha; % Action precision
  71 | eta = MDP.eta;     % Learning rate
  72 | omega = MDP.omega; % Forgetting rate
  73 | 
  74 | A = col_norm(A);
  75 | B = col_norm(B);
  76 | D = col_norm(D);
  77 | 
  78 | % generative model (lowercase matrices/vectors are beliefs about capitalized matrices/vectors)
  79 | 
  80 | NumPolicies = MDP.NumPolicies; % Number of policies
  81 | NumFactors = MDP.NumFactors;   % Number of state factors
  82 | 
  83 | % Store initial parameter values of generative model for free energy 
  84 | % calculations after learning
  85 | %--------------------------------------------------------------------------
  86 | 
  87 | % 'complexity' of d vector concentration parameters
  88 | if isfield(MDP,'d')
  89 |     for factor = 1:numel(MDP.d)
  90 |         % store d vector values before learning
  91 |         d_prior{factor} = MDP.d{factor};
  92 |         % compute "complexity" - lower concentration parameters have
  93 |         % smaller values creating a lower expected free energy thereby
  94 |         % encouraging 'novel' behaviour 
  95 |         d_complexity{factor} = spm_wnorm(d_prior{factor});
  96 |     end 
  97 | end 
  98 | 
  99 | if isfield(MDP,'a')
 100 |     % complexity of a matrix concentration parameters
 101 |     for modality = 1:numel(MDP.a)
 102 |         a_prior{modality} = MDP.a{modality};
 103 |         a_complexity{modality} = spm_wnorm(a_prior{modality}).*(a_prior{modality} > 0);
 104 |     end
 105 | end  
 106 | 
 107 | % Normalise matrices before model inversion/inference
 108 | %--------------------------------------------------------------------------
 109 | 
 110 | % normalize A matrix
 111 | if isfield(MDP,'a')
 112 |     a = col_norm(MDP.a);
 113 | else 
 114 |     a = col_norm(MDP.A);
 115 | end 
 116 | 
 117 | % normalize B matrix
 118 | if isfield(MDP,'b')
 119 |     b = col_norm(MDP.b);
 120 | else 
 121 |     b = col_norm(MDP.B);
 122 | end 
 123 | 
 124 | % normalize C and transform into log probability
 125 | for ii = 1:numel(C)
 126 |     C{ii} = MDP.C{ii} + 1/32;
 127 |     for t = 1:T
 128 |         C{ii}(:,t) = nat_log(exp(C{ii}(:,t))/sum(exp(C{ii}(:,t))));
 129 |     end 
 130 | end 
 131 | 
 132 | % normalize D vector
 133 | if isfield(MDP,'d')
 134 |     d = col_norm(MDP.d);
 135 | else 
 136 |     d = col_norm(MDP.D);
 137 | end 
 138 | 
 139 | % normalize E vector
 140 | if isfield(MDP,'e')
 141 |     E = MDP.e;
 142 |     E = E./sum(E);
 143 | elseif isfield(MDP,'E')
 144 |     E = MDP.E;
 145 |     E = E./sum(E);
 146 | else
 147 |     E = col_norm(ones(NumPolicies,1));
 148 |     E = E./sum(E);
 149 | end
 150 | 
 151 | % Initialize variables
 152 | %--------------------------------------------------------------------------
 153 | 
 154 | % numbers of transitions, policies and states
 155 | NumModalities = numel(a);                    % number of outcome factors
 156 | NumFactors = numel(d);                       % number of hidden state factors
 157 | NumPolicies = size(V,2);                     % number of allowable policies
 158 | for factor = 1:NumFactors
 159 |     NumStates(factor) = size(b{factor},1);   % number of hidden states
 160 |     NumControllable_transitions(factor) = size(b{factor},3); % number of hidden controllable hidden states for each factor (number of B matrices)
 161 | end
 162 | 
 163 | % initialize the approximate posterior over states conditioned on policies
 164 | % for each factor as a flat distribution over states at each time point
 165 | for policy = 1:NumPolicies
 166 |     for factor = 1:NumFactors
 167 |         NumStates(factor) = length(D{factor}); % number of states in each hidden state factor
 168 |         state_posterior{factor} = ones(NumStates(factor),T,policy)/NumStates(factor); 
 169 |     end  
 170 | end 
 171 | 
 172 | % initialize the approximate posterior over policies as a flat distribution 
 173 | % over policies at each time point
 174 | policy_posteriors = ones(NumPolicies,T)/NumPolicies; 
 175 | 
 176 | % initialize posterior over actions
 177 | chosen_action = zeros(ndims(B),T-1);
 178 |     
 179 | % if there is only one policy
 180 | for factors = 1:NumFactors 
 181 |     if NumControllable_transitions(factors) == 1
 182 |         chosen_action(factors,:) = ones(1,T-1);
 183 |     end
 184 | end
 185 | MDP.chosen_action = chosen_action;
 186 | 
 187 | % initialize expected free energy precision (beta)
 188 | posterior_beta = 1;
 189 | gamma(1) = 1/posterior_beta; % expected free energy precision
 190 |     
 191 | % message passing variables
 192 | TimeConst = 4; % time constant for gradient descent
 193 | NumIterations  = 16; % number of message passing iterations
 194 | 
 195 | % Lets go! Message passing and policy selection 
 196 | %--------------------------------------------------------------------------
 197 | 
 198 | for t = 1:T % loop over time points  
 199 |     
 200 |     % sample generative process
 201 |     %----------------------------------------------------------------------
 202 |     
 203 |     for factor = 1:NumFactors % number of hidden state factors
 204 |         % Here we sample from the prior distribution over states to obtain the
 205 |         % state at each time point. At T = 1 we sample from the D vector, and at
 206 |         % time T > 1 we sample from the B matrix. To do this we make a vector 
 207 |         % containing the cumulative sum of the columns (which we know sum to one), 
 208 |         % generate a random number (0-1),and then use the find function to take 
 209 |         % the first number in the cumulative sum vector that is >= the random number. 
 210 |         % For example if our D vector is [.5 .5] 50% of the time the element of the 
 211 |         % vector corresponding to the state one will be >= to the random number. 
 212 | 
 213 |         % sample states 
 214 |         if t == 1
 215 |             prob_state = D{factor}; % sample initial state T = 1
 216 |         elseif t>1
 217 |             prob_state = B{factor}(:,true_states(factor,t-1),MDP.chosen_action(factor,t-1));
 218 |         end 
 219 |         true_states(factor,t) = find(cumsum(prob_state)>= rand,1);
 220 |     end 
 221 | 
 222 |     % sample observations
 223 |     for modality = 1:NumModalities % loop over number of outcome modalities
 224 |         outcomes(modality,t) = find(cumsum(a{modality }(:,true_states(1,t),true_states(2,t)))>=rand,1);
 225 |     end
 226 |     
 227 |     % express observations as a structure containing a 1 x observations 
 228 |     % vector for each modality with a 1 in the position corresponding to
 229 |     % the observation received on that trial
 230 |     for modality = 1:NumModalities
 231 |         vec = zeros(1,size(a{modality},1));
 232 |         index = outcomes(modality,t);
 233 |         vec(1,index) = 1;
 234 |         O{modality,t} = vec;
 235 |         clear vec
 236 |     end 
 237 |     
 238 |     % marginal message passing (minimize F and infer posterior over states)
 239 |     %----------------------------------------------------------------------
 240 |     
 241 |     for policy = 1:NumPolicies
 242 |         for Ni = 1:NumIterations % number of iterations of message passing  
 243 |             for factor = 1:NumFactors
 244 |             lnAo = zeros(size(state_posterior{factor})); % initialise matrix containing the log likelihood of observations
 245 |                 for tau = 1:T % loop over tau
 246 |                     v_depolarization = nat_log(state_posterior{factor}(:,tau,policy)); % convert approximate posteriors into depolarisation variable v 
 247 |                     if tau<t+1 % Collect an observation from the generative process when tau <= t
 248 |                         for modal = 1:NumModalities % loop over observation modalities
 249 |                             % this line uses the observation at each tau to index
 250 |                             % into the A matrix to grab the likelihood of each hidden state
 251 |                             lnA = permute(nat_log(a{modal}(outcomes(modal,tau),:,:,:,:,:)),[2 3 4 5 6 1]);                           
 252 |                             for fj = 1:NumFactors
 253 |                                 % dot product with state vector from other hidden state factors 
 254 |                                 % (this is what allows hidden states to interact in the likelihood mapping)    
 255 |                                 if fj ~= factor        
 256 |                                     lnAs = md_dot((lnA),state_posterior{fj}(:,tau),fj);
 257 |                                     clear lnA
 258 |                                     lnA = lnAs; 
 259 |                                     clear lnAs
 260 |                                 end
 261 |                             end
 262 |                             lnAo(:,tau) = lnAo(:,tau) + lnA;
 263 |                         end
 264 |                     end
 265 |                     % 'forwards' and 'backwards' messages at each tau
 266 |                     if tau == 1 % first tau
 267 |                         lnD = nat_log(d{factor}); % forward message
 268 |                         lnBs = nat_log(B_norm(b{factor}(:,:,V(tau,policy,factor))')*state_posterior{factor}(:,tau+1,policy));% backward message
 269 |                     elseif tau == T % last tau                    
 270 |                         lnD  = nat_log((b{factor}(:,:,V(tau-1,policy,factor)))*state_posterior{factor}(:,tau-1,policy));% forward message 
 271 |                         lnBs = zeros(size(d{factor})); % backward message
 272 |                     else % 1 < tau < T
 273 |                         lnD  = nat_log(b{factor}(:,:,V(tau-1,policy,factor))*state_posterior{factor}(:,tau-1,policy));% forward message
 274 |                         lnBs = nat_log(B_norm(b{factor}(:,:,V(tau,policy,factor))')*state_posterior{factor}(:,tau+1,policy));% backward message
 275 |                     end
 276 |                     % here we both combine the messages and perform a gradient
 277 |                     % descent on the posterior 
 278 |                     v_depolarization = v_depolarization + (.5*lnD + .5*lnBs + lnAo(:,tau) - v_depolarization)/TimeConst;
 279 |                     % variational free energy at each time point
 280 |                     Ft(tau,Ni,t,factor) = state_posterior{factor}(:,tau,policy)'*(.5*lnD + .5*lnBs + lnAo(:,tau) - nat_log(state_posterior{factor}(:,tau,policy)));
 281 |                     % update posterior by running v through a softmax 
 282 |                     state_posterior{factor}(:,tau,policy) = (exp(v_depolarization)/sum(exp(v_depolarization)));    
 283 |                     % store state_posterior (normalised firing rate) from each epoch of
 284 |                     % gradient descent for each tau
 285 |                     normalized_firing_rates{factor}(Ni,:,tau,t,policy) = state_posterior{factor}(:,tau,policy);                   
 286 |                     % store v (non-normalized log posterior or 'membrane potential') 
 287 |                     % from each epoch of gradient descent for each tau
 288 |                     prediction_error{factor}(Ni,:,tau,t,policy) = v_depolarization;
 289 |                     clear v
 290 |                 end
 291 |             end
 292 |         end        
 293 |       % variational free energy for each policy (F)
 294 |       Fintermediate = sum(Ft,4); % sum over hidden state factors (Fintermediate is an intermediate F value)
 295 |       Fintermediate = squeeze(sum( Fintermediate,1)); % sum over tau and squeeze into 16x3 matrix
 296 |       % store variational free energy at last iteration of message passing
 297 |       F(policy,t) = Fintermediate(end);
 298 |       clear Fintermediate
 299 |     end 
 300 |     
 301 |     % expected free energy (G) under each policy
 302 |     %----------------------------------------------------------------------
 303 |     
 304 |     % initialize intermediate expected free energy variable (Gintermediate) for each policy
 305 |     Gintermediate = zeros(NumPolicies,1);  
 306 |     % policy horizon for 'counterfactual rollout' for deep policies (described below)
 307 |     horizon = T;
 308 | 
 309 |     % loop over policies
 310 |     for policy = 1:NumPolicies
 311 |         
 312 |         % Bayesian surprise about 'd'
 313 |         if isfield(MDP,'d')
 314 |             for factor = 1:NumFactors
 315 |                 Gintermediate(policy) = Gintermediate(policy) - d_complexity{factor}'*state_posterior{factor}(:,1,policy);
 316 |             end 
 317 |         end
 318 |          
 319 |         % This calculates the expected free energy from time t to the
 320 |         % policy horizon which, for deep policies, is the end of the trial T.
 321 |         % We can think about this in terms of a 'counterfactual rollout'
 322 |         % that asks, "what policy will best resolve uncertainty about the 
 323 |         % mapping between hidden states and observations (maximize
 324 |         % epistemic value) and bring about preferred outcomes"?
 325 |    
 326 |         for timestep = t:horizon
 327 |             % grab expected states for each policy and time
 328 |             for factor = 1:NumFactors
 329 |                 Expected_states{factor} = state_posterior{factor}(:,timestep,policy);
 330 |             end 
 331 |             
 332 |             % calculate epistemic value term (Bayesian Surprise) and add to
 333 |             % expected free energy
 334 |             Gintermediate(policy) = Gintermediate(policy) + G_epistemic_value(a(:),Expected_states(:));
 335 |             
 336 |             for modality = 1:NumModalities
 337 |                 % prior preferences about outcomes
 338 |                 predictive_observations_posterior = cell_md_dot(a{modality},Expected_states(:)); %posterior over observations
 339 |                 Gintermediate(policy) = Gintermediate(policy) + predictive_observations_posterior'*(C{modality}(:,timestep));
 340 | 
 341 |                 % Bayesian surprise about parameters 
 342 |                 if isfield(MDP,'a')
 343 |                     Gintermediate(policy) = Gintermediate(policy) - cell_md_dot(a_complexity{modality},{predictive_observations_posterior Expected_states{:}});
 344 |                 end
 345 |             end 
 346 |         end 
 347 |     end 
 348 |     
 349 |     % store expected free energy for each time point and clear intermediate
 350 |     % variable
 351 |     G(:,t) = Gintermediate;
 352 |     clear Gintermediate
 353 |     
 354 |     % infer policy, update precision and calculate BMA over policies
 355 |     %----------------------------------------------------------------------
 356 |     
 357 | 
 358 |     % loop over policy selection using variational updates to gamma to
 359 |     % estimate the optimal contribution of expected free energy to policy
 360 |     % selection. This has the effect of down-weighting the contribution of 
 361 |     % variational free energy to the posterior over policies when the 
 362 |     % difference between the prior and posterior over policies is large
 363 |     
 364 |     if t > 1
 365 |         gamma(t) = gamma(t - 1);
 366 |     end
 367 |     for ni = 1:Ni 
 368 |         % posterior and prior over policies
 369 |         policy_priors(:,t) = exp(log(E) + gamma(t)*G(:,t))/sum(exp(log(E) + gamma(t)*G(:,t)));% prior over policies
 370 |         policy_posteriors(:,t) = exp(log(E) + gamma(t)*G(:,t) + F(:,t))/sum(exp(log(E) + gamma(t)*G(:,t) + F(:,t))); % posterior over policies
 371 |         
 372 |         % expected free energy precision (beta)
 373 |         G_error = (policy_posteriors(:,t) - policy_priors(:,t))'*G(:,t);
 374 |         beta_update = posterior_beta - beta + G_error; % free energy gradient w.r.t gamma
 375 |         posterior_beta = posterior_beta - beta_update/2; 
 376 |         gamma(t) = 1/posterior_beta;
 377 |         
 378 |         % simulate dopamine responses
 379 |         n = (t - 1)*Ni + ni;
 380 |         gamma_update(n,1) = gamma(t); % simulated neural encoding of precision (posterior_beta^-1)
 381 |                                       % at each iteration of variational updating
 382 |         policy_posterior_updates(:,n) = policy_posteriors(:,t); % neural encoding of policy posteriors
 383 |         policy_posterior(1:NumPolicies,t) = policy_posteriors(:,t); % record posterior over policies 
 384 |     end 
 385 |     
 386 |     % bayesian model average of hidden states (averaging over policies)
 387 |     for factor = 1:NumFactors
 388 |         for tau = 1:T
 389 |             % reshape state_posterior into a matrix of size NumStates(factor) x NumPolicies and then dot with policies
 390 |             BMA_states{factor}(:,tau) = reshape(state_posterior{factor}(:,tau,:),NumStates(factor),NumPolicies)*policy_posteriors(:,t);
 391 |         end
 392 |     end
 393 |     
 394 |     % action selection
 395 |     %----------------------------------------------------------------------
 396 |     
 397 |     % The probability of emitting each particular action is a softmax function 
 398 |     % of a vector containing the probability of each action summed over 
 399 |     % each policy. E.g. if there are three policies, a posterior over policies of 
 400 |     % [.4 .4 .2], and two possible actions, with policy 1 and 2 leading 
 401 |     % to action 1, and policy 3 leading to action 2, the probability of 
 402 |     % each action is [.8 .2]. This vector is then passed through a softmax function 
 403 |     % controlled by the inverse temperature parameter alpha which by default is extremely 
 404 |     % large (alpha = 512), leading to deterministic selection of the action with 
 405 |     % the highest probability. 
 406 |     
 407 |     if t < T
 408 | 
 409 |         % marginal posterior over action (for each factor)
 410 |         action_posterior_intermediate = zeros([NumControllable_transitions(end),1])';
 411 | 
 412 |         for policy = 1:NumPolicies % loop over number of policies
 413 |             sub = num2cell(V(t,policy,:));
 414 |             action_posterior_intermediate(sub{:}) = action_posterior_intermediate(sub{:}) + policy_posteriors(policy,t);
 415 |         end
 416 |         
 417 |         % action selection (softmax function of action potential)
 418 |         sub = repmat({':'},1,NumFactors);
 419 |         action_posterior_intermediate(:) = (exp(alpha*log(action_posterior_intermediate(:)))/sum(exp(alpha*log(action_posterior_intermediate(:))))); 
 420 |         action_posterior(sub{:},t) = action_posterior_intermediate;
 421 | 
 422 |         % next action - sampled from marginal posterior
 423 |         ControlIndex = find(NumControllable_transitions>1);
 424 |         action = (1:1:NumControllable_transitions(ControlIndex)); % 1:number of control states
 425 |         for factors = 1:NumFactors 
 426 |             if NumControllable_transitions(factors) > 2 % if there is more than one control state
 427 |                 ind = find(rand < cumsum(action_posterior_intermediate(:)),1);  
 428 |                 MDP.chosen_action(factor,t) = action(ind);
 429 |             end
 430 |         end
 431 | 
 432 |     end % end of state and action selection   
 433 |          
 434 | end % end loop over time points
 435 | 
 436 | % accumulate concentration paramaters (learning)
 437 | %--------------------------------------------------------------------------
 438 | 
 439 | for t = 1:T
 440 |     % a matrix (likelihood)
 441 |     if isfield(MDP,'a')
 442 |         for modality = 1:NumModalities
 443 |             a_learning = O(modality,t)';
 444 |             for  factor = 1:NumFactors
 445 |                 a_learning = spm_cross(a_learning,BMA_states{factor}(:,t));
 446 |             end
 447 |             a_learning = a_learning.*(MDP.a{modality} > 0);
 448 |             MDP.a{modality} = (MDP.a{modality}-MDP.a_0{modality})*(1-omega) + MDP.a_0{modality} + a_learning*eta;
 449 |         end
 450 |     end 
 451 | end 
 452 |  
 453 | % initial hidden states d (priors):
 454 | if isfield(MDP,'d')
 455 |     for factor = 1:NumFactors
 456 |         i = MDP.d{factor} > 0;
 457 |         MDP.d{factor}(i) = (1-omega)*(MDP.d{factor}(i)-MDP.d_0{factor}(i)) + MDP.d_0{factor}(i) + eta*BMA_states{factor}(i,1);
 458 |     end
 459 | end
 460 | 
 461 | % policies e (habits)
 462 | if isfield(MDP,'e')
 463 |     MDP.e = (1-omega)*(MDP.e - MDP.e_0) + MDP.e_0 + eta*policy_posterior(:,T);
 464 | end
 465 | 
 466 | % Free energy of concentration parameters
 467 | %--------------------------------------------------------------------------
 468 | 
 469 | % Here we calculate the KL divergence (negative free energy) of the concentration 
 470 | % parameters of the learned distribution before and after learning has occured on 
 471 | % each trial. 
 472 | 
 473 | % (negative) free energy of a
 474 | for modality = 1:NumModalities
 475 |     if isfield(MDP,'a')
 476 |         MDP.Fa(modality) = - spm_KL_dir(MDP.a{modality},a_prior{modality});
 477 |     end
 478 | end
 479 | 
 480 | % (negative) free energy of d
 481 | for factor = 1:NumFactors
 482 |     if isfield(MDP,'d')
 483 |         MDP.Fd(factor) = - spm_KL_dir(MDP.d{factor},d_prior{factor});
 484 |     end
 485 | end
 486 | 
 487 | % (negative) free energy of e
 488 | if isfield(MDP,'e')
 489 |     MDP.Fe = - spm_KL_dir(MDP.e,E);
 490 | end
 491 | 
 492 | % simulated dopamine responses (beta updates)
 493 | %----------------------------------------------------------------------
 494 | % "deconvolution" of neural encoding of precision
 495 | if NumPolicies > 1
 496 |     phasic_dopamine = 8*gradient(gamma_update) + gamma_update/8;
 497 | else
 498 |     phasic_dopamine = [];
 499 |     gamma_update = [];
 500 | end
 501 | 
 502 | % Bayesian model average of neuronal variables; normalized firing rate and
 503 | % prediction error
 504 | %----------------------------------------------------------------------
 505 | for factor = 1:NumFactors
 506 |     BMA_normalized_firing_rates{factor} = zeros(Ni,NumStates(factor),T,T);
 507 |     BMA_prediction_error{factor} = zeros(Ni,NumStates(factor),T,T);
 508 |     for t = 1:T
 509 |         for policy = 1:NumPolicies 
 510 |             %normalised firing rate
 511 |             BMA_normalized_firing_rates{factor}(:,:,1:T,t) = BMA_normalized_firing_rates{factor}(:,:,1:T,t) + normalized_firing_rates{factor}(:,:,1:T,t,policy)*policy_posterior(policy,t);
 512 |             %depolarisation
 513 |             BMA_prediction_error{factor}(:,:,1:T,t) = BMA_prediction_error{factor}(:,:,1:T,t) + prediction_error{factor}(:,:,1:T,t,policy)*policy_posterior(policy,t);
 514 |         end
 515 |     end
 516 | end
 517 | 
% store variables in MDP structure
%----------------------------------------------------------------------
% Copies the results of the trial into fields of MDP; the plotting calls
% further down receive this structure.

MDP.T  = T;                                   % number of time points in the trial
MDP.O  = O;                                   % outcome array indexed (modality,t), as used in the a-learning update
MDP.P  = action_posterior;                    % probability of action at time 1,...,T - 1
MDP.R  = policy_posterior;                    % posterior over policies (one column per time point)
MDP.Q  = state_posterior(:);                  % per-policy posteriors over hidden states (one cell per factor)
MDP.X  = BMA_states(:);                       % Bayesian model averages of hidden states (averaged over policies)
MDP.C  = C(:);                                % preferences
MDP.G  = G;                                   % expected free energy
MDP.F  = F;                                   % variational free energy

MDP.s = true_states;                          % states (presumably the generative-process states - confirm)
MDP.o = outcomes;                             % outcomes (presumably as indices, unlike O above - confirm)
MDP.u = MDP.chosen_action;                    % actions

MDP.w  = gamma;                               % posterior expectations of expected free energy precision (gamma)
MDP.vn = BMA_prediction_error(:);             % simulated neuronal prediction error
MDP.xn = BMA_normalized_firing_rates(:);      % simulated neuronal encoding of hidden states
MDP.un = policy_posterior_updates;            % simulated neuronal encoding of policies
MDP.wn = gamma_update;                        % simulated neuronal encoding of policy precision (gamma = 1/posterior_beta at each variational iteration)
MDP.dn = phasic_dopamine;                     % simulated dopamine responses (deconvolved)
 541 | 
%% Plot
%==========================================================================
% NOTE(review): spm_figure, spm_MDP_VB_trial and spm_MDP_VB_LFP are not
% defined in the visible part of this file; presumably they come from the
% SPM toolbox, which would then have to be on the MATLAB path - confirm.

% trial behaviour
spm_figure('GetWin','Figure 1'); clf    % select (or create) the window and clear it
spm_MDP_VB_trial(MDP); 

% neuronal responses
spm_figure('GetWin','Figure 2'); clf    % select (or create) the window and clear it
spm_MDP_VB_LFP(MDP,[],1); 
 552 | 
 553 | %% Functions
 554 | %==========================================================================
 555 | 
 556 | % normalise vector columns
 557 | function b = col_norm(B)
 558 | numfactors = numel(B);
 559 | for f = 1:numfactors
 560 |     bb{f} = B{f}; 
 561 |     z = sum(bb{f},1); %create normalizing constant from sum of columns
 562 |     bb{f} = bb{f}./z; %divide columns by constant
 563 | end 
 564 | b = bb;
 565 | end 
 566 | 
 567 | % norm the elements of B transpose as required by MMP
 568 | function b = B_norm(B)
 569 | bb = B; 
 570 | z = sum(bb,1); %create normalizing constant from sum of columns
 571 | bb = bb./z; % divide columns by constant
 572 | bb(isnan(bb)) = 0; %replace NaN with zero
 573 | b = bb;
 574 | % insert zero value condition
 575 | end 
 576 | 
 577 | % natural log that replaces zero values with very small values for numerical reasons.
 578 | function y = nat_log(x)
 579 | y = log(x+exp(-16));
 580 | end 
 581 | 
 582 | % dot product along dimension f
 583 | function B = md_dot(A,s,f)
 584 | if f == 1
 585 |     B = A'*s;
 586 | elseif f == 2
 587 |     B = A*s;
 588 | end 
 589 | end
 590 | 
 591 | 
 592 | %--- SPM functions
 593 | %==========================================================================
 594 | 
 595 | % These functions have been replicated (with permission) from the spm
 596 | % toolbox. To aid in understanding, some variable names have been changed.
 597 | 
 598 | function X = cell_md_dot(X,x)
 599 | % initialize dimensions
 600 | DIM = (1:numel(x)) + ndims(X) - numel(x);
 601 | 
 602 | % compute dot product using recursive sums (and bsxfun)
 603 | for d = 1:numel(x)
 604 |     s         = ones(1,ndims(X));
 605 |     s(DIM(d)) = numel(x{d});
 606 |     X         = bsxfun(@times,X,reshape(full(x{d}),s));
 607 |     X         = sum(X,DIM(d));
 608 | end
 609 | 
 610 | % eliminate singleton dimensions
 611 | X = squeeze(X);
 612 | end 
 613 | 
 614 | % epistemic value term (Bayesian surprise) in expected free energy 
 615 | function G = G_epistemic_value(A,s)
 616 |     
 617 | % auxiliary function for Bayesian suprise or mutual information
 618 | % FORMAT [G] = spm_MDP_G(A,s)
 619 | %
 620 | % A   - likelihood array (probability of outcomes given causes)
 621 | % s   - probability density of causes
 622 | 
 623 | % Copyright (C) 2005 Wellcome Trust Centre for Neuroimaging
 624 | 
 625 | % Karl Friston
 626 | % $Id: spm_MDP_G.m 7306 2018-05-07 13:42:02Z karl $
 627 | 
 628 | % probability distribution over the hidden causes: i.e., Q(s)
 629 | 
 630 | qx = spm_cross(s); % this is the outer product of the posterior over states
 631 |                    % calculated with respect to itself
 632 | 
 633 | % accumulate expectation of entropy: i.e., E[lnP(o|s)]
 634 | G     = 0;
 635 | qo    = 0;
 636 | for i = find(qx > exp(-16))'
 637 |     % probability over outcomes for this combination of causes
 638 |     po   = 1;
 639 |     for g = 1:numel(A)
 640 |         po = spm_cross(po,A{g}(:,i));
 641 |     end
 642 |     po = po(:);
 643 |     qo = qo + qx(i)*po;
 644 |     G  = G  + qx(i)*po'*nat_log(po);
 645 | end
 646 | 
 647 | % subtract entropy of expectations: i.e., E[lnQ(o)]
 648 | G  = G - qo'*nat_log(qo);
 649 |     
 650 | end 
 651 | 
 652 | %--------------------------------------------------------------------------
 653 | function A  = spm_wnorm(A)
 654 | % This uses the bsxfun function to subtract the inverse of each column
 655 | % entry from the inverse of the sum of the columns and then divide by 2.
 656 | % 
 657 | A   = A + exp(-16);
 658 | A   = bsxfun(@minus,1./sum(A,1),1./A)/2;
 659 | end 
 660 | 
 661 | function sub = spm_ind2sub(siz,ndx)
 662 | % subscripts from linear index
 663 | % 
 664 | 
 665 | n = numel(siz);
 666 | k = [1 cumprod(siz(1:end-1))];
 667 | for i = n:-1:1
 668 |     vi       = rem(ndx - 1,k(i)) + 1;
 669 |     vj       = (ndx - vi)/k(i) + 1;
 670 |     sub(i,1) = vj;
 671 |     ndx      = vi;
 672 | end
 673 | end 
 674 | 
 675 | %--------------------------------------------------------------------------
 676 | function [Y] = spm_cross(X,x,varargin)
 677 | % Multidimensional outer product
 678 | % FORMAT [Y] = spm_cross(X,x)
 679 | % FORMAT [Y] = spm_cross(X)
 680 | %
 681 | % X  - numeric array
 682 | % x  - numeric array
 683 | %
 684 | % Y  - outer product
 685 | %
 686 | % See also: spm_dot
 687 | % Copyright (C) 2015 Wellcome Trust Centre for Neuroimaging
 688 | 
 689 | % Karl Friston
 690 | % $Id: spm_cross.m 7527 2019-02-06 19:12:56Z karl $
 691 | 
 692 | % handle single inputs
 693 | if nargin < 2
 694 |     if isnumeric(X)
 695 |         Y = X;
 696 |     else
 697 |         Y = spm_cross(X{:});
 698 |     end
 699 |     return
 700 | end
 701 | 
 702 | % handle cell arrays
 703 | 
 704 | if iscell(X), X = spm_cross(X{:}); end
 705 | if iscell(x), x = spm_cross(x{:}); end
 706 | 
 707 | % outer product of first pair of arguments (using bsxfun)
 708 | A = reshape(full(X),[size(X) ones(1,ndims(x))]);
 709 | B = reshape(full(x),[ones(1,ndims(X)) size(x)]);
 710 | Y = squeeze(bsxfun(@times,A,B));
 711 | 
 712 | % and handle remaining arguments
 713 | for i = 1:numel(varargin)
 714 |     Y = spm_cross(Y,varargin{i});
 715 | end
 716 | end 
 717 | 
 718 | %--------------------------------------------------------------------------
 719 | function [d] = spm_KL_dir(q,p)
 720 | % KL divergence between two Dirichlet distributions
 721 | % FORMAT [d] = spm_kl_dirichlet(lambda_q,lambda_p)
 722 | %
 723 | % Calculate KL(Q||P) = <log Q/P> where avg is wrt Q between two Dirichlet 
 724 | % distributions Q and P
 725 | %
 726 | % lambda_q   -   concentration parameter matrix of Q
 727 | % lambda_p   -   concentration parameter matrix of P
 728 | %
 729 | % This routine uses an efficient computation that handles arrays, matrices 
 730 | % or vectors. It returns the sum of divergences over columns.
 731 | %
 732 | % see also: spm_kl_dirichlet.m (for rwo vectors)
 733 | % Copyright (C) 2008 Wellcome Trust Centre for Neuroimaging
 734 | 
 735 | % Will Penny 
 736 | % $Id: spm_KL_dir.m 7382 2018-07-25 13:58:04Z karl $
 737 | 
 738 | %  KL divergence based on log beta functions
 739 | d = spm_betaln(p) - spm_betaln(q) - sum((p - q).*spm_psi(q + 1/32),1);
 740 | d = sum(d(:));
 741 | 
 742 | return
 743 | 
 744 | % check on KL of Dirichlet ditributions
 745 | p  = rand(6,1) + 1;
 746 | q  = rand(6,1) + p;
 747 | p0 = sum(p);
 748 | q0 = sum(q);
 749 | 
 750 | d  = q - p;
 751 | KL = spm_betaln(p) - spm_betaln(q) + d'*spm_psi(q)
 752 | kl = gammaln(q0) - sum(gammaln(q)) - gammaln(p0) + sum(gammaln(p)) + ...
 753 |     d'*(spm_psi(q) - spm_psi(q0))
 754 | end 
 755 | 
 756 | %--------------------------------------------------------------------------
 757 | function y = spm_betaln(z)
 758 | % returns the log the multivariate beta function of a vector.
 759 | % FORMAT y = spm_betaln(z)
 760 | %   y = spm_betaln(z) computes the natural logarithm of the beta function
 761 | %   for corresponding elements of the vector z. if concerned is an array,
 762 | %   the beta functions are taken over the elements of the first to mention
 763 | %   (and size(y,1) equals one).
 764 | %
 765 | %   See also BETAINC, BETA.
 766 | %   Ref: Abramowitz & Stegun, Handbook of Mathematical Functions, sec. 6.2.
 767 | %   Copyright 1984-2004 The MathWorks, Inc. 
 768 | 
 769 | % Copyright (C) 2005 Wellcome Trust Centre for Neuroimaging
 770 | 
 771 | % Karl Friston
 772 | % $Id: spm_betaln.m 7508 2018-12-21 09:49:44Z thomas $
 773 | 
 774 | % log the multivariate beta function of a vector
 775 | if isvector(z)
 776 |     z     = z(find(z)); %#ok<FNDSB>
 777 |     y     = sum(gammaln(z)) - gammaln(sum(z));
 778 | else
 779 |     for i = 1:size(z,2)
 780 |         for j = 1:size(z,3)
 781 |             for k = 1:size(z,4)
 782 |                 for l = 1:size(z,5)
 783 |                     for m = 1:size(z,6)
 784 |                         y(1,i,j,k,l,m) = spm_betaln(z(:,i,j,k,l,m));
 785 |                     end
 786 |                 end
 787 |             end
 788 |         end
 789 |     end
 790 | end
 791 | end 
 792 | 
 793 | %--------------------------------------------------------------------------
 794 | function [A] = spm_psi(A)
 795 | % normalisation of a probability transition rate matrix (columns)
 796 | % FORMAT [A] = spm_psi(A)
 797 | %
 798 | % A  - numeric array
 799 | %
 800 | % See also: psi.m
 801 | % Copyright (C) 2015 Wellcome Trust Centre for Neuroimaging
 802 | 
 803 | % Karl Friston
 804 | % $Id: spm_psi.m 7300 2018-04-25 21:14:07Z karl $
 805 | 
 806 | % normalization of a probability transition rate matrix (columns)
 807 | A = bsxfun(@minus, psi(A), psi(sum(A,1)));
 808 | end 
 809 | 
 810 | %% Set up POMDP model structure
 811 | 
 812 | % Please note that the main tutorial script ('Step_by_Step_AI_Guide.m') has
 813 | % more thorough descriptions of how to specify this generative model and
 814 | % the other parameters that might be included. Below we only describe the
 815 | % elements used to specify this specific model. Also, unlike the main
 816 | % tutorial script which focuses on learning initial state priors (d), 
 817 | % this version also enables habits (priors over policies; e) and separation of  
 818 | % the generative process from the generative model for the likelihood function (a).
 819 | 
 820 | function MDP = explore_exploit_model(Gen_model)
 821 | 
 822 | % Number of time points or 'epochs' within a trial: T
 823 | % =========================================================================
 824 | 
 825 | % Here, we specify 3 time points (T), in which the agent 1) starts in a 'Start'
 826 | % state, 2) first moves to either a 'Hint' state or a 'Choose Left' or 'Choose
 827 | % Right' slot machine state, and 3) either moves from the Hint state to one
 828 | % of the choice states or moves from one of the choice states back to the
 829 | % Start state.
 830 | 
 831 | T = 3;
 832 | 
 833 | % Priors about initial states: D and d
 834 | % =========================================================================
 835 | 
 836 | %--------------------------------------------------------------------------
 837 | % Specify prior probabilities about initial states in the generative 
 838 | % process (D)
 839 | % Note: By default, these will also be the priors for the generative model
 840 | %--------------------------------------------------------------------------
 841 | 
 842 | % For the 'context' state factor, we can specify that the 'left better' context 
 843 | % (i.e., where the left slot machine is more likely to win) is the true context:
 844 | 
 845 | D{1} = [1 0]';  % {'left better','right better'}
 846 | 
 847 | % For the 'behavior' state factor, we can specify that the agent always
 848 | % begins a trial in the 'start' state (i.e., before choosing to either pick
 849 | % a slot machine or first ask for a hint):
 850 | 
 851 | D{2} = [1 0 0 0]'; % {'start','hint','choose-left','choose-right'}
 852 | 
 853 | %--------------------------------------------------------------------------
 854 | % Specify prior beliefs about initial states in the generative model (d)
 855 | % Note: This is optional, and will simulate learning priors over states 
 856 | % if specified.
 857 | %--------------------------------------------------------------------------
 858 | 
 859 | % Note that these are technically what are called 'Dirichlet concentration
 860 | % parameters', which need not take on values between 0 and 1. These values
 861 | % are added to after each trial, based on posterior beliefs about initial
 862 | % states. For example, if the agent believed at the end of trial 1 that it 
 863 | % was in the 'left better' context, then d{1} on trial 2 would be 
 864 | % d{1} = [1.5 0.5]' (although how large the increase in value is after 
 865 | % each trial depends on a learning rate). In general, higher values 
 866 | % indicate more confidence in one's beliefs about initial states, and 
 867 | % entail that beliefs will change more slowly (e.g., the shape of the 
 868 | % distribution encoded by d{1} = [25 25]' will change much more slowly 
 869 | % than the shape of the distribution encoded by d{1} = [.5 0.5]' with each 
 870 | % new observation).
 871 | 
 872 | % For context beliefs, we can specify that the agent starts out believing 
 873 | % that both contexts are equally likely, but with somewhat low confidence in 
 874 | % these beliefs:
 875 | 
 876 | d{1} = [.25 .25]';  % {'left better','right better'}
 877 | 
 878 | % For behavior beliefs, we can specify that the agent expects with 
 879 | % certainty that it will begin a trial in the 'start' state:
 880 | 
 881 | d{2} = [1 0 0 0]'; % {'start','hint','choose-left','choose-right'}
 882 | 
 883 | 
 884 | % State-outcome mappings and beliefs: A and a
 885 | % =========================================================================
 886 | 
 887 | %--------------------------------------------------------------------------
 888 | % Specify the probabilities of outcomes given each state in the generative 
 889 | % process (A)
 890 | % This includes one matrix per outcome modality
 891 | % Note: By default, these will also be the beliefs in the generative model
 892 | %--------------------------------------------------------------------------
 893 | 
 894 | % First we specify the mapping from states to observed hints (outcome
 895 | % modality 1). Here, the rows correspond to observations, the columns
 896 | % correspond to the first state factor (context), and the third dimension
 897 | % corresponds to behavior. Each column is a probability distribution
 898 | % that must sum to 1.
 899 | 
 900 | % We start by specifying that both contexts generate the 'No Hint'
 901 | % observation across all behavior states:
 902 | 
 903 | Ns = [length(D{1}) length(D{2})]; % number of states in each state factor (2 and 4)
 904 | 
 905 | for i = 1:Ns(2) 
 906 | 
 907 |     A{1}(:,:,i) = [1 1; % No Hint
 908 |                    0 0; % Machine-Left Hint
 909 |                    0 0];% Machine-Right Hint
 910 | end
 911 | 
 912 | % Then we specify that the 'Get Hint' behavior state generates a hint that
 913 | % either the left or right slot machine is better, depending on the context
 914 | % state. In this case, the hints are accurate with a probability of pHA. 
 915 | 
 916 | pHA = 1; % By default we set this to 1, but try changing its value to 
 917 |           % see how it affects model behavior
 918 | 
 919 | A{1}(:,:,2) = [0     0;      % No Hint
 920 |                pHA 1-pHA;    % Machine-Left Hint
 921 |                1-pHA pHA];   % Machine-Right Hint
 922 | 
 923 | % Next we specify the mapping between states and wins/losses. The first two
 924 | % behavior states ('Start' and 'Get Hint') do not generate either win or
 925 | % loss observations in either context:
 926 | 
 927 | for i = 1:2
 928 | 
 929 |     A{2}(:,:,i) = [1 1;  % Null
 930 |                    0 0;  % Loss
 931 |                    0 0]; % Win
 932 | end
 933 |            
 934 | % Choosing the left machine (behavior state 3) generates wins with
 935 | % probability pWin, which differs depending on the context state (columns):
 936 | 
 937 | pWin = .8; % By default we set this to .8, but try changing its value to 
 938 |           % see how it affects model behavior
 939 |            
 940 | A{2}(:,:,3) = [0      0;     % Null        
 941 |                1-pWin pWin;  % Loss
 942 |                pWin 1-pWin]; % Win
 943 | 
 944 | % Choosing the right machine (behavior state 4) generates wins with
 945 | % probability pWin, with the reverse mapping to context states from 
 946 | % choosing the left machine:
 947 |            
 948 | A{2}(:,:,4) = [0      0;     % Null
 949 |                pWin 1-pWin;  % Loss
 950 |                1-pWin pWin]; % Win
 951 |            
 952 | % Finally, we specify an identity mapping between behavior states and
 953 | % observed behaviors, to ensure the agent knows that behaviors were carried
 954 | % out as planned. Here, each row corresponds to each behavior state.
 955 |            
 956 | for i = 1:Ns(2) 
 957 | 
 958 |     A{3}(i,:,i) = [1 1];
 959 | 
 960 | end
 961 | 
 962 | %--------------------------------------------------------------------------
 963 | % Specify prior beliefs about state-outcome mappings in the generative model 
 964 | % (a)
 965 | % Note: This is optional, and will simulate learning state-outcome mappings 
 966 | % if specified.
 967 | %--------------------------------------------------------------------------
 968 |            
 969 | % Similar to learning priors over initial states, this simply
 970 | % requires specifying a matrix (a) with the same structure as the
 971 | % generative process (A), but with Dirichlet concentration parameters that
 972 | % can encode beliefs (and confidence in those beliefs) that need not
 973 | % match the generative process. Learning then corresponds to
 974 | % adding to the values of matrix entries, based on what outcomes were 
 975 | % observed when the agent believed it was in a particular state. For
 976 | % example, if the agent observed a win while believing it was in the 
 977 | % 'left better' context and the 'choose left machine' behavior state,
 978 | % the corresponding probability value would increase for that location in
 979 | % the state outcome-mapping (i.e., a{2}(3,1,3) might change from .8 to
 980 | % 1.8).
 981 | 
 982 | % One simple way to set up this matrix is by:
 983 |  
 984 | % 1. initially identifying it with the generative process 
 985 | % 2. multiplying the values by a large number to prevent learning all
 986 | %    aspects of the matrix (so the shape of the distribution changes very slowly)
 987 | % 3. adjusting the elements you want to differ from the generative process.
 988 | 
 989 | % To simulate learning the hint accuracy we
 990 | % can specify:
 991 | 
 992 | a{1} = A{1}*200;
 993 | a{2} = A{2}*200;
 994 | a{3} = A{3}*200;
 995 | 
 996 | a{1}(:,:,2) =  [0     0;     % No Hint
 997 |                .25   .25;    % Machine-Left Hint
 998 |                .25   .25];   % Machine-Right Hint
 999 |     
1000 | 
1001 | % Controlled transitions and transition beliefs : B{:,:,u} and b(:,:,u)
1002 | %==========================================================================
1003 | 
1004 | %--------------------------------------------------------------------------
1005 | % Next, we have to specify the probabilistic transitions between hidden states
1006 | % under each action (sometimes called 'control states'). 
1007 | % Note: By default, these will also be the transitions beliefs 
1008 | % for the generative model
1009 | %--------------------------------------------------------------------------
1010 | 
1011 | % Columns are states at time t. Rows are states at t+1.
1012 | 
1013 | % The agent cannot control the context state, so there is only 1 'action',
1014 | % indicating that contexts remain stable within a trial:
1015 | 
1016 | B{1}(:,:,1) = [1 0;  % 'Left Better' Context
1017 |                0 1]; % 'Right Better' Context
1018 |            
1019 | % The agent can control the behavior state, and we include 4 possible 
1020 | % actions:
1021 | 
1022 | % Move to the Start state from any other state
1023 | B{2}(:,:,1) = [1 1 1 1;  % Start State
1024 |                0 0 0 0;  % Hint
1025 |                0 0 0 0;  % Choose Left Machine
1026 |                0 0 0 0]; % Choose Right Machine
1027 |            
1028 | % Move to the Hint state from any other state
1029 | B{2}(:,:,2) = [0 0 0 0;  % Start State
1030 |                1 1 1 1;  % Hint
1031 |                0 0 0 0;  % Choose Left Machine
1032 |                0 0 0 0]; % Choose Right Machine
1033 | 
1034 | % Move to the Choose Left state from any other state
1035 | B{2}(:,:,3) = [0 0 0 0;  % Start State
1036 |                0 0 0 0;  % Hint
1037 |                1 1 1 1;  % Choose Left Machine
1038 |                0 0 0 0]; % Choose Right Machine
1039 | 
1040 | % Move to the Choose Right state from any other state
1041 | B{2}(:,:,4) = [0 0 0 0;  % Start State
1042 |                0 0 0 0;  % Hint
1043 |                0 0 0 0;  % Choose Left Machine
1044 |                1 1 1 1]; % Choose Right Machine        
1045 |            
1046 | %--------------------------------------------------------------------------
1047 | % Specify prior beliefs about state transitions in the generative model
1048 | % (b). This is a set of matrices with the same structure as B.
1049 | % Note: This is optional, and will simulate learning state transitions if 
1050 | % specified.
1051 | %--------------------------------------------------------------------------
1052 |           
1053 | % For this example, we will not simulate learning transition beliefs. 
1054 | % But, similar to learning d and a, this just involves accumulating
1055 | % Dirichlet concentration parameters. Here, transition beliefs are updated
1056 | % after each trial when the agent believes it was in a given state at time
1057 | % t and another state at t+1.
1058 | 
1059 | % Preferred outcomes: C and c
1060 | %==========================================================================
1061 | 
1062 | %--------------------------------------------------------------------------
1063 | % Next, we have to specify the 'prior preferences', encoded here as log
1064 | % probabilities. 
1065 | %--------------------------------------------------------------------------
1066 | 
1067 | % One matrix per outcome modality. Each row is an observation, and each
1068 | % column is a time point. Negative values indicate lower preference,
1069 | % positive values indicate a high preference. Stronger preferences promote
1070 | % risky choices and reduced information-seeking.
1071 | 
1072 | % We can start by setting a 0 preference for all outcomes:
1073 | 
1074 | No = [size(A{1},1) size(A{2},1) size(A{3},1)]; % number of outcomes in 
1075 |                                                % each outcome modality
1076 | 
1077 | C{1}      = zeros(No(1),T); % Hints
1078 | C{2}      = zeros(No(2),T); % Wins/Losses
1079 | C{3}      = zeros(No(3),T); % Observed Behaviors
1080 | 
1081 | % Then we can specify a 'loss aversion' magnitude (la) at time points 2 
1082 | % and 3, and a 'reward seeking' (or 'risk-seeking') magnitude (rs). Here,
1083 | % rs is divided by 2 at the third time point to encode a smaller win ($2
1084 | % instead of $4) if taking the hint before choosing a slot machine.
1085 | 
1086 | la = 1; % By default we set this to 1, but try changing its value to 
1087 |         % see how it affects model behavior
1088 | 
1089 | rs = 4; % By default we set this to 4, but try changing its value to 
1090 |         % see how it affects model behavior
1091 | 
1092 | C{2}(:,:) =    [0  0   0   ;  % Null
1093 |                 0 -la -la  ;  % Loss
1094 |                 0  rs  rs/2]; % win
1095 |             
1096 | %--------------------------------------------------------------------------
1097 | % One can also optionally choose to simulate preference learning by
1098 | % specifying a Dirichlet distribution over preferences (c). 
1099 | %--------------------------------------------------------------------------
1100 | 
1101 | % This will not be simulated here. However, this works by increasing the
1102 | % preference magnitude for an outcome each time that outcome is observed.
1103 | % The assumption here is that preferences naturally increase for entering
1104 | % situations that are more familiar.
1105 | 
1106 | % Allowable policies: U or V. 
1107 | %==========================================================================
1108 | 
1109 | %--------------------------------------------------------------------------
1110 | % Each policy is a sequence of actions over time that the agent can 
1111 | % consider. 
1112 | %--------------------------------------------------------------------------
1113 | 
1114 | % For our simulations, we will specify V, where rows correspond to time 
1115 | % points and should be length T-1 (here, 2 transitions, from time point 1
1116 | % to time point 2, and time point 2 to time point 3):
1117 | 
1118 | NumPolicies = 5; % Number of policies
1119 | NumFactors = 2; % Number of state factors
1120 | 
1121 | V         = ones(T-1,NumPolicies,NumFactors);
1122 | 
1123 | V(:,:,1) = [1 1 1 1 1;
1124 |             1 1 1 1 1]; % Context state is not controllable
1125 | 
1126 | V(:,:,2) = [1 2 2 3 4;
1127 |             1 3 4 1 1];
1128 |         
1129 | % For V(:,:,2), columns left to right indicate policies allowing: 
1130 | % 1. staying in the start state 
1131 | % 2. taking the hint then choosing the left machine
1132 | % 3. taking the hint then choosing the right machine
1133 | % 4. choosing the left machine right away (then returning to start state)
1134 | % 5. choosing the right machine right away (then returning to start state)
1135 | 
1136 | 
1137 | % Habits: E and e. 
1138 | %==========================================================================
1139 | 
1140 | %--------------------------------------------------------------------------
1141 | % Optional: a column vector with one entry per policy, indicating the 
1142 | % prior probability of choosing that policy (i.e., independent of other 
1143 | % beliefs). 
1144 | %--------------------------------------------------------------------------
1145 | 
1146 | % We will not equip our agent with any starting habits 
1147 | % (flat distribution over policies):
1148 | 
1149 | E = [1 1 1 1 1]';
1150 | 
1151 | % To incorporate habit learning, where policies become more likely after 
1152 | % each time they are chosen, we can also specify concentration parameters
1153 | % by specifying e:
1154 | 
1155 |  e = [1 1 1 1 1]';
1156 | 
1157 | % Additional optional parameters. 
1158 | %==========================================================================
1159 | 
1160 | % Eta: learning rate (0-1) controlling the magnitude of concentration parameter
1161 | % updates after each trial (if learning is enabled).
1162 | 
1163 |      eta = 1; % Default (maximum) learning rate
1164 |      
1165 | % Omega: forgetting rate (0-1) controlling the magnitude of reduction in concentration
1166 | % parameter values after each trial (if learning is enabled). NOTE THE FORM OF FORGETTING IMPLEMENTED HERE IS MODIFIED FROM THE DESCRIPTION IN THE PUBLISHED TUTORIAL FOR IMPROVED PERFORMANCE.
1167 | 
1168 |      omega = 0; % Default value indicating there is no forgetting (values approaching 1 indicate forgetting)
1169 | 
1170 | % Beta: Expected precision of expected free energy (G) over policies (a 
1171 | % positive value, with higher values indicating lower expected precision).
1172 | % Lower precision (i.e., a higher beta value) increases the influence of
1173 | % habits (E) and otherwise makes policy selection less deterministic.
1174 | 
1175 |      beta = 1; % By default this is set to 1, but try increasing its value 
1176 |                % to lower precision and see how it affects model behavior
1177 | 
1178 | % Alpha: An 'inverse temperature' or 'action precision' parameter that 
1179 | % controls how much randomness there is when selecting actions (e.g., how 
1180 | % often the agent might choose not to take the hint, even if the model 
1181 | % assigned the highest probability to that action). This is a positive 
1182 | % number, where higher values indicate less randomness. Here we set this to 
1183 | % a fairly high value:
1184 | 
1185 |     alpha = 32; % fairly low randomness in action selection
1186 | 
1187 | %% Define POMDP Structure
1188 | %==========================================================================
1189 | 
1190 | mdp.T = T;                    % Number of time steps
1191 | mdp.V = V;                    % allowable (deep) policies
1192 | 
1193 | mdp.A = A;                    % state-outcome mapping
1194 | mdp.B = B;                    % transition probabilities
1195 | mdp.C = C;                    % preferred states
1196 | mdp.D = D;                    % priors over initial states
1197 | mdp.d = d; mdp.d_0 = d;       % enable learning priors over initial states
1198 |                               % d_0 is floor value for forgetting
1199 | if Gen_model == 1
1200 |     mdp.E = E;                % prior over policies
1201 | elseif Gen_model == 2
1202 |     mdp.a = a; mdp.a_0 = a;   % enable learning state-outcome mappings and set floor value for forgetting (a_0)
1203 |     mdp.e = e; mdp.e_0 = e;   % enable learning of prior over policies and set floor value for forgetting (e_0)
1204 | end 
1205 | 
1206 | mdp.eta = eta;                % learning rate
1207 | mdp.omega = omega;            % forgetting rate
1208 | mdp.alpha = alpha;            % action precision
1209 | mdp.beta = beta;              % expected free energy precision
1210 | 
1211 | %respecify for use in inversion script (specific to this tutorial example)
1212 | mdp.NumPolicies = NumPolicies; % Number of policies
1213 | mdp.NumFactors = NumFactors; % Number of state factors
1214 |     
1215 |    
1216 | % We can add labels to states, outcomes, and actions for subsequent plotting:
1217 | 
1218 | label.factor{1}   = 'contexts';   label.name{1}    = {'left-better','right-better'};
1219 | label.factor{2}   = 'choice states';     label.name{2}    = {'start','hint','choose left','choose right'};
1220 | label.modality{1} = 'hint';    label.outcome{1} = {'null','left hint','right hint'};
1221 | label.modality{2} = 'win/lose';  label.outcome{2} = {'null','lose','win'};
1222 | label.modality{3} = 'observed action';  label.outcome{3} = {'start','hint','choose left','choose right'};
1223 | label.action{2} = {'start','hint','left','right'};
1224 | mdp.label = label;
1225 | 
1226 | MDP = mdp;
1227 | 
1228 | end
1229 | 


--------------------------------------------------------------------------------
/Step_by_Step_AI_Guide.m:
--------------------------------------------------------------------------------
   1 | %% Step by step introduction to building and using active inference models
   2 | 
   3 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
   4 | % Application to Empirical Data
   5 | 
   6 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
   7 | % UPDATED: 8/28/2024 (modified forgetting rate implementation)
   8 | 
   9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  10 | 
  11 | % First, you need to add SPM12, the DEM toolbox of SPM12 and the
  12 | % folder with the example scripts to your path in Matlab.
  13 | 
  14 | clear all
  15 | close all      % These commands clear the workspace and close any figures
  16 | 
  17 | rng('shuffle') % This sets the random number generator to produce a different 
  18 |                % random sequence each time, which leads to variability in 
   19 |                % repeated simulation results (you can also set it to 'default'
  20 |                % to produce the same random sequence each time)
  21 | 
  22 | % Simulation options after model building below:
  23 | 
  24 | % If Sim = 1, simulate single trial. This will reproduce fig. 8. (Although
  25 |             % note that, for this and the following simulations, results 
  26 |             % will vary each time due to random sampling)
  27 | 
  28 | % If Sim = 2, simulate multiple trials where the left context is active 
  29 |             % (D{1} = [1 0]'). This will reproduce fig. 10.
  30 |              
  31 | % If Sim = 3, simulate reversal learning, where the left context is active 
  32 |             % (D{1} = [1 0]') in early trials and then reverses in later 
  33 |             % trials (D{1} = [0 1]'). This will reproduce fig. 11.
  34 |             
  35 | % If Sim = 4, run parameter estimation on simulated data with reversal
  36 |             % learning. This will reproduce the top panel of fig. 17.
  37 |             
  38 | % If Sim = 5, run parameter estimation on simulated data with reversal
  39 |             % learning from multiple participants under different models
  40 |             % (i.e., different parameter values) and perform model comparison. 
  41 |             % This will reproduce the bottom panel of fig. 17. This option
  42 |             % will also save two structures that include results of model
  43 |             % comparison, model fitting, parameter recoverability analyses,
  44 |             % and inputs needed for group (PEB) analyses.
  45 |             
  46 | rs1 = 4; % Risk-seeking parameter (set to the variable rs below) 
  47 |          % To reproduce fig. 8, use values of 4 or 8 (with Sim = 1)
  48 |          % To reproduce fig. 10, use values of 3 or 4 (with Sim = 2)
  49 |          % To reproduce fig. 11, use values of 3 or 4 (with Sim = 3)
  50 |          % This will have no effect on Sim = 4 or Sim = 5
  51 | 
  52 | Sim = 2;
  53 | 
  54 | % When Sim = 5, if PEB = 1 the script will run simulated group-level
  55 | % (Parametric Empirical Bayes) analyses.
  56 | 
  57 | PEB = 0; % Note: GCM_2 and GCM_3 (the inputs to PEB; see below) are saved 
  58 |          % after running Sim = 5 to avoid needing to re-run it each time 
  59 |          % you want to use PEB (i.e., because Sim = 5 takes a long time). 
  60 |          % After running Sim = 5 once, you can simply load GCM_2 and GCM_3 and 
  61 |          % run the PEB section separately if you want to come back to it later.
  62 | 
  63 | % You can also run the sections separately after building the model by
  64 | % simply clicking into that section and clicking 'Run Section' above
  65 | 
  66 | %% 1. Set up model structure
  67 | 
  68 | % Number of time points or 'epochs' within a trial: T
  69 | % =========================================================================
  70 | 
  71 | % Here, we specify 3 time points (T), in which the agent 1) starts in a 'Start'
  72 | % state, 2) first moves to either a 'Hint' state or a 'Choose Left' or 'Choose
  73 | % Right' slot machine state, and 3) either moves from the Hint state to one
  74 | % of the choice states or moves from one of the choice states back to the
  75 | % Start state.
  76 | 
  77 | T = 3;
  78 | 
  79 | % Priors about initial states: D and d
  80 | % =========================================================================
  81 | 
  82 | %--------------------------------------------------------------------------
  83 | % Specify prior probabilities about initial states in the generative 
  84 | % process (D)
  85 | % Note: By default, these will also be the priors for the generative model
  86 | %--------------------------------------------------------------------------
  87 | 
  88 | % For the 'context' state factor, we can specify that the 'left better' context 
  89 | % (i.e., where the left slot machine is more likely to win) is the true context:
  90 | 
  91 | D{1} = [1 0]';  % {'left better','right better'}
  92 | 
  93 | % For the 'behavior' state factor, we can specify that the agent always
  94 | % begins a trial in the 'start' state (i.e., before choosing to either pick
   95 | % a slot machine or first ask for a hint):
  96 | 
  97 | D{2} = [1 0 0 0]'; % {'start','hint','choose-left','choose-right'}
  98 | 
  99 | %--------------------------------------------------------------------------
 100 | % Specify prior beliefs about initial states in the generative model (d)
 101 | % Note: This is optional, and will simulate learning priors over states 
 102 | % if specified.
 103 | %--------------------------------------------------------------------------
 104 | 
 105 | % Note that these are technically what are called 'Dirichlet concentration
  106 | % parameters', which need not take on values between 0 and 1. These values
 107 | % are added to after each trial, based on posterior beliefs about initial
 108 | % states. For example, if the agent believed at the end of trial 1 that it 
 109 | % was in the 'left better' context, then d{1} on trial 2 would be 
 110 | % d{1} = [1.5 0.5]' (although how large the increase in value is after 
 111 | % each trial depends on a learning rate). In general, higher values 
 112 | % indicate more confidence in one's beliefs about initial states, and 
 113 | % entail that beliefs will change more slowly (e.g., the shape of the 
 114 | % distribution encoded by d{1} = [25 25]' will change much more slowly 
 115 | % than the shape of the distribution encoded by d{1} = [.5 0.5]' with each 
 116 | % new observation).
 117 | 
 118 | % For context beliefs, we can specify that the agent starts out believing 
 119 | % that both contexts are equally likely, but with somewhat low confidence in 
 120 | % these beliefs:
 121 | 
 122 | d{1} = [.25 .25]';  % {'left better','right better'}
 123 | 
 124 | % For behavior beliefs, we can specify that the agent expects with 
 125 | % certainty that it will begin a trial in the 'start' state:
 126 | 
 127 | d{2} = [1 0 0 0]'; % {'start','hint','choose-left','choose-right'}
 128 | 
 129 | 
 130 | % State-outcome mappings and beliefs: A and a
 131 | % =========================================================================
 132 | 
 133 | %--------------------------------------------------------------------------
 134 | % Specify the probabilities of outcomes given each state in the generative 
 135 | % process (A)
 136 | % This includes one matrix per outcome modality
 137 | % Note: By default, these will also be the beliefs in the generative model
 138 | %--------------------------------------------------------------------------
 139 | 
 140 | % First we specify the mapping from states to observed hints (outcome
 141 | % modality 1). Here, the rows correspond to observations, the columns
 142 | % correspond to the first state factor (context), and the third dimension
 143 | % corresponds to behavior. Each column is a probability distribution
 144 | % that must sum to 1.
 145 | 
 146 | % We start by specifying that both contexts generate the 'No Hint'
 147 | % observation across all behavior states:
 148 | 
 149 | Ns = [length(D{1}) length(D{2})]; % number of states in each state factor (2 and 4)
 150 | 
 151 | for i = 1:Ns(2) 
 152 | 
 153 |     A{1}(:,:,i) = [1 1; % No Hint
 154 |                    0 0; % Machine-Left Hint
 155 |                    0 0];% Machine-Right Hint
 156 | end
 157 | 
 158 | % Then we specify that the 'Get Hint' behavior state generates a hint that
 159 | % either the left or right slot machine is better, depending on the context
 160 | % state. In this case, the hints are accurate with a probability of pHA. 
 161 | 
 162 | pHA = 1; % By default we set this to 1, but try changing its value to 
 163 |           % see how it affects model behavior
 164 | 
 165 | A{1}(:,:,2) = [0     0;      % No Hint
 166 |                pHA 1-pHA;    % Machine-Left Hint
 167 |                1-pHA pHA];   % Machine-Right Hint
 168 | 
 169 | % Next we specify the mapping between states and wins/losses. The first two
 170 | % behavior states ('Start' and 'Get Hint') do not generate either win or
 171 | % loss observations in either context:
 172 | 
 173 | for i = 1:2
 174 | 
 175 |     A{2}(:,:,i) = [1 1;  % Null
 176 |                    0 0;  % Loss
 177 |                    0 0]; % Win
 178 | end
 179 |            
 180 | % Choosing the left machine (behavior state 3) generates wins with
 181 | % probability pWin, which differs depending on the context state (columns):
 182 | 
 183 | pWin = .8; % By default we set this to .8, but try changing its value to 
 184 |            % see how it affects model behavior
 185 |            
 186 | A{2}(:,:,3) = [0      0;     % Null        
 187 |                1-pWin pWin;  % Loss
 188 |                pWin 1-pWin]; % Win
 189 | 
 190 | % Choosing the right machine (behavior state 4) generates wins with
 191 | % probability pWin, with the reverse mapping to context states from 
 192 | % choosing the left machine:
 193 |            
 194 | A{2}(:,:,4) = [0      0;     % Null
 195 |                pWin 1-pWin;  % Loss
 196 |                1-pWin pWin]; % Win
 197 |            
 198 | % Finally, we specify an identity mapping between behavior states and
 199 | % observed behaviors, to ensure the agent knows that behaviors were carried
 200 | % out as planned. Here, each row corresponds to each behavior state.
 201 |            
 202 | for i = 1:Ns(2) 
 203 | 
 204 |     A{3}(i,:,i) = [1 1];
 205 | 
 206 | end
 207 | 
 208 | %--------------------------------------------------------------------------
 209 | % Specify prior beliefs about state-outcome mappings in the generative model 
 210 | % (a)
 211 | % Note: This is optional, and will simulate learning state-outcome mappings 
 212 | % if specified.
 213 | %--------------------------------------------------------------------------
 214 |            
  215 | % We will not simulate learning the 'a' matrix here.  
 216 | % However, similar to learning priors over initial states, this simply
 217 | % requires specifying a matrix (a) with the same structure as the
 218 | % generative process (A), but with Dirichlet concentration parameters that
 219 | % can encode beliefs (and confidence in those beliefs) that need not
 220 | % match the generative process. Learning then corresponds to
 221 | % adding to the values of matrix entries, based on what outcomes were 
 222 | % observed when the agent believed it was in a particular state. For
 223 | % example, if the agent observed a win while believing it was in the 
 224 | % 'left better' context and the 'choose left machine' behavior state,
 225 | % the corresponding probability value would increase for that location in
 226 | % the state outcome-mapping (i.e., a{2}(3,1,3) might change from .8 to
 227 | % 1.8).
 228 | 
 229 | % One simple way to set up this matrix is by:
 230 |  
 231 | % 1. initially identifying it with the generative process 
 232 | % 2. multiplying the values by a large number to prevent learning all
 233 | %    aspects of the matrix (so the shape of the distribution changes very slowly)
 234 | % 3. adjusting the elements you want to differ from the generative process.
 235 | 
 236 | % For example, to simulate learning the reward probabilities, we could specify:
 237 |     
 238 |     % a{1} = A{1}*200;
 239 |     % a{2} = A{2}*200;
 240 |     % a{3} = A{3}*200;
 241 |     % 
 242 |     % a{2}(:,:,3) =  [0  0;  % Null        
 243 |     %                .5 .5;  % Loss
 244 |     %                .5 .5]; % Win
 245 |     % 
 246 |     % 
 247 |     % a{2}(:,:,4) =  [0  0;  % Null        
 248 |     %                .5 .5;  % Loss
 249 |     %                .5 .5]; % Win
 250 | 
 251 | % As another example, to simulate learning the hint accuracy one
 252 | % might specify:
 253 | 
 254 |     % a{1} = A{1}*200;
 255 |     % a{2} = A{2}*200;
 256 |     % a{3} = A{3}*200;
 257 |      
 258 |     % a{1}(:,:,2) =  [0     0;     % No Hint
 259 |     %                .25   .25;    % Machine-Left Hint
 260 |     %                .25   .25];   % Machine-Right Hint
 261 |     
 262 | 
 263 | % Controlled transitions and transition beliefs : B{:,:,u} and b(:,:,u)
 264 | %==========================================================================
 265 | 
 266 | %--------------------------------------------------------------------------
 267 | % Next, we have to specify the probabilistic transitions between hidden states
 268 | % under each action (sometimes called 'control states'). 
  269 | % Note: By default, these will also be the transition beliefs 
 270 | % for the generative model
 271 | %--------------------------------------------------------------------------
 272 | 
 273 | % Columns are states at time t. Rows are states at t+1.
 274 | 
 275 | % The agent cannot control the context state, so there is only 1 'action',
 276 | % indicating that contexts remain stable within a trial:
 277 | 
 278 | B{1}(:,:,1) = [1 0;  % 'Left Better' Context
 279 |                0 1]; % 'Right Better' Context
 280 |            
 281 | % The agent can control the behavior state, and we include 4 possible 
 282 | % actions:
 283 | 
 284 | % Move to the Start state from any other state
 285 | B{2}(:,:,1) = [1 1 1 1;  % Start State
 286 |                0 0 0 0;  % Hint
 287 |                0 0 0 0;  % Choose Left Machine
 288 |                0 0 0 0]; % Choose Right Machine
 289 |            
 290 | % Move to the Hint state from any other state
 291 | B{2}(:,:,2) = [0 0 0 0;  % Start State
 292 |                1 1 1 1;  % Hint
 293 |                0 0 0 0;  % Choose Left Machine
 294 |                0 0 0 0]; % Choose Right Machine
 295 | 
 296 | % Move to the Choose Left state from any other state
 297 | B{2}(:,:,3) = [0 0 0 0;  % Start State
 298 |                0 0 0 0;  % Hint
 299 |                1 1 1 1;  % Choose Left Machine
 300 |                0 0 0 0]; % Choose Right Machine
 301 | 
 302 | % Move to the Choose Right state from any other state
 303 | B{2}(:,:,4) = [0 0 0 0;  % Start State
 304 |                0 0 0 0;  % Hint
 305 |                0 0 0 0;  % Choose Left Machine
 306 |                1 1 1 1]; % Choose Right Machine        
 307 |            
 308 | %--------------------------------------------------------------------------
 309 | % Specify prior beliefs about state transitions in the generative model
 310 | % (b). This is a set of matrices with the same structure as B.
 311 | % Note: This is optional, and will simulate learning state transitions if 
 312 | % specified.
 313 | %--------------------------------------------------------------------------
 314 |           
 315 | % For this example, we will not simulate learning transition beliefs. 
 316 | % But, similar to learning d and a, this just involves accumulating
 317 | % Dirichlet concentration parameters. Here, transition beliefs are updated
 318 | % after each trial when the agent believes it was in a given state at time
  319 | % t and another state at t+1.
 320 | 
 321 | % Preferred outcomes: C and c
 322 | %==========================================================================
 323 | 
 324 | %--------------------------------------------------------------------------
 325 | % Next, we have to specify the 'prior preferences', encoded here as log
 326 | % probabilities. 
 327 | %--------------------------------------------------------------------------
 328 | 
 329 | % One matrix per outcome modality. Each row is an observation, and each
  330 | % column is a time point. Negative values indicate lower preference,
 331 | % positive values indicate a high preference. Stronger preferences promote
 332 | % risky choices and reduced information-seeking.
 333 | 
 334 | % We can start by setting a 0 preference for all outcomes:
 335 | 
 336 | No = [size(A{1},1) size(A{2},1) size(A{3},1)]; % number of outcomes in 
 337 |                                                % each outcome modality
 338 | 
 339 | C{1}      = zeros(No(1),T); % Hints
 340 | C{2}      = zeros(No(2),T); % Wins/Losses
 341 | C{3}      = zeros(No(3),T); % Observed Behaviors
 342 | 
 343 | % Then we can specify a 'loss aversion' magnitude (la) at time points 2 
 344 | % and 3, and a 'reward seeking' (or 'risk-seeking') magnitude (rs). Here,
 345 | % rs is divided by 2 at the third time point to encode a smaller win ($2
 346 | % instead of $4) if taking the hint before choosing a slot machine.
 347 | 
 348 | la = 1; % By default we set this to 1, but try changing its value to 
 349 |         % see how it affects model behavior
 350 | 
 351 | rs = rs1; % We set this value at the top of the script. 
 352 |           % By default we set it to 4, but try changing its value to 
 353 |           % see how it affects model behavior (higher values will promote
 354 |           % risk-seeking, as described in the main text)
 355 | 
 356 | C{2}(:,:) =    [0  0   0   ;  % Null
 357 |                 0 -la -la  ;  % Loss
 358 |                 0  rs  rs/2]; % win
 359 |             
 360 | % Note that, expanded out, this means that the other C-matrices will be:
 361 | 
 362 | % C{1} =      [0 0 0;    % No Hint
 363 | %              0 0 0;    % Machine-Left Hint
 364 | %              0 0 0];   % Machine-Right Hint
 365 | % 
 366 | % C{3} =      [0 0 0;  % Start State
 367 | %              0 0 0;  % Hint
 368 | %              0 0 0;  % Choose Left Machine
 369 | %              0 0 0]; % Choose Right Machine
 370 | 
 371 |             
 372 | %--------------------------------------------------------------------------
 373 | % One can also optionally choose to simulate preference learning by
 374 | % specifying a Dirichlet distribution over preferences (c). 
 375 | %--------------------------------------------------------------------------
 376 | 
 377 | % This will not be simulated here. However, this works by increasing the
 378 | % preference magnitude for an outcome each time that outcome is observed.
 379 | % The assumption here is that preferences naturally increase for entering
 380 | % situations that are more familiar. To do so, you can specify starting
 381 | % concentration parameters. For example:
 382 | 
 383 | % c{1}      = zeros(No(1),T); % Hints
 384 | % c{2}      = zeros(No(2),T); % Wins/Losses
 385 | % c{3}      = zeros(No(3),T); % Observed Behaviors
 386 | % 
 387 | % c{2}(:,:) =    [1  1  1  ;  % Null
 388 | %                 1  0  0.5;  % Loss
 389 | %                 1  2  1.5]; % win
 390 | 
 391 | % NOTE: These values must be non-negative; higher values = more preferred
 392 | 
 393 | % Allowable policies: U or V. 
 394 | %==========================================================================
 395 | 
 396 | %--------------------------------------------------------------------------
 397 | % Each policy is a sequence of actions over time that the agent can 
 398 | % consider. 
 399 | %--------------------------------------------------------------------------
 400 | 
 401 | % Policies can be specified as 'shallow' (looking only one step
 402 | % ahead), as specified by U. Or policies can be specified as 'deep' 
 403 | % (planning actions all the way to the end of the trial), as specified by
 404 | % V. Both U and V must be specified for each state factor as the third
 405 | % matrix dimension. This will simply be all 1s if that state is not
 406 | % controllable.
 407 | 
 408 | % For example, specifying U could simply be:
 409 | 
 410 |     % Np = 4; % Number of policies
 411 |     % Nf = 2; % Number of state factors
 412 |     % 
 413 |     % U         = ones(1,Np,Nf);
 414 |     % 
 415 |     % U(:,:,1) = [1 1 1 1]; % Context state is not controllable
 416 |     % U(:,:,2) = [1 2 3 4]; % All four actions in B{2} are allowed
 417 | 
 418 | % For our simulations, we will specify V, where rows correspond to time 
 419 | % points and should be length T-1 (here, 2 transitions, from time point 1
 420 | % to time point 2, and time point 2 to time point 3):
 421 | 
 422 | Np = 5; % Number of policies
 423 | Nf = 2; % Number of state factors
 424 | 
 425 | V         = ones(T-1,Np,Nf);
 426 | 
 427 | V(:,:,1) = [1 1 1 1 1;
 428 |             1 1 1 1 1]; % Context state is not controllable
 429 | 
 430 | V(:,:,2) = [1 2 2 3 4;
 431 |             1 3 4 1 1];
 432 |         
 433 | % For V(:,:,2), columns left to right indicate policies allowing: 
 434 | % 1. staying in the start state 
 435 | % 2. taking the hint then choosing the left machine
 436 | % 3. taking the hint then choosing the right machine
 437 | % 4. choosing the left machine right away (then returning to start state)
 438 | % 5. choosing the right machine right away (then returning to start state)
 439 | 
 440 | 
 441 | % Habits: E and e. 
 442 | %==========================================================================
 443 | 
 444 | %--------------------------------------------------------------------------
  445 | % Optional: a column vector with one entry per policy, indicating the 
 446 | % prior probability of choosing that policy (i.e., independent of other 
 447 | % beliefs). 
 448 | %--------------------------------------------------------------------------
 449 | 
 450 | % We will not equip our agent with habits in our example simulations, 
  451 | % but this could be specified as follows if one wanted to include a
 452 | % strong habit to choose the 4th policy:
 453 | 
 454 | % E = [.1 .1 .1 .6 .1]';
 455 | 
 456 | % To incorporate habit learning, where policies become more likely after 
 457 | % each time they are chosen, one can also specify concentration parameters
 458 | % by specifying e. For example:
 459 | 
 460 | % e = [1 1 1 1 1]';
 461 | 
 462 | % Additional optional parameters. 
 463 | %==========================================================================
 464 | 
 465 | % Eta: learning rate (0-1) controlling the magnitude of concentration parameter
 466 | % updates after each trial (if learning is enabled).
 467 | 
 468 |     eta = 0.5; % By default we here set this to 0.5, but try changing its value  
 469 |                % to see how it affects model behavior
 470 | 
 471 | % Omega: forgetting rate (0-1) controlling the reduction in concentration parameter
 472 | % magnitudes after each trial (if learning is enabled). This controls the
 473 | % degree to which newer experience can 'over-write' what has been learned
 474 | % from older experiences. It is adaptive in environments where the true
 475 | % parameters in the generative process (priors, likelihoods, etc.) can
 476 | % change over time. A high value for omega can be seen as a prior that the
 477 | % world is volatile and that contingencies change over time.
 478 | 
  479 |   omega = 0.0; % By default we here set this to 0 (indicating no forgetting), 
 480 |                % but try changing its value to see how it affects model behavior. 
 481 |                % Values approaching 1 indicate greater rates of forgetting.
 482 |                % NOTE: Trial 1 concentration parameter values are set as
 483 |                % floor values (forgetting cannot reduce counts below those
 484 |                % values - THIS IS MODIFIED FROM THE PUBLISHED TUTORIAL VERSION 
 485 |                % SO THAT CONCENTRATION PARAMETERS ABOVE THE FLOOR VALUE 
 486 |                % ARE MULTIPLIED BY 1-OMEGA)
 487 |                
 488 | % Beta: Expected precision of expected free energy (G) over policies (a 
 489 | % positive value, with higher values indicating lower expected precision).
  490 | % Lower precision (i.e., a higher beta value) increases the influence of
  491 | % habits (E) and otherwise makes policy selection less deterministic. For
  492 | % our example simulations we will simply set this to its default value of 1:
 493 | 
 494 |      beta = 1; % By default this is set to 1, but try increasing its value 
 495 |                % to lower precision and see how it affects model behavior
 496 | 
 497 | % Alpha: An 'inverse temperature' or 'action precision' parameter that 
 498 | % controls how much randomness there is when selecting actions (e.g., how 
 499 | % often the agent might choose not to take the hint, even if the model 
  500 | % assigned the highest probability to that action). This is a positive 
 501 | % number, where higher values indicate less randomness. Here we set this to 
 502 | % a high value:
 503 | 
 504 |     alpha = 32;  % Any positive number. 1 is very low, 32 is fairly high; 
 505 |                  % an extremely high value can be used to specify
 506 |                  % deterministic action (e.g., 512)
 507 | 
 508 | % ERP: This parameter controls the degree of belief resetting at each 
 509 | % time point in a trial when simulating neural responses. A value of 1
 510 | % indicates no resetting, in which priors smoothly carry over. Higher
 511 | % values indicate degree of loss in prior confidence at each time step.
 512 | 
 513 |     erp = 1; % By default we here set this to 1, but try increasing its value  
 514 |              % to see how it affects simulated neural (and behavioral) responses
 515 |                           
 516 | % tau: Time constant for evidence accumulation. This parameter controls the
 517 | % magnitude of updates at each iteration of gradient descent. Larger values 
 518 | % of tau will lead to smaller updates and slower convergence time, 
 519 | % but will also promote greater stability in posterior beliefs. 
 520 | 
 521 |     tau = 12; % Here we set this to 12 to simulate smooth physiological responses,   
 522 |               % but try adjusting its value to see how it affects simulated
 523 |               % neural (and behavioral) responses
 524 |               
 525 | % Note: If these values are left unspecified, they are assigned default
 526 | % values when running simulations. These default values can be found within
 527 | % the spm_MDP_VB_X script (and in the spm_MDP_VB_X_tutorial script we
 528 | % provide in this tutorial).
 529 | 
 530 | % Other optional constants. 
 531 | %==========================================================================
 532 | 
 533 | % Chi: Occam's window parameter for the update threshold in deep temporal 
 534 | % models. In hierarchical models, this parameter controls how quickly
 535 | % convergence is 'cut off' during lower-level evidence accumulation. 
 536 | % Specifically, it sets an uncertainty threshold, below which no additional 
 537 | % trial epochs are simulated. By default, this is set to 1/64. Smaller 
 538 | % numbers (e.g., 1/128) indicate that lower uncertainty (greater confidence)
 539 | % is required before the number of trial epochs is shortened.
 540 | 
 541 | % zeta: Occam's window for policies. This parameter controls the threshold
 542 | % at which a policy ceases to be considered if its free energy
 543 | % becomes too high (i.e., when it becomes too implausible to consider
 544 | % further relative to other policies). It is set to default at a value of 
 545 | % 3. Higher values indicate a higher threshold. For example, a value of 6
 546 | % would indicate that a greater difference between a given policy and the
 547 | % best policy is required before that policy is 'pruned' (i.e., ceases to
 548 | % be considered). Policies will therefore be removed more quickly with
 549 | % smaller zeta values.
 550 |          
 551 | % Note: The spm_MDP_VB_X function is also equipped with broader functionality
 552 | % allowing incorporation of mixed (discrete and continuous) models,
 553 | % plotting, simulating Bayesian model reduction during simulated
 554 | % rest/sleep, among others. We do not describe these in detail here, but
 555 | % they are described in the documentation at the top of the function.
 556 | 
 557 | % True states and outcomes: s and o. 
 558 | %==========================================================================
 559 | 
 560 | %--------------------------------------------------------------------------
 561 | % Optionally, one can also specify true states and outcomes for some or all
 562 | % time points with s and o. If not specified, these will be 
 563 | % generated by the generative process. 
 564 | %--------------------------------------------------------------------------
 565 | 
 566 | % For example, this means the true states at time point 1 are left context 
 567 | % and start state:
 568 | 
 569 |     %      s = [1;
 570 |     %           1]; % the later time points (rows for each state factor) are 0s,
 571 |     %               % indicating not specified.
 572 |       
 573 | 
 574 | % And this means the observations at time point 1 are the No Hint, Null,
 575 | % and Start behavior observations.
 576 | 
 577 |     %      o = [1;
 578 |     %           1;
 579 |     %           1]; % the later time points (rows for each outcome modality) are 
 580 |     %               % 0s, indicating not specified
 581 |  
 582 | %% 2. Define MDP Structure
 583 | %==========================================================================
 584 | %==========================================================================
 585 | 
 586 | mdp.T = T;                    % Number of time steps
 587 | mdp.V = V;                    % allowable (deep) policies
 588 | 
 589 |     %mdp.U = U;                   % We could have instead used shallow 
 590 |                                   % policies (specifying U instead of V).
 591 | 
 592 | mdp.A = A;                    % state-outcome mapping
 593 | mdp.B = B;                    % transition probabilities
 594 | mdp.C = C;                    % preferred states
 595 | mdp.D = D;                    % priors over initial states
 596 | 
 597 | mdp.d = d; mdp.d_0 = mdp.d;   % enable learning priors over initial states
 598 |                               %     and set lower bound on concentration parameters (d_0)
 599 | mdp.eta = eta;                % learning rate
 600 | mdp.omega = omega;            % forgetting rate
 601 | mdp.alpha = alpha;            % action precision
 602 | mdp.beta = beta;              % expected precision of expected free energy over policies
 603 | mdp.erp = erp;                % degree of belief resetting at each timestep
 604 | mdp.tau = tau;                % time constant for evidence accumulation
 605 | 
 606 | % Note, here we are not including habits:
 607 | 
 608 |     % mdp.E = E;
 609 | 
 610 | % or learning other parameters:
 611 |     % mdp.a = a;  mdp.a_0 = mdp.a;                  
 612 |     % mdp.b = b;  mdp.b_0 = mdp.b;
 613 |     % mdp.c = c;  mdp.c_0 = mdp.c; clear mdp.C = C;
 614 |     % mdp.e = e;  mdp.e_0 = mdp.e;        
 615 | 
 616 | % or specifying true states or outcomes:
 617 | 
 618 |     % mdp.s = s;
 619 |     % mdp.o = o;
 620 |     
 621 | % or specifying other optional parameters (described above):
 622 | 
 623 |     % mdp.chi = chi;    % confidence threshold for ceasing evidence
 624 |                         % accumulation in lower levels of hierarchical models
 625 |     % mdp.zeta = zeta;  % occams window for ceasing to consider implausible
 626 |                         % policies
 627 |       
 628 | % We can add labels to states, outcomes, and actions for subsequent plotting:
 629 | 
 630 | label.factor{1}   = 'contexts';   label.name{1}    = {'left-better','right-better'};
 631 | label.factor{2}   = 'choice states';     label.name{2}    = {'start','hint','choose left','choose right'};
 632 | label.modality{1} = 'hint';    label.outcome{1} = {'null','left hint','right hint'};
 633 | label.modality{2} = 'win/lose';  label.outcome{2} = {'null','lose','win'};
 634 | label.modality{3} = 'observed action';  label.outcome{3} = {'start','hint','choose left','choose right'};
 635 | label.action{2} = {'start','hint','left','right'};
 636 | mdp.label = label;
 637 | 
 638 | clear beta
 639 | clear alpha
 640 | clear eta
 641 | clear omega
 642 | clear la
 643 | clear rs % We clear these so we can re-specify them in later simulations
 644 | 
 645 | %--------------------------------------------------------------------------
 646 | % Use a script to check if all matrix-dimensions are correct:
 647 | %--------------------------------------------------------------------------
 648 | mdp = spm_MDP_check(mdp);
 649 | 
 650 | 
 651 | if Sim ==1
 652 | %% 3. Single trial simulations
 653 |  
 654 | %--------------------------------------------------------------------------
 655 | % Now that the generative process and model have been generated, we can
 656 | % simulate a single trial using the spm_MDP_VB_X script. Here, we provide 
 657 | % a version specific to this tutorial - spm_MDP_VB_X_tutorial - that adds 
 658 | % the learning rate (eta) for initial state priors (d), and adds forgetting rate (omega), 
 659 | % which are not included in the current SPM version (as of 05/08/21).
 660 | %--------------------------------------------------------------------------
 661 | 
 662 | MDP = spm_MDP_VB_X_tutorial(mdp);
 663 | 
 664 | % We can then use standard plotting routines to visualize simulated neural 
 665 | % responses
 666 | 
 667 | spm_figure('GetWin','Figure 1'); clf    % display simulated neural responses
 668 | spm_MDP_VB_LFP(MDP); 
 669 | 
 670 | %  and to show posterior beliefs and behavior:
 671 | 
 672 | spm_figure('GetWin','Figure 2'); clf    % display beliefs and behavior
 673 | spm_MDP_VB_trial(MDP); 
 674 | 
 675 | % Please see the main text for figure interpretations
 676 | 
 677 | elseif Sim == 2
 678 | %% 4. Multi-trial simulations
 679 | 
 680 | % Next, we can expand the mdp structure to include multiple trials
 681 | 
 682 | N = 30; % number of trials
 683 | 
 684 | MDP = mdp;
 685 | 
 686 | [MDP(1:N)] = deal(MDP);
 687 | 
 688 | MDP = spm_MDP_VB_X_tutorial(MDP);
 689 | 
 690 | % We can again visualize simulated neural responses
 691 | 
 692 | spm_figure('GetWin','Figure 3'); clf    % display behavior
 693 | spm_MDP_VB_game_tutorial(MDP); 
 694 | 
 695 | elseif Sim == 3
 696 | %% 5. Simulate reversal learning
 697 | 
 698 | N = 32; % number of trials (must be multiple of 8)
 699 | 
 700 | MDP = mdp;
 701 | 
 702 | [MDP(1:N)] = deal(MDP);
 703 | 
 704 |     for i = 1:N/8
 705 |         MDP(i).D{1}   = [1 0]'; % Start in the 'left-better' context for 
 706 |                                 % early trials
 707 |     end
 708 | 
 709 |     for i = (N/8)+1:N
 710 |         MDP(i).D{1}   = [0 1]'; % Switch to 'right-better' context for 
 711 |                                 % the remainder of the trials
 712 |     end
 713 |     
 714 | MDP = spm_MDP_VB_X_tutorial(MDP);
 715 | 
 716 | % We can again visualize simulated neural responses
 717 | 
 718 | spm_figure('GetWin','Figure 4'); clf    % display behavior
 719 | spm_MDP_VB_game_tutorial(MDP); 
 720 | 
 721 | elseif Sim == 4
 722 | %% 6. Model inversion to recover parameters (action precision and risk-seeking)
 723 | %==========================================================================
 724 | %==========================================================================
 725 | 
 726 | close all
 727 | 
 728 | % Generate simulated behavior under specific parameter values:
 729 | %==========================================================================
 730 | 
 731 | % We will again use the reversal learning version
 732 | 
 733 | N = 32; % number of trials
 734 | 
 735 | MDP = mdp;
 736 | 
 737 | [MDP(1:N)] = deal(MDP);
 738 | 
 739 |     for i = 1:N/8
 740 |         MDP(i).D{1}   = [1 0]'; % Start in the 'left-better' context for 
 741 |                                 % early trials
 742 |     end
 743 | 
 744 |     for i = (N/8)+1:N
 745 |         MDP(i).D{1}   = [0 1]'; % Switch to 'right-better' context for 
 746 |                                 % the remainder of the trials
 747 |     end
 748 |     
 749 | %==========================================================================
 750 | % true parameter values (to try to recover during estimation):
 751 | %==========================================================================
 752 | 
 753 | alpha = 4; % specify a lower action precision (4) than the prior value (16)
 754 | la = 1;    % keep loss aversion at a value of 1
 755 | rs = 6;    % specify higher risk-seeking (6) than the prior value (5)
 756 | 
 757 | C_fit = [0  0   0 ;    % Null
 758 |          0 -la -la  ;  % Loss
 759 |          0  rs  rs/2]; % Win
 760 | 
 761 | [MDP(1:N).alpha] = deal(alpha); 
 762 | 
 763 | for i = 1:N
 764 |     MDP(i).C{2} = C_fit; 
 765 | end
 766 |                            
 767 |                             
 768 | % If you wanted, you could also adjust the true value for other
 769 | % parameters in the same manner. For example:
 770 | 
 771 |     % beta = 5; % specify a lower expected policy precision (5) than the prior value (1)
 772 |     % [MDP(1:N).beta] = deal(beta); 
 773 | 
 774 |     % eta = .9; % specify a higher learning rate (.9) than the prior value (.5)
 775 |     % [MDP(1:N).eta] = deal(eta); 
 776 |     
 777 | %==========================================================================
 778 | 
 779 | MDP = spm_MDP_VB_X_tutorial(MDP);
 780 | 
 781 | 
 782 | % Invert model and try to recover original parameters:
 783 | %==========================================================================
 784 | 
 785 | %--------------------------------------------------------------------------
 786 | % This is where we do model inversion. Model inversion is based on variational
 787 | % Bayes. Here we will maximize (negative) variational free energy with
 788 | % respect to the free parameters (here: alpha and rs). This corresponds to 
 789 | % maximising the likelihood of the data under these parameters (i.e., maximizing
 790 | % accuracy) and at the same time penalizing for strong deviations from the
 791 | % priors over the parameters (i.e., minimizing complexity), which prevents
 792 | % overfitting.
 793 | % 
 794 | % You can specify the prior mean and variance of each parameter at the
 795 | % beginning of the Estimate_parameters script.
 796 | %--------------------------------------------------------------------------
 797 | mdp.la_true = la;   % Carries over true la value for use in estimation script
 798 | mdp.rs_true = rs;   % Carries over true rs value for use in estimation script
 799 | 
 800 | DCM.MDP   = mdp;                  % MDP model that will be estimated
 801 | DCM.field = {'alpha','rs'};       % parameter (field) names to optimise
 802 | 
 803 | % Note: If you wanted to fit other parameters, you can simply add their
 804 | % field names, such as:
 805 | 
 806 |  % DCM.field = {'alpha','rs','eta'}; % if you wanted to fit learning rate
 807 |  
 808 | % This requires that those parameters are also included in the possible
 809 | % parameters specified in the Estimate_parameters script.
 810 | 
 811 | % Next we add the true observations and actions of a (simulated)
 812 | % participant
 813 | 
 814 | DCM.U     = {MDP.o};              % include the observations made by (real 
 815 |                                   % or simulated) participants
 816 |                                   
 817 | DCM.Y     = {MDP.u};              % include the actions made by (real or 
 818 |                                   % simulated) participants
 819 |  
 820 | DCM       = Estimate_parameters(DCM); % Run the parameter estimation function
 821 |  
 822 | subplot(2,2,3)
 823 | xticklabels(DCM.field),xlabel('Parameter')
 824 | subplot(2,2,4)
 825 | xticklabels(DCM.field),xlabel('Parameter')
 826 |  
 827 | % Check deviation of prior and posterior means & posterior covariance
 828 | %==========================================================================
 829 | 
 830 | %--------------------------------------------------------------------------
 831 | % re-transform values and compare prior with posterior estimates
 832 | %--------------------------------------------------------------------------
 833 | 
 834 | field = fieldnames(DCM.M.pE);            % names of the estimated parameters
 835 | for i = 1:length(field)
 836 |     switch field{i}
 837 |         case {'eta','omega'}
 838 |             % these two are mapped back with the logistic (inverse-logit) function
 839 |             prior(i)     = 1/(1+exp(-DCM.M.pE.(field{i})));
 840 |             posterior(i) = 1/(1+exp(-DCM.Ep.(field{i})));
 841 |         otherwise
 842 |             % every other parameter is mapped back by exponentiating
 843 |             prior(i)     = exp(DCM.M.pE.(field{i}));
 844 |             posterior(i) = exp(DCM.Ep.(field{i}));
 845 |     end
 846 | end
 847 | 
 848 | figure, set(gcf,'color','white')
 849 | subplot(2,1,1),hold on
 850 | title('Means')
 851 | bar(prior,'FaceColor',[.5,.5,.5]),bar(posterior,0.5,'k')
 852 | xlim([0,length(prior)+1]),set(gca, 'XTick', 1:length(prior)),set(gca, 'XTickLabel', DCM.field)
 853 | legend({'Prior','Posterior'})
 854 | hold off
 855 | subplot(2,1,2)
 856 | imagesc(DCM.Cp),caxis([0 1]),colorbar
 857 | title('(Co-)variance')
 858 | set(gca, 'XTick', 1:length(prior)),set(gca, 'XTickLabel', DCM.field)
 859 | set(gca, 'YTick', 1:length(prior)),set(gca, 'YTickLabel', DCM.field)
 860 |  
 861 | % To show evidence of recoverability, you may want to estimate parameters
 862 | % from simulated data generated by a range of parameters, and then check
 863 | % the strength of the correlation between the true parameters and estimated
 864 | % parameters to make sure there is a reasonably strong relationship. We try
 865 | % this at the end of section 7.
 866 | 
 867 | elseif Sim == 5
 868 | %% 7. Model comparison
 869 | %==========================================================================
 870 | %==========================================================================
 871 |  
 872 | % Now we will simulate data for 6 participants and fit them to two models:
 873 | % One which only fits action precision (alpha) and risk-seeking (rs), and 
 874 | % another that also fits learning rate (eta).
 875 | 
 876 | % Create vectors/matrices that will store results
 877 | 
 878 | F_2_params = [];
 879 | F_3_params = [];
 880 | 
 881 | avg_LL_2_params = [];
 882 | avg_prob_2_params = [];
 883 | avg_LL_3_params = [];
 884 | avg_prob_3_params = [];
 885 | 
 886 | GCM_2 = {};
 887 | GCM_3 = {};
 888 | 
 889 | Sim_params_2 = [];
 890 | true_params_2 = [];
 891 | Sim_params_3 = [];
 892 | true_params_3 = [];
 893 | 
 894 | % Set up reversal learning trials like before
 895 | 
 896 | N = 32; % number of trials
 897 | 
 898 | MDP = mdp;
 899 | 
 900 | [MDP(1:N)] = deal(MDP);
 901 | 
 902 |     for i = 1:N/8
 903 |         MDP(i).D{1}   = [1 0]'; % Start in the 'left-better' context for 
 904 |                                 % early trials
 905 |     end
 906 | 
 907 |     for i = (N/8)+1:N
 908 |         MDP(i).D{1}   = [0 1]'; % Switch to 'right-better' context for 
 909 |                                 % the remainder of the trials
 910 |     end
 911 | 
 912 | % Generate free energies for model fits for 2 parameter model (without eta)
 913 | 
 914 | rs_sequence = [4 6];   % specify different true risk-seeking values (prior = 5)
 915 | alpha_sequence = [4 16 24]; % specify different true action precisions (prior = 16)
 916 |      
 917 | 
 918 | for rs = rs_sequence  % specify different true risk-seeking values (prior = 5)
 919 |     for alpha = alpha_sequence   % specify different true action precisions (prior = 16) 
 920 |         
 921 |         
 922 | MDP_temp = MDP;
 923 |         
 924 | la = 1;   % keep loss aversion at a value of 1
 925 | 
 926 | C_fit = [0  0   0 ;    % Null
 927 |          0 -la -la  ;  % Loss
 928 |          0  rs  rs/2]; % Win
 929 | 
 930 | [MDP_temp(1:N).alpha] = deal(alpha); 
 931 | 
 932 | for i = 1:N
 933 |     MDP_temp(i).C{2} = C_fit; 
 934 | end
 935 | 
 936 | mdp.la_true = la;   % Carries over true la value for use in estimation script
 937 | mdp.rs_true = rs;   % Carries over true rs value for use in estimation script
 938 | 
 939 | MDP_temp = spm_MDP_VB_X_tutorial(MDP_temp);
 940 | 
 941 | spm_figure('GetWin','Figure 5'); clf    % display behavior to fit
 942 | spm_MDP_VB_game_tutorial(MDP_temp); 
 943 | 
 944 | DCM.MDP   = mdp;                  % MDP model that will be estimated
 945 | DCM.field = {'alpha','rs'};       % parameter (field) names to optimise
 946 | 
 947 | DCM.U     = {MDP_temp.o};              % include the observations made by (real 
 948 |                                   % or simulated) participants
 949 |                                   
 950 | DCM.Y     = {MDP_temp.u};              % include the actions made by (real or 
 951 |                                   % simulated) participants
 952 |  
 953 | DCM       = Estimate_parameters(DCM); % Run the parameter estimation function
 954 | 
 955 | % Convert parameters back out of log- or logit-space
 956 | 
 957 | field = fieldnames(DCM.M.pE);
 958 | for i = 1:length(field)
 959 |     if strcmp(field{i},'eta')
 960 |         DCM.prior(i) = 1/(1+exp(-DCM.M.pE.(field{i})));
 961 |         DCM.posterior(i) = 1/(1+exp(-DCM.Ep.(field{i})));
 962 |     elseif strcmp(field{i},'omega')
 963 |         DCM.prior(i) = 1/(1+exp(-DCM.M.pE.(field{i})));
 964 |         DCM.posterior(i) = 1/(1+exp(-DCM.Ep.(field{i})));
 965 |     else
 966 |         DCM.prior(i) = exp(DCM.M.pE.(field{i}));
 967 |         DCM.posterior(i) = exp(DCM.Ep.(field{i}));
 968 |     end
 969 | end
 970 | 
 971 | F_2_params = [F_2_params DCM.F];% Get free energies for each participant's model
 972 | 
 973 | GCM_2   = [GCM_2;{DCM}]; % Save DCM for each participant
 974 | 
 975 | % Get Log-likelihood and action probabilities for best-fit model
 976 | 
 977 | MDP_best = MDP;
 978 | 
 979 | [MDP_best(1:N).alpha] = deal(DCM.posterior(1)); 
 980 | 
 981 | C_fit_best = [0  0   0 ;                                % Null
 982 |               0 -la -la  ;                              % Loss
 983 |               0  DCM.posterior(2)  DCM.posterior(2)/2]; % Win
 984 | 
 985 | for i = 1:N
 986 |     MDP_best(i).C{2} = C_fit_best; 
 987 | end
 988 | 
 989 | for i = 1:N
 990 |     MDP_best(i).o = MDP_temp(i).o; 
 991 | end
 992 | 
 993 | for i = 1:N
 994 |     MDP_best(i).u = MDP_temp(i).u; 
 995 | end
 996 | 
 997 | MDP_best   = spm_MDP_VB_X_tutorial(MDP_best); % run model with best parameter values
 998 | 
 999 | % Get sum of log-likelihoods (L) and sum of raw probabilities (total_prob) of the true actions across trials
1000 | 
1001 | L     = 0; % start (log) probability of actions given the model at 0
1002 | total_prob = 0; % running sum of the (non-log) action probabilities
1003 | 
1004 | for i = 1:numel(MDP_best) % Get probability of true actions for each trial
1005 |     for j = 1:numel(MDP_best(1).u(2,:)) % Only get probability of the second (controllable) state factor
1006 |         
1007 |         L = L + log(MDP_best(i).P(:,MDP_best(i).u(2,j),j)+ eps); % sum the (log) probabilities of each action
1008 |                                                                  % given a set of possible parameter values
1009 |         total_prob = total_prob + MDP_best(i).P(:,MDP_best(i).u(2,j),j); % sum the raw (non-log) probabilities of each
1010 |                                                                      % action, used below for the average action probability
1011 | 
1012 |     end
1013 | end 
1014 | 
1015 | % Get the average log-likelihood for each participant and average action
1016 | % probability of each participant under best-fit parameters
1017 | 
1018 | avg_LL_2 = L/(size(MDP_best,2)*2);
1019 | 
1020 | avg_LL_2_params = [avg_LL_2_params; avg_LL_2];
1021 | 
1022 | avg_prob_2 = total_prob/(size(MDP_best,2)*2);
1023 | 
1024 | avg_prob_2_params = [avg_prob_2_params; avg_prob_2];
1025 | 
1026 | % Store true and estimated parameters to assess recoverability
1027 | 
1028 | Sim_params_2 = [Sim_params_2; DCM.posterior];% Get posteriors
1029 | true_params_2 = [true_params_2; [alpha rs]];% Get true params
1030 | 
1031 | clear DCM
1032 | clear MDP_temp
1033 | clear MDP_best
1034 | 
1035 |     end
1036 | end
1037 | 
1038 | % Separately store true and simulated parameters
1039 | 
1040 | True_alpha_2 = true_params_2(:,1);
1041 | Estimated_alpha_2 = Sim_params_2(:,1);  
1042 | True_rs_2 = true_params_2(:,2);
1043 | Estimated_rs_2 = Sim_params_2(:,2); 
1044 | 
1045 | % Generate free energies for model fits for 3 parameter model (with eta)
1046 | 
1047 | for rs = rs_sequence  % specify different true risk-seeking values (prior = 5)
1048 |     for alpha = alpha_sequence   % specify different true action precisions (prior = 16) 
1049 |         
1050 | MDP_temp = MDP;
1051 |         
1052 | la = 1;   % keep loss aversion at a value of 1
1053 | 
1054 | if rs == rs_sequence(1,1)
1055 |     eta = .2; % set lower value of eta than the estimation prior (.5) for 3 participants
1056 | elseif rs == rs_sequence(1,2)
1057 |     eta = .8; % set higher value of eta than the estimation prior (.5) for 3 participants
1058 | end
1059 | 
1060 | 
1061 | C_fit = [0  0   0 ;    % Null
1062 |          0 -la -la  ;  % Loss
1063 |          0  rs  rs/2]; % Win
1064 | 
1065 | [MDP_temp(1:N).alpha] = deal(alpha);
1066 | [MDP_temp(1:N).eta] = deal(eta);
1067 | 
1068 | for i = 1:N
1069 |     MDP_temp(i).C{2} = C_fit; 
1070 | end
1071 | 
1072 | mdp.la_true = la;   % Carries over true la value for use in estimation script
1073 | mdp.rs_true = rs;   % Carries over true rs value for use in estimation script
1074 | 
1075 | MDP_temp = spm_MDP_VB_X_tutorial(MDP_temp);
1076 | 
1077 | spm_figure('GetWin','Figure 6'); clf    % display behavior to fit
1078 | spm_MDP_VB_game_tutorial(MDP_temp); 
1079 | 
1080 | DCM.MDP   = mdp;                  % MDP model that will be estimated
1081 | DCM.field = {'alpha','rs','eta'}; % parameter (field) names to optimise
1082 | 
1083 | DCM.U     = {MDP_temp.o};              % include the observations made by (real 
1084 |                                   % or simulated) participants
1085 |                                   
1086 | DCM.Y     = {MDP_temp.u};              % include the actions made by (real or 
1087 |                                   % simulated) participants
1088 |  
1089 | DCM       = Estimate_parameters(DCM); % Run the parameter estimation function
1090 | 
1091 | % Convert parameters back out of log- or logit-space
1092 | 
1093 | field = fieldnames(DCM.M.pE);
1094 | for i = 1:length(field)
1095 |     if strcmp(field{i},'eta')
1096 |         DCM.prior(i) = 1/(1+exp(-DCM.M.pE.(field{i})));
1097 |         DCM.posterior(i) = 1/(1+exp(-DCM.Ep.(field{i})));
1098 |     elseif strcmp(field{i},'omega')
1099 |         DCM.prior(i) = 1/(1+exp(-DCM.M.pE.(field{i})));
1100 |         DCM.posterior(i) = 1/(1+exp(-DCM.Ep.(field{i})));
1101 |     else
1102 |         DCM.prior(i) = exp(DCM.M.pE.(field{i}));
1103 |         DCM.posterior(i) = exp(DCM.Ep.(field{i}));
1104 |     end
1105 | end
1106 | 
1107 | 
1108 | F_3_params = [F_3_params DCM.F]; % Get free energies for each participant's model
1109 | 
1110 | GCM_3   = [GCM_3;{DCM}]; % Save DCM for each participant
1111 | 
1112 | % Get Log-likelihood and action probabilities for best-fit model
1113 | 
1114 | MDP_best = MDP;
1115 | 
1116 | [MDP_best(1:N).alpha] = deal(DCM.posterior(1)); 
1117 | 
1118 | C_fit_best = [0  0   0 ;                                % Null
1119 |               0 -la -la  ;                              % Loss
1120 |               0  DCM.posterior(2)  DCM.posterior(2)/2]; % Win
1121 | 
1122 | for i = 1:N
1123 |     MDP_best(i).C{2} = C_fit_best; 
1124 | end
1125 | 
1126 | % The best-fit model must use the *estimated* learning rate, not the true
1127 | % (generating) value, so that the log-likelihood and action probabilities
1128 | % below reflect all three fitted parameters. The estimate is looked up by
1129 | % name in 'field' (the fieldnames of DCM.M.pE used to fill DCM.posterior).
1130 | eta_best = DCM.posterior(strcmp(field,'eta'));
1131 | 
1132 | [MDP_best(1:N).eta] = deal(eta_best);
1133 | 
1134 | 
1135 | for i = 1:N
1136 |     MDP_best(i).o = MDP_temp(i).o; 
1137 | end
1138 | 
1139 | for i = 1:N
1140 |     MDP_best(i).u = MDP_temp(i).u; 
1141 | end
1142 | 
1143 | MDP_best   = spm_MDP_VB_X_tutorial(MDP_best); % run model with best parameter values
1144 | 
1145 | % Get sum of log-likelihoods (L) and sum of raw probabilities (total_prob) of the true actions across trials
1146 | 
1147 | L     = 0; % start (log) probability of actions given the model at 0
1148 | total_prob = 0; % running sum of the (non-log) action probabilities
1149 | 
1150 | for i = 1:numel(MDP_best) % Get probability of true actions for each trial
1151 |     for j = 1:numel(MDP_best(1).u(2,:)) % Only get probability of the second (controllable) state factor
1152 |         
1153 |         L = L + log(MDP_best(i).P(:,MDP_best(i).u(2,j),j)+ eps); % sum the (log) probabilities of each action
1154 |                                                                  % given a set of possible parameter values
1155 |         total_prob = total_prob + MDP_best(i).P(:,MDP_best(i).u(2,j),j); % sum the raw (non-log) probabilities of each
1156 |                                                                      % action, used below for the average action probability
1157 | 
1158 |     end
1159 | end 
1160 | 
1161 | % Get the average log-likelihood for each participant and average action
1162 | % probability of each participant under best-fit parameters
1163 | 
1164 | avg_LL_3 = L/(size(MDP_best,2)*2);
1165 | 
1166 | avg_LL_3_params = [avg_LL_3_params; avg_LL_3];
1167 | 
1168 | avg_prob_3 = total_prob/(size(MDP_best,2)*2);
1169 | 
1170 | avg_prob_3_params = [avg_prob_3_params; avg_prob_3];
1171 | 
1172 | % Store true and estimated parameters to assess recoverability
1173 | 
1174 | Sim_params_3 = [Sim_params_3; DCM.posterior];% Get posteriors
1175 | true_params_3 = [true_params_3; [alpha rs eta]];% Get true params
1176 | 
1177 | clear DCM
1178 | clear MDP_temp
1179 | clear MDP_best
1180 | 
1181 |     end
1182 | end
1183 | 
1184 | % Separately store true and simulated parameters
1185 | 
1186 | True_alpha_3 = true_params_3(:,1);
1187 | Estimated_alpha_3 = Sim_params_3(:,1);  
1188 | True_rs_3 = true_params_3(:,2);
1189 | Estimated_rs_3 = Sim_params_3(:,2); 
1190 | True_eta_3 = true_params_3(:,3);
1191 | Estimated_eta_3 = Sim_params_3(:,3); 
1192 | 
1193 | clear alpha
1194 | 
1195 | % Random Effects Bayesian Model Comparison (of Free Energies of best-fit 
1196 | % models per participant):
1197 | 
1198 | F_2_params = F_2_params';
1199 | F_3_params = F_3_params'; % Convert free energies to column vectors
1200 | 
1201 | [alpha,exp_r,xp,pxp,bor] = spm_BMS([F_2_params F_3_params]);
1202 | 
1203 | disp(' ');
1204 | disp(' ');
1205 | disp('Protected exceedance probability (pxp):');
1206 | disp(pxp);
1207 | disp(' ');
1208 | 
1209 | % The pxp value is the protected exceedance probability (pxp), which will 
1210 | % provide a probability of each model being the best-fit model. For example, 
1211 | % pxp = [.37 .63] would indicate a higher probability of the 3-parameter model 
1212 | 
1213 | %--------------------------------------------------------------------------
1214 | 
1215 | % We can also calculate the average probability and log-likelihood (LL) of the 
1216 | % actions under the 2- and 3-parameter models:
1217 | 
1218 | average_LL_2p = mean(avg_LL_2_params);
1219 | average_action_probability_2p = mean(avg_prob_2_params);
1220 | average_LL_3p = mean(avg_LL_3_params);
1221 | average_action_probability_3p = mean(avg_prob_3_params);
1222 | 
1223 | disp(' ');  % report the per-model averages computed just above
1224 | fprintf('Average log-likelihood under the 2-parameter model: %.2g\n',average_LL_2p);
1225 | fprintf('Average action probability under the 2-parameter model: %.2g\n',average_action_probability_2p);
1226 | disp(' ');
1227 | fprintf('Average log-likelihood under the 3-parameter model: %.2g\n',average_LL_3p); % 3-parameter values (previously printed the 2-parameter ones)
1228 | fprintf('Average action probability under the 3-parameter model: %.2g\n',average_action_probability_3p);
1229 | disp(' ');
1230 | 
1231 | %% Brief continuation of section 6 on recoverability
1232 | %==========================================================================
1233 | % Here we can also compute the strength of the relationship between true
1234 | % and estimated parameters to check recoverability. 
1235 | %==========================================================================
1236 | 
1237 | % Assemble matrices for correlation (2-parameter model)
1238 | recover_check_alpha_2 = [True_alpha_2 Estimated_alpha_2];
1239 | recover_check_rs_2 = [True_rs_2 Estimated_rs_2];
1240 | 
1241 | % Get correlations and significance
1242 | [Correlations_alpha_2, Significance_alpha_2] = corrcoef(recover_check_alpha_2);
1243 | [Correlations_rs_2, Significance_rs_2] = corrcoef(recover_check_rs_2);
1244 | 
1245 | % In this case, the correlations appear quite high for rs, and moderate for
1246 | % alpha
1247 | 
1248 | disp(' ');
1249 | disp('2-parameter model:');
1250 | disp(' ');
1251 | fprintf('Alpha recoverability: r = %.2g\n',Correlations_alpha_2(1,2));
1252 | fprintf('Correlation significance: p = %.2g\n',Significance_alpha_2(1,2));
1253 | disp(' ');
1254 | fprintf('Risk-seeking recoverability: r = %.2g\n',Correlations_rs_2(1,2));
1255 | fprintf('Correlation significance: p = %.2g\n',Significance_rs_2(1,2));
1256 | disp(' ');
1257 | 
1258 | % Assemble matrices for correlation (3-parameter model)
1259 | recover_check_alpha_3 = [True_alpha_3 Estimated_alpha_3];
1260 | recover_check_rs_3 = [True_rs_3 Estimated_rs_3];
1261 | recover_check_eta_3 = [True_eta_3 Estimated_eta_3];
1262 | 
1263 | % Get correlations and significance
1264 | [Correlations_alpha_3, Significance_alpha_3] = corrcoef(recover_check_alpha_3);
1265 | [Correlations_rs_3, Significance_rs_3] = corrcoef(recover_check_rs_3);
1266 | [Correlations_eta_3, Significance_eta_3] = corrcoef(recover_check_eta_3);
1267 | 
1268 | % In this case, the correlations appear high for rs and alpha, and moderate for
1269 | % learning rate. Note, however, that a wider range of values should be simulated to
1270 | % confirm recoverability in actual studies (and with a larger number of subjects).
1271 | 
1272 | disp(' ');
1273 | disp('3-parameter model:');
1274 | disp(' ');
1275 | fprintf('Alpha recoverability: r = %.2g\n',Correlations_alpha_3(1,2));
1276 | fprintf('Correlation significance: p = %.2g\n',Significance_alpha_3(1,2));
1277 | disp(' ');
1278 | fprintf('Risk-seeking recoverability: r = %.2g\n',Correlations_rs_3(1,2));
1279 | fprintf('Correlation significance: p = %.2g\n',Significance_rs_3(1,2));
1280 | disp(' ');
1281 | fprintf('Learning rate recoverability: r = %.2g\n',Correlations_eta_3(1,2));
1282 | fprintf('Correlation significance: p = %.2g\n',Significance_eta_3(1,2));
1283 | disp(' ');
1284 | %% Organize and save results
1285 | 
1286 | two_parameter_model_estimates.alpha_true = recover_check_alpha_2(:,1);
1287 | two_parameter_model_estimates.alpha_estimated = recover_check_alpha_2(:,2);
1288 | two_parameter_model_estimates.risk_seeking_true = recover_check_rs_2(:,1);
1289 | two_parameter_model_estimates.risk_seeking_estimated = recover_check_rs_2(:,2);
1290 | two_parameter_model_estimates.final_log_likelihoods = avg_LL_2_params;
1291 | two_parameter_model_estimates.final_action_probabilities = avg_prob_2_params;
1292 | two_parameter_model_estimates.protected_exceedance_probability = pxp;
1293 | 
1294 | three_parameter_model_estimates.alpha_true = recover_check_alpha_3(:,1);
1295 | three_parameter_model_estimates.alpha_estimated = recover_check_alpha_3(:,2);
1296 | three_parameter_model_estimates.risk_seeking_true = recover_check_rs_3(:,1);
1297 | three_parameter_model_estimates.risk_seeking_estimated = recover_check_rs_3(:,2);
1298 | three_parameter_model_estimates.learning_rate_true = recover_check_eta_3(:,1);
1299 | three_parameter_model_estimates.learning_rate_estimated = recover_check_eta_3(:,2);
1300 | three_parameter_model_estimates.final_log_likelihoods = avg_LL_3_params;
1301 | three_parameter_model_estimates.final_action_probabilities = avg_prob_3_params;
1302 | three_parameter_model_estimates.protected_exceedance_probability = pxp;
1303 | 
1304 | save('Two_parameter_model_estimates','two_parameter_model_estimates');
1305 | save('Three_parameter_model_estimates','three_parameter_model_estimates');
1306 | save('GCM_2','GCM_2');
1307 | save('GCM_3','GCM_3');
1308 | 
1309 | figure
1310 | scatter(two_parameter_model_estimates.alpha_true,two_parameter_model_estimates.alpha_estimated,'filled')
1311 | lsline
1312 | title('Recoverability: Alpha (two-parameter model)')
1313 | xlabel('True (Generative) Alpha') 
1314 | ylabel('Estimated Alpha') 
1315 | [Corr_alpha_2, Sig_alpha_2] = corrcoef(two_parameter_model_estimates.alpha_true,two_parameter_model_estimates.alpha_estimated);
1316 | text(1, 23, ['r = ' num2str(Corr_alpha_2(1,2))])
1317 | text(1, 22, ['p = ' num2str(Sig_alpha_2(1,2))])
1318 | 
1319 | figure
1320 | scatter(two_parameter_model_estimates.risk_seeking_true,two_parameter_model_estimates.risk_seeking_estimated,'filled')
1321 | lsline
1322 | title('Recoverability: Risk-Seeking (two-parameter model)')
1323 | xlabel('True (Generative) Risk-Seeking') 
1324 | ylabel('Estimated Risk-Seeking') 
1325 | [Corr_rs_2, Sig_rs_2] = corrcoef(two_parameter_model_estimates.risk_seeking_true,two_parameter_model_estimates.risk_seeking_estimated);
1326 | text(4.1, 6.75, ['r = ' num2str(Corr_rs_2(1,2))])
1327 | text(4.1, 6.5, ['p = ' num2str(Sig_rs_2(1,2))])
1328 | 
1329 | figure
1330 | scatter(three_parameter_model_estimates.alpha_true,three_parameter_model_estimates.alpha_estimated,'filled')
1331 | lsline
1332 | title('Recoverability: Alpha (three-parameter model)')
1333 | xlabel('True (Generative) Alpha') 
1334 | ylabel('Estimated Alpha') 
1335 | [Corr_alpha_3, Sig_alpha_3] = corrcoef(three_parameter_model_estimates.alpha_true,three_parameter_model_estimates.alpha_estimated);
1336 | text(1, 29, ['r = ' num2str(Corr_alpha_3(1,2))])
1337 | text(1, 27, ['p = ' num2str(Sig_alpha_3(1,2))])
1338 | 
1339 | figure
1340 | scatter(three_parameter_model_estimates.risk_seeking_true,three_parameter_model_estimates.risk_seeking_estimated,'filled')
1341 | lsline
1342 | title('Recoverability: Risk-Seeking (three-parameter model)')
1343 | xlabel('True (Generative) Risk-Seeking') 
1344 | ylabel('Estimated Risk-Seeking') 
1345 | [Corr_rs_3, Sig_rs_3] = corrcoef(three_parameter_model_estimates.risk_seeking_true,three_parameter_model_estimates.risk_seeking_estimated);
1346 | text(4.1, 6.75, ['r = ' num2str(Corr_rs_3(1,2))])
1347 | text(4.1, 6.5, ['p = ' num2str(Sig_rs_3(1,2))])
1348 | 
1349 | figure
1350 | scatter(three_parameter_model_estimates.learning_rate_true,three_parameter_model_estimates.learning_rate_estimated,'filled')
1351 | lsline
1352 | title('Recoverability: Learning Rate (three-parameter model)')
1353 | xlabel('True (Generative) Learning Rate') 
1354 | ylabel('Estimated Learning Rate') 
1355 | [Corr_lr_3, Sig_lr_3] = corrcoef(three_parameter_model_estimates.learning_rate_true,three_parameter_model_estimates.learning_rate_estimated);
1356 | text(.25, .53, ['r = ' num2str(Corr_lr_3(1,2))])
1357 | text(.25, .52, ['p = ' num2str(Sig_lr_3(1,2))])
1358 | 
1359 | 
1360 | if PEB == 1
1361 | %% 10. Hierarchical Bayes (between-subjects)
1362 | %==========================================================================
1363 | % clear and re-load saved GCMs for second-level analyses
1364 | 
1365 | % This will allow you to reload the GCM data later to use PEB without 
1366 | % needing to re-run the 'Sim = 5' option.
1367 | 
1368 | clear GCM_2 
1369 | clear GCM_3
1370 | load('GCM_2.mat')
1371 | load('GCM_3.mat')
1372 | %==========================================================================
1373 | 
1374 | %--------------------------------------------------------------------------
1375 | % Using PEB, you can test  the evidence for a 'full' model that assumes a
1376 | % group difference in all parameters and simpler models that assume no
1377 | % differences in one or more parameters.
1378 | % 
1379 | % This allows testing evidence for a difference (or for no difference)
1380 | % in estimated parameters. PEB uses a general linear (random effects) model,  
1381 | % which also allows testing evidence for individual difference effects 
1382 | % (e.g., age, symptom severity).
1383 | 
1384 | % See relevant literature on these routines, e.g., 
1385 | % Friston, Litvak, Oswal, Razi, Stephan, van Wijk, Ziegler, & Zeidman, 2016
1386 | % Zeidman, P., Jafarian, A., Seghier, M. L., Litvak, V., et al., 2019
1387 | %--------------------------------------------------------------------------
1388 | 
1389 | % Second-level model
1390 | 
1391 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1392 | % First, specify whether you want to use the 2- or 3-parameter model:
1393 | 
1394 | GCM_PEB = GCM_3; % either GCM_2 or GCM_3
1395 |   
1396 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1397 | 
1398 | % Specify default PEB parameters and between-subjects model (M)
1399 | 
1400 | M       = struct();
1401 | M.alpha = 1;        % prior PEB parameter variance = 1/alpha
1402 | M.beta  = 16;       % prior expectation of between-subject variability (random effects precision) = 1/(prior DCM parameter variance/M.beta)
1403 | M.hE    = 0;        % default
1404 | M.hC    = 1/16;     % default
1405 | M.Q     = 'all';    % covariance components: {'single','fields','all','none'}
1406 | M.X     = [];       % design matrix for general linear model
1407 | 
1408 | M.X = ones(length(GCM_PEB),1); % first column in general linear model is the mean of all participants
1409 | 
1410 | for i = 1:length(GCM_PEB)
1411 |     if i < (length(GCM_PEB)/2)+1 % in this simulation group 1 is the first half of the simulated sample
1412 |         M.X(i,2) = 1;            % and group 2 is the second
1413 |     else
1414 |         M.X(i,2) = -1;
1415 |     end
1416 | end
1417 | 
1418 | M.X(:,3) = 30 + 5.*randn(size(M.X,1),1); % Simulate a range of ages (mean = 30, SD = 5)
1419 |     
1420 | M.X(:,2) = detrend(M.X(:,2),'constant'); % Center group values around 0
1421 | M.X(:,3) = detrend(M.X(:,3),'constant'); % Center age values around 0
1422 | 
1423 | PEB_model  = spm_dcm_peb(GCM_PEB,M); % Specify PEB model
1424 | PEB_model.Xnames = {'Mean','Group','Age'}; % Specify covariate names
1425 | 
1426 | [BMA,BMR] = spm_dcm_peb_bmc(PEB_model); % Estimate PEB model
1427 | 
1428 | spm_dcm_peb_review(BMA,GCM_PEB); % Review results
1429 | 
1430 | % If you select the 'Second-level effect - Group' you can see that rs is
1431 | % significantly different between groups
1432 | 
1433 | % Please see main text for further information about how to interpret 
1434 | % results figures
1435 |     
1436 | end
1437 | 
1438 | end
1439 | 
1440 | %==========================================================================
1441 | % This completes the tutorial script. By adapting these scripts you can 
1442 | % now build a generative model of a task, run simulations, assess parameter
1443 | % recoverability, do bayesian model comparison, and do hierarchical
1444 | % bayesian group analyses. See the main text for further explanation of
1445 | % other aspects of these steps.


--------------------------------------------------------------------------------
/Step_by_Step_Hierarchical_Model.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %-- Hierarchical Model Tutorial --%
  3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  4 | 
  5 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  6 | % Application to Empirical Data
  7 | 
  8 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  9 | 
 10 | % Step by step tutorial for building hierarchical POMDPs using the active
 11 | % inference framework. Here we simulate the now classic "Local Global" auditory
 12 | % mismatch paradigm. This will reproduce results similar to figs. 14-16.
 13 | 
 14 | clear
 15 | close all
 16 | 
 17 | %% Level 1: Perception of individual stimuli
 18 | %==========================================================================
 19 | 
 20 | % prior beliefs about initial states
 21 | %--------------------------------------------------------------------------
 22 | 
 23 | D{1} = [1 1]';% stimulus tone {high, low}
 24 | 
 25 | d = D;
 26 | 
 27 | % Here we separate the generative process (the capital D)
 28 | % from the generative model (the lower case d) allowing learning to occur
 29 | % (i.e. to accumulate concentration parameters) in the generative model, 
 30 | % independent of the generative process.
 31 | 
 32 | % probabilistic (likelihood) mapping from hidden states to outcomes: A
 33 | %--------------------------------------------------------------------------
 34 | 
 35 | % outcome modality 1: stimulus tone
 36 | A{1}= [1 0; %high tone
 37 |        0 1];%low tone
 38 |    
 39 | % separate generative model (a) from generative process (A)
 40 | a = A;
 41 | 
 42 | % reduce precision
 43 | pr1 = 2; % precision (inverse temperature) parameter (lower = less precise)
 44 | a{1} = spm_softmax(pr1*log(A{1}+exp(-4))); % exp(-4) keeps log() finite for the zero entries of A{1}
 45 | 
 46 | a{1} = a{1}*100; % scale in place so that a remains a cell array like A (as done for a2 at level 2)
 47 | 
 48 | % By passing the a matrix through a softmax function with a precision parameter of 2
 49 | % we slightly reduce the precision of the generative model, analogous to introducing 
 50 | % a degree of noise into our model of tone perception. We then multiply it
 51 | % by 100 so that the level of noise stays constant across trials.
 52 | 
 53 | % Transitions between states: B
 54 | %--------------------------------------------------------------------------
 55 | 
 56 | B{1}= [1 0; %high tone
 57 |        0 1];%low tone
 58 | 
 59 | % MDP Structure
 60 | %--------------------------------------------------------------------------
 61 | mdp_1.T = 1;                      % number of updates
 62 | mdp_1.A = A;                      % likelihood mapping
 63 | mdp_1.B = B;                      % transition probabilities
 64 | mdp_1.D = D;                      % prior over initial states
 65 | mdp_1.d = d;
 66 | mdp_1.a = a;
 67 | mdp_1.erp = 1;
 68 | 
 69 | mdp_1.Aname = {'Stimulus'};
 70 | mdp_1.Bname = {'Stimulus'};
 71 | 
 72 | clear a d A B D
 73 | 
 74 | MDP_1 = spm_MDP_check(mdp_1);
 75 | 
 76 | clear mdp_1
 77 | 
 78 | %% Level 2: Slower-timescale representations of perceived stimulus sequences
 79 | %==========================================================================
 80 | 
 81 | % prior beliefs about initial states in generative process (D) and
 82 | % generative model (d) in terms of counts (i.e., concentration parameters)
 83 | %--------------------------------------------------------------------------
 84 | D2{1} = [1 1 1 1]'; % Sequence type: {high, low, high-low, low-high}
 85 | D2{2} = [1 0 0 0 0 0]'; % time in trial
 86 | D2{3} = [1 0 0]'; % Report: {null, same, different} 
 87 | 
 88 | d2 = D2;
 89 | d2{2} = d2{2}*100;
 90 | d2{3} = d2{3}*100;
 91 | 
 92 | % Again, we here separate the generative model from the generative process,
 93 | % and multiply d2{2} and d2{3} by 100 to prevent learning in the model's
 94 | % representation of task phase (time in trial) and report state probabilities.
 95 | 
 96 | % probabilistic (likelihood) mapping from hidden states to outcomes: A
 97 | %--------------------------------------------------------------------------
 98 | 
 99 | % outcomes: A{1} stim (2), A{2} Report Feedback (3)
100 | 
101 | %--- Stimulus
102 | for i = 1:6
103 |     for j = 1:3
104 |         A2{1}(:,:,i,j) = [1 0 1 0;%high
105 |                           0 1 0 1];%low
106 |     end 
107 | end
108 | 
109 | % oddball at fourth timestep
110 | for i = 4
111 |     for j = 1:3
112 |         A2{1}(:,:,i,j) = [1 0 0 1;%high
113 |                           0 1 1 0];%low
114 |     end
115 | end
116 | 
117 | %--- Report
118 | for i = 1:6
119 |     for j = 1:3
120 |         A2{2}(:,:,i,j) = [1 1 1 1; %null
121 |                           0 0 0 0; %incorrect
122 |                           0 0 0 0];%correct                    
123 |     end
124 | end
125 | 
126 | % report "same"
127 | for i = 6
128 |     for j = 2
129 |         A2{2}(:,:,i,j) = [0 0 0 0; %null
130 |                           0 0 1 1; %incorrect
131 |                           1 1 0 0];%correct
132 |     end
133 | end
134 | 
135 | % report "different"
136 | for i = 6
137 |     for j = 3
138 |         A2{2}(:,:,i,j) = [0 0 0 0; %null
139 |                           1 1 0 0; %incorrect
140 |                           0 0 1 1];%correct
141 |     end
142 | end
143 | 
144 | a2 = A2; % likelihood (concentration parameters) for generative model
145 | 
146 | % reduce precision
147 | pr2 = 2; % precision (inverse termperature) parameter (lower = less precise)
148 | a2{1} = spm_softmax(pr2*log(A2{1}+exp(-4)));
149 | 
150 | a2{1} = a2{1}*100;
151 | a2{2} = a2{2}*100;
152 | 
153 | % Transition probabilities: B
154 | %--------------------------------------------------------------------------
155 | 
156 | % Precision of sequence mapping
157 | B2{1} = eye(4,4); % maximally precise identity matrix (i.e., the true 
158 |                   % sequence is stable within a trial)
159 | 
160 | B2{2} = [0 0 0 0 0 0;
161 |          1 0 0 0 0 0;
162 |          0 1 0 0 0 0;
163 |          0 0 1 0 0 0;
164 |          0 0 0 1 0 0;
165 |          0 0 0 0 1 1]; % Deterministically transition through trial sequence
166 |      
167 | % Report
168 | B2{3}(:,:,1) = [1 1 1;
169 |                 0 0 0;
170 |                 0 0 0]; % Pre-report    
171 | B2{3}(:,:,2) = [0 0 0;
172 |                 1 1 1;
173 |                 0 0 0]; % Report "same"   
174 | B2{3}(:,:,3) = [0 0 0;
175 |                 0 0 0;
176 |                 1 1 1]; % Report "different"
177 |            
178 | % Policies
179 | %--------------------------------------------------------------------------
180 | 
181 |  T = 6;  % number of timesteps
182 |  Nf = 3; % number of factors
183 |  Pi = 2; % number of policies
184 |  V2 = ones(T-1,Pi,Nf);
185 | 
186 | % Report: "same" (left column) or "different" (right column)
187 |  V2(:,:,3) = [1 1; 
188 |               1 1;
189 |               1 1;
190 |               1 1;
191 |               2 3];
192 | 
193 | % C matrices (outcome modality by timestep)
194 | %--------------------------------------------------------------------------
195 | C2{1} = zeros(2,T);
196 | 
197 | % report
198 | C2{2} = [0 0 0 0 0 0;  % no feedback yet
199 |          0 0 0 0 0 -1; % preference not to be incorrect at last timestep
200 |          0 0 0 0 0 1]; % preference for being correct at last timestep
201 | 
202 | % MDP Structure (level 2, with the level 1 model nested in mdp.MDP)
203 | %--------------------------------------------------------------------------
204 | mdp.MDP  = MDP_1;
205 | mdp.link = [1 0]; % identifies lower level state factors (rows) with higher  
206 |                   % level observation modalities (columns). Here this means the
207 |                   % first observation at the higher level corresponds to
208 |                   % the first state factor at the lower level.
209 | 
210 | mdp.T = T;                      % number of time points
211 | mdp.A = A2;                     % likelihood mapping for generative process
212 | mdp.a = a2;                     % likelihood mapping for generative model (field 'a', as at level 1)
213 | mdp.B = B2;                     % transition probabilities
214 | mdp.C = C2;                     % preferred outcomes
215 | mdp.D = D2;                     % priors over initial states for generative process
216 | mdp.d = d2;                     % priors over initial states for generative model
217 | mdp.V = V2;                     % policies
218 | mdp.erp = 1;                    % reset/decay parameter
219 | 
220 | mdp.Aname = {'Stimulus', 'Report Feedback'};
221 | mdp.Bname = {'Sequence', 'Time in trial', 'Report'};
222 | 
223 | 
224 | % level one labels 
225 | label.factor{1}   = 'Stimulus';   label.name{1}    = {'High','Low'};
226 | label.modality{1} = 'Stimulus';   label.outcome{1} = {'High','Low'};
227 | mdp.MDP.label = label;
228 | 
229 | label.factor{1}   = 'Sequence type';   label.name{1}    = {'High','Low','High-low','Low-high'};
230 | label.factor{2}   = 'Time in trial';    label.name{2}    = {'T1', 'T2', 'T3', 'T4', 'T5', 'T6'};
231 | label.factor{3}   = 'Report';    label.name{3}    = {'Null', 'Same', 'Different'};
232 | label.modality{1} = 'Tone';    label.outcome{1} = {'High', 'Low'};
233 | label.modality{2} = 'Feedback';  label.outcome{2} = {'Null','Incorrect','Correct'};
234 | label.action{3} = {'Null','Same','Different'};
235 | mdp.label = label;
236 | 
237 | mdp = spm_MDP_check(mdp);
238 | MDP = spm_MDP_VB_X_tutorial(mdp);
239 | 
240 | %Plot trial
241 | spm_figure('GetWin','trial'); clf
242 | spm_MDP_VB_trial(MDP);
243 | 
244 | %% Simulate all conditions
245 | 
246 | % Here we specify the number of trials N and use a deal function (which copies 
247 | % the input to N outputs) to create 10 identical mdp structures. We can
248 | % then pass this to the spm_MDP_VB_X_tutorial() script, which sequentially updates
249 | % the concentration parameters acquired on each trial and passes them to the
250 | % mdp structure for the next trial (allowing learning to occur).
251 | 
252 | N = 10; %number of trials
253 | 
254 | % Local deviation - global standard
255 | mdp.s = 3; % first nine trials are high-low 
256 | MDP_condition1(1:N) = deal(mdp);
257 | MDP_condition1(10).s = 3; % tenth trial is also high-low 
258 | MDP_LDGS = spm_MDP_VB_X_tutorial(MDP_condition1);
259 | 
260 | % Local standard - global deviation
261 | mdp.s = 3; % first nine trials are high-low
262 | MDP_condition2(1:N) = deal(mdp);
263 | MDP_condition2(10).s = 1; % tenth trial is a high trial 
264 | MDP_LSGD = spm_MDP_VB_X_tutorial(MDP_condition2);
265 | 
266 | %% Plot ERPs using standard routines for each of the four conditions
267 | 
268 | % These are slightly modified versions of the standard plotting scripts
269 | % given in the SPM software.
270 | 
271 | spm_figure('GetWin','ERP T1 - Local deviation - global standard'); clf
272 | spm_MDP_VB_ERP_tutorial(MDP_LDGS(1));
273 | spm_figure('GetWin','Trial T1 - Local deviation - global standard'); clf
274 | spm_MDP_VB_trial(MDP_LDGS(1));
275 | spm_figure('GetWin','ERP T10 - Local deviation - global standard'); clf
276 | spm_MDP_VB_ERP_tutorial(MDP_LDGS(10));
277 | spm_figure('GetWin','Trial T10 - Local deviation - global standard'); clf
278 | spm_MDP_VB_trial(MDP_LDGS(10));
279 | 
280 | spm_figure('GetWin','ERP T1 - Local standard - global deviation'); clf
281 | spm_MDP_VB_ERP_tutorial(MDP_LSGD(1));
282 | spm_figure('GetWin','Trial T1 - Local standard - global deviation'); clf
283 | spm_MDP_VB_trial(MDP_LSGD(1));
284 | spm_figure('GetWin','ERP T10 - Local standard  - global deviation'); clf
285 | spm_MDP_VB_ERP_tutorial(MDP_LSGD(10));
286 | spm_figure('GetWin','Trial T10 - Local standard - global deviation'); clf
287 | spm_MDP_VB_trial(MDP_LSGD(10));
288 | 
289 | %% custom ERP plots
290 | 
291 | % The ERP plotting routines give three outputs: 
292 | % [level 2 ERPs, level 1 ERPs, indices]
293 | % There are 32 time indices per time step/epoch of gradient descent. Here   
294 | % there are 6 timesteps so there are 32x6 = 192 individual time indices.
295 | % The level 1 and 2 ERPs are the first derivatives at each time index.
296 | 
297 | [u1_1,v1_1,ind] = spm_MDP_VB_ERP_tutorial(MDP_LDGS(1),1);  
298 | [u1_10,v1_10] = spm_MDP_VB_ERP_tutorial(MDP_LDGS(10),1); 
299 | 
300 | [u2_1,v2_1] = spm_MDP_VB_ERP_tutorial(MDP_LSGD(1),1);  
301 | [u2_10,v2_10] = spm_MDP_VB_ERP_tutorial(MDP_LSGD(10),1); 
302 | 
303 | % The indices below are arbitrarily chosen to best represent the ERPs at the
304 | % 4th time step, which starts at 96ms and ends at 128ms. To do this for 
305 | % yourself we recommend just plotting the ERPs and selecting the appropriate
306 | % time window. For example, the 1st level ERPs start at the beginning of 
307 | % the epoch whereas the 2nd level ERPs appear towards the end of the epoch. So to
308 | % include baseline periods in the plot you will likely have to select 
309 | % slightly different time windows for each level as we have done here.
310 | 
311 | % index into 2nd level (rows are time bins, so the window is taken along dimension 1)
312 | index = (96:140); 
313 | u1_1  = u1_1(index,:); % level 2
314 | u1_10 = u1_10(index,:);
315 | 
316 | u2_1  = u2_1(index,:);% level 2
317 | u2_10  = u2_10(index,:);
318 | 
319 | % index into 1st level
320 | index = (70:120); 
321 | v1_1  = v1_1(index,:);% level 1
322 | v1_10  = v1_10(index,:);
323 | v2_1  = v2_1(index,:);% level 1
324 | v2_10  = v2_10(index,:);
325 | 
326 | time_low = (1:size(v1_1,1));  % one sample per row of the window; length() would return the largest dimension
327 | time_high = (1:size(u1_1,1)); % same for the level 2 window
328 | 
329 | %--- Lets make the plots! 
330 | 
331 | % low level plot
332 | limits = [20 45 -.5 1.2];
333 | 
334 | figure(10)
335 | hold on
336 | plot(time_low,sum(v2_10,2),'b','LineWidth',4) % local standard
337 | plot(time_low,sum(v1_10,2),'r','LineWidth',4) % local deviation
338 | axis(limits)
339 | set(gca,'FontSize',10)
340 | title('Mismatch negativity')
341 | legend('Local standard', 'Local deviation')
342 | 
343 | % high level plot
344 | limits = [1 45 -.5 .5];
345 | 
346 | figure(11)
347 | hold on
348 | plot(time_high,sum(u1_10,2),'b','LineWidth',4) % Global standard
349 | plot(time_high,sum(u2_10,2),'r','LineWidth',4) % Global deviation
350 | axis(limits)
351 | set(gca,'FontSize',10)
352 | title('P300')
353 | legend('Global standard', 'Global deviation')
354 | 
355 | % MMN (standard - mismatch)
356 | limits = [20 45 -1.2 .5];
357 | 
358 | figure(12)
359 | hold on
360 | plot(time_low,sum(v2_10-v1_10,2),'k','LineWidth',4) 
361 | axis(limits)
362 | set(gca,'FontSize',10)
363 | title('Mismatch negativity: local standard - local deviation')
364 | 
365 | % P300 (standard - mismatch)
366 | limits = [1 45 -.5 .5];
367 | 
368 | figure(13)
369 | hold on
370 | plot(time_high,sum(u1_10-u2_10,2),'k','LineWidth',4) 
371 | axis(limits)
372 | set(gca,'FontSize',10)
373 | title('P300: Global standard - Global deviation')
374 | 


--------------------------------------------------------------------------------
/VFE_calculation_example.m:
--------------------------------------------------------------------------------
  1 | %% Variational free energy calculation example
  2 | 
  3 | % Supplementary Code for: A Step-by-Step Tutorial on Active Inference Modelling and its 
  4 | % Application to Empirical Data
  5 | 
  6 | % By: Ryan Smith, Karl J. Friston, Christopher J. Whyte
  7 | 
  8 | clear all
  9 | 
 10 | True_observation = [1 0]'; % Set observation; Note that this could be set
 11 |                            % to include more observations. For example, 
 12 |                            % it could be set to [0 0 1]' to present a third
 13 |                            % observation. Note that this would require
 14 |                            % adding a corresponding third row to the
 15 |                            % Likelihood matrix below to specify the
 16 |                            % probabilities of the third observation under
 17 |                            % each state. One could similarly add a third
 18 |                            % state by adding a third entry into the Prior
 19 |                            % and a corresponding third column into the
 20 |                            % likelihood.
 21 | 
 22 | %% Generative Model
 23 | 
 24 | % Specify Prior and likelihood
 25 | 
 26 | Prior = [.5 .5]'; % Prior distribution p(s)
 27 | 
 28 | Likelihood = [.8 .2;
 29 |               .2 .8]; % Likelihood distribution p(o|s): columns=states, 
 30 |                       % rows = observations
 31 | 
 32 | Likelihood_of_observation = Likelihood'*True_observation; 
 33 | 
 34 | Joint_probability = Prior.*Likelihood_of_observation; % Joint probability 
 35 |                                                       % distribution p(o,s)
 36 | 
 37 | Marginal_probability = sum(Joint_probability,1); % Marginal observation 
 38 |                                                  % probabilities p(o)
 39 | %% Bayes theorem: exact posterior
 40 | 
 41 | % This is the distribution we want to approximate using variational 
 42 | % inference. In many practical applications, we can not solve for this 
 43 | % directly.
 44 | 
 45 | Posterior = Joint_probability...
 46 |     /Marginal_probability; % Posterior given true observation p(s|o)
 47 | 
 48 | disp(' ');
 49 | disp('Exact Posterior:');
 50 | disp(Posterior);
 51 | disp(' ');
 52 | 
 53 | %% Variational Free Energy
 54 | 
 55 | % Note: q(s) = approximate posterior belief: we want to get this as close as 
 56 | % possible to the true posterior p(s|o) after a new observation.
 57 | 
 58 | % Different decompositions of Free Energy (F)
 59 | 
 60 | % 1. F=E_q(s)[ln(q(s)/p(o,s))]
 61 | 
 62 | % 2. F=E_q(s)[ln(q(s)/p(s))] - E_q(s)[ln(p(o|s))] % Complexity-accuracy
 63 | % version
 64 | 
 65 | % The first term can be interpreted as a complexity term (the KL divergence 
 66 | % between prior beliefs p(s) and approximate posterior beliefs q(s)). In 
 67 | % other words, how much beliefs have changed after a new observation.
 68 | 
 69 | % The second term (excluding the minus sign) is the accuracy or (including the 
 70 | % minus sign) the entropy (= expected surprisal) of observations given 
 71 | % approximate posterior beliefs q(s). Written in this way 
 72 | % free-energy-minimisation is equivalent to a statistical Occam's razor, 
 73 | % where the agent tries to find the most accurate posterior belief that also
 74 | % changes its beliefs as little as possible.
 75 | 
 76 | % 3. F=E_q(s)[ln(q(s)) - ln(p(s|o)p(o))]
 77 | 
 78 | % 4. F=E_q(s)[ln(q(s)/p(s|o))] - ln(p(o))
 79 | 
 80 | % These two versions similarly show F in terms of a difference between
 81 | % q(s) and the true posterior p(s|o). Here we focus on #4.
 82 | 
 83 | % The first term is the KL divergence between the approximate posterior q(s)  
 84 | % and the unknown exact posterior p(s|o), also called the relative entropy. 
 85 | 
 86 | % The second term (excluding the minus sign) is the log evidence or (including 
 87 | % the minus sign) the surprisal of observations. Note that ln(p(o)) does 
 88 | % not depend on q(s), so its expectation value under q(s) is simply ln(p(o)).
 89 | 
 90 | % Since this term does not depend on q(s), minimizing free energy means that 
 91 | % q(s) comes to approximate p(s|o), which is our unknown, desired quantity.
 92 | 
 93 | % 5. F=E_q(s)[ln(q(s))-ln(p(o|s)p(s))]
 94 | 
 95 | % We will use this decomposition for convenience when doing variational
 96 | % inference below. Note how this decomposition is equivalent to the expression 
 97 | % shown in Figure 3 - F=E_q(s)(ln(q(s)/p(o,s)) - because ln(x)-ln(y) = ln(x/y)
 98 | % and p(o|s)p(s)=p(o,s)
 99 | 
100 | %% Variational inference
101 | 
102 | Initial_approximate_posterior = Prior; % Initial approximate posterior distribution.
103 |                                        % Set this to match generative model prior 
104 | 
105 | % Calculate F = sum over states of q(s)*(ln(q(s)) - ln(p(o,s))), for any number of states
106 | Initial_F = sum(Initial_approximate_posterior ...
107 |     .*(log(Initial_approximate_posterior) ...
108 |     -log(Joint_probability)));
109 | 
110 | Optimized_approximate_posterior = Posterior; % Set approximate distribution to true posterior
111 | 
112 | % Calculate F in the same way for the optimized q(s)
113 | Minimized_F = sum(Optimized_approximate_posterior ...
114 |     .*(log(Optimized_approximate_posterior) ...
115 |     -log(Joint_probability)));
116 | 
117 | % We see that F is lower when the approximate posterior q(s) is closer to 
118 | % the true distribution p(s|o)
119 | 
120 | disp(' ');
121 | disp('Initial Approximate Posterior:');
122 | disp(Initial_approximate_posterior);
123 | disp(' ');
124 | 
125 | disp(' ');
126 | disp('Initial Variational Free Energy:');
127 | disp(Initial_F);
128 | disp(' ');
129 | 
130 | disp(' ');
131 | disp('Optimized Approximate Posterior:');
132 | disp(Optimized_approximate_posterior);
133 | disp(' ');
134 | 
135 | disp(' ');
136 | disp('Minimized Variational Free Energy:');
137 | disp(Minimized_F);
138 | disp(' ');
139 | 


--------------------------------------------------------------------------------
/spm_MDP_VB_ERP_tutorial.m:
--------------------------------------------------------------------------------
  1 | function [x,y,ind,xx_yy] = spm_MDP_VB_ERP_tutorial(MDP,FACTOR,T)
  2 | % auxiliary routine for hierarchical electrophysiological responses
  3 | % FORMAT [x,y,ind,xx_yy] = spm_MDP_VB_ERP_tutorial(MDP,FACTOR,T)
  4 | %
  5 | % MDP    - structure (see spm_MDP_VB)
  6 | % FACTOR - hidden factors to plot [higher level, lower level]; default 1
  7 | % T      - flag to return cell of expectations (at time T; usually 1)
  8 | %
  9 | % x,y    - simulated ERPs (high-level, low-level); cells if T is supplied
 10 | % ind    - number of time bins in each (synchronised) epoch, last trial
 11 | % xx_yy  - summed (over units) high-level plus low-level ERP per time bin
 12 | %
 13 | % This routine combines first and second level hidden expectations by
 14 | % synchronising them; such that first level updating is followed by an
 15 | % epoch of second level updating - during which updating is suspended
 16 | % (and expectations are held constant). The ensuing spike rates can be
 17 | % regarded as showing delay period activity. In this routine, simulated
 18 | % local field potentials are band pass filtered spike rates (between eight
 19 | % and 32 Hz).
 20 | %
 21 | % Graphics are provided for first and second levels, in terms of simulated
 22 | % spike rates (posterior expectations), which are then combined to show
 23 | % simulated local field potentials for both levels (superimposed).
 24 | %
 25 | % At the lower level, only expectations about hidden states in the first
 26 | % epoch are returned (because the number of epochs can differ from trial
 27 | % to trial).
 28 | %__________________________________________________________________________
 29 | % Copyright (C) 2005 Wellcome Trust Centre for Neuroimaging
 30 | 
 31 | % Karl Friston
 32 | % $Id: spm_MDP_VB_ERP.m 7382 2018-07-25 13:58:04Z karl $
 33 | 
 34 | 
 35 | % defaults: assume the first factor is of interest
 36 | %==========================================================================
 37 | try, f1 = FACTOR(1); catch, f1 = 1; end
 38 | try, f2 = FACTOR(2); catch, f2 = 1; end
 39 | 
 40 | % and T = 1
 41 | %--------------------------------------------------------------------------
 42 | if nargin < 3, T = 1; end
 43 | 
 44 | for m = 1:numel(MDP)
 45 | 
 46 |     % dimensions
 47 |     %----------------------------------------------------------------------
 48 |     xn  = MDP(m).xn{f1};      % neuronal responses
 49 |     Nb  = size(xn,1);         % number of time bins per epochs
 50 |     Nx  = size(xn,2);         % number of states
 51 |     Ne  = size(xn,3);         % number of epochs
 52 |     
 53 |     
 54 |     % expected hidden states
 55 |     %======================================================================
 56 |     x     = cell(Ne,Nx);
 57 |     y     = cell(Ne);         % Ne-by-Ne cell; only y{1:Ne} are filled below
 58 |     for k = 1:Ne
 59 |         for j = 1:Nx
 60 |             x{k,j} = xn(:,j,T,k);
 61 |         end
 62 |         if isfield(MDP,'mdp')
 63 |             y{k}   = spm_MDP_VB_ERP_tutorial(MDP(m).mdp(k),f2,1);
 64 |         else
 65 |             y{k}   = [];
 66 |         end
 67 |     end
 68 |     
 69 |     if nargin > 2, return, end   % T supplied: return cells x,y for MDP(1) only
 70 |     
 71 |     % synchronise responses
 72 |     %----------------------------------------------------------------------
 73 |     u   = {};
 74 |     v   = {};
 75 |     uu  = spm_cat(x(1,:));
 76 |     for k = 1:Ne
 77 |         
 78 |         % low-level (high-level held at its last value, or first for k = 1)
 79 |         %------------------------------------------------------------------
 80 |         v{end + 1,1} = spm_cat(y{k});
 81 |         if k > 1
 82 |             u{end + 1,1} = ones(size(v{end,:},1),1)*u{end,1}(end,:);
 83 |         else
 84 |             u{end + 1,1} = ones(size(v{end,:},1),1)*uu(1,:);
 85 |         end
 86 |         
 87 |         % time bin indices
 88 |         %------------------------------------------------------------------
 89 |         ind(k) = size(u{end},1);
 90 |         
 91 |         % high-level (low-level held at its last value)
 92 |         %------------------------------------------------------------------
 93 |         u{end + 1,1} = spm_cat(x(k,:));
 94 |         v{end + 1,1} = ones(size(u{end,:},1),1)*v{end,1}(end,:);
 95 |         
 96 |         % time bin indices
 97 |         %------------------------------------------------------------------
 98 |         ind(k) = ind(k) + size(u{end},1);
 99 | 
100 |     end
101 |     
102 |     % accumulate over trials
103 |     %----------------------------------------------------------------------
104 |     U{m,1} = u;
105 |     V{m,1} = v;
106 |     
107 | end
108 | 
109 | % time bin (seconds)
110 | %--------------------------------------------------------------------------
111 | u  = spm_cat(U);
112 | v  = spm_cat(V);
113 | dt = 1/64;
114 | t  = (1:size(u,1))*dt;
115 | 
116 | % bandpass filter between 8 and 32 Hz
117 | %--------------------------------------------------------------------------
118 | c  = 1/32;
119 | x  = log(u + c);
120 | y  = log(v + c);
121 | x  = spm_conv(x,2,0) - spm_conv(x,16,0);
122 | y  = spm_conv(y,2,0) - spm_conv(y,16,0);
123 | 
124 | xx = x';
125 | xx(end+1,:) = sum(xx,1);     % append a last row: sum over units
126 | yy = y';
127 | yy(end+1,:) = sum(yy,1);     % append a last row: sum over units
128 | xx_yy = xx(end,:)+yy(end,:);
129 | 
130 | if nargout > 2, return, end  % no graphics when ind or xx_yy is requested
131 | 
132 | % simulated firing rates and the local field potentials
133 | %==========================================================================
134 | 
135 | % higher-level unit responses
136 | %--------------------------------------------------------------------------
137 | factor = MDP(1).label.factor{f1};
138 | name   = MDP(1).label.name{f1};
139 | 
140 | subplot(4,1,1), image(t,1:(size(u,2)),64*(1 - u')), ylabel('Unit')
141 | title(sprintf('Unit reponses : %s',factor),'FontSize',16)
142 | if numel(name) < 16
143 |     grid on, set(gca,'YTick',1:numel(name))
144 |     set(gca,'YTickLabel',name)
145 | end
146 | 
147 | % lower-level unit responses
148 | %--------------------------------------------------------------------------
149 | factor = MDP(1).MDP(1).label.factor{f2};
150 | name   = MDP(1).MDP(1).label.name{f2};
151 | 
152 | subplot(4,1,3), image(t,1:(size(v,2)),64*(1 - v')), ylabel('Unit')
153 | title(sprintf('Unit reponses : %s',factor),'FontSize',16)
154 | if numel(name) < 16          % count state names (as above), not title length
155 |     grid on, set(gca,'YTick',1:numel(name))
156 |     set(gca,'YTickLabel',name)
157 | end
158 | 
159 | % event related responses at both levels
160 | %--------------------------------------------------------------------------
161 | % subplot(6,1,3), plot(t,x',t,y','-.')
162 | % title('Local field potentials','FontSize',16)
163 | % ylabel('Depolarisation'),spm_axis tight
164 | % grid on, set(gca,'XTick',(1:(length(t)/Nb))*Nb*dt)
165 | 
166 | % event related responses summed
167 | %--------------------------------------------------------------------------
168 | 
169 | subplot(4,1,2), plot(t,xx(end,:))
170 | title('Local field potentials (Level 2)','FontSize',16)
171 | ylabel('Depolarisation'),ylim([-.2 1.1]) %spm_axis tight
172 | grid on, %set(gca,'XTick',(1:(length(t)/Nb))*Nb*dt)
173 | 
174 | subplot(4,1,4), plot(t,yy(end,:))
175 | title('Local field potentials (Level 1)','FontSize',16)
176 | ylabel('Depolarisation'),ylim([-1 1]) %spm_axis tight
177 | grid on, %set(gca,'XTick',(1:(length(t)/Nb))*Nb*dt)
178 | 
179 | 
180 | 


--------------------------------------------------------------------------------
/spm_MDP_VB_game_tutorial.m:
--------------------------------------------------------------------------------
  1 | function Q = spm_MDP_VB_game_tutorial(MDP)
  2 | % auxiliary plotting routine for spm_MDP_VB - multiple trials
  3 | % FORMAT Q = spm_MDP_VB_game_tutorial(MDP)
  4 | %
  5 | % MDP.P(M,T)      - probability of emitting action 1,...,M at time 1,...,T
  6 | % MDP.Q(N,T)      - an array of conditional (posterior) expectations over
  7 | %                   N hidden states and time 1,...,T
  8 | % MDP.X           - and Bayesian model averages over policies
  9 | % MDP.R           - conditional expectations over policies
 10 | % MDP.O(O,T)      - a sparse matrix encoding outcomes at time 1,...,T
 11 | % MDP.S(N,T)      - a sparse matrix encoding states at time 1,...,T
 12 | % MDP.U(M,T)      - a sparse matrix encoding action at time 1,...,T
 13 | % MDP.W(1,T)      - posterior expectations of precision
 14 | %
 15 | % MDP.un  = un    - presumably neuronal encoding of policies (not read here)
 16 | % MDP.xn  = Xn    - simulated neuronal encoding of hidden states
 17 | % MDP.wn  = wn    - simulated neuronal encoding of precision
 18 | % MDP.dn  = dn    - simulated dopamine responses (deconvolved)
 19 | % MDP.rt  = rt    - simulated reaction times
 20 | %
 21 | % returns summary of performance (and skips all plotting) if Q is requested:
 22 | %
 23 | %     Q.X  = x        - gradients of MDP.xn, per factor and trial
 24 | %     Q.R  = act_prob - action probabilities MDP.P(:,:,1), one column per trial
 25 | %     Q.S  = s        - column 2 of MDP.s (presumably time step 2 - confirm)
 26 | %     Q.O  = o        - row 2 of MDP.o (outcome modality 2 over time)
 27 | %     Q.p  = p        - performance
 28 | %     Q.q  = q        - reaction times (sum of MDP.rt(2:end))
 29 | %
 30 | % please see spm_MDP_VB
 31 | %__________________________________________________________________________
 32 | % Copyright (C) 2005 Wellcome Trust Centre for Neuroimaging
 33 | 
 34 | % Karl Friston
 35 | % $Id: spm_MDP_VB_game.m 7307 2018-05-08 09:44:04Z karl $
 36 | 
 37 | % numbers of transitions, policies and states
 38 | %--------------------------------------------------------------------------
 39 | if iscell(MDP(1).X)
 40 |     Nf = numel(MDP(1).B);                 % number of hidden state factors
 41 |     Ng = numel(MDP(1).A);                 % number of outcome factors
 42 | else
 43 |     Nf = 1;
 44 |     Ng = 1;
 45 | end
 46 | 
 47 | % graphics
 48 | %==========================================================================
 49 | Nt    = length(MDP);               % number of trials
 50 | Ne    = size(MDP(1).V,1) + 1;      % number of epochs per trial
 51 | Np    = size(MDP(1).V,2) + 1;      % number of policies (columns of V) + 1
 52 | for i = 1:Nt
 53 |     
 54 |     % assemble expectations of hidden states and outcomes
 55 |     %----------------------------------------------------------------------
 56 |     for j = 1:Ne
 57 |         for k = 1:Ne
 58 |             for f = 1:Nf
 59 |                 try
 60 |                     x{f}{i,1}{k,j} = gradient(MDP(i).xn{f}(:,:,j,k)')';
 61 |                 catch
 62 |                     x{f}{i,1}{k,j} = gradient(MDP(i).xn(:,:,j,k)')';
 63 |                 end
 64 |             end
 65 |         end
 66 |     end
 67 |     s(:,i) = MDP(i).s(:,2);
 68 |     o(:,i) = MDP(i).o(2,:)';
 69 |     act_prob(:,i) = MDP(i).P(:,:,1)';
 70 |     act(:,i) = MDP(i).u(2,1);
 71 |     w(:,i) = mean(MDP(i).dn,2);
 72 |     
 73 |     
 74 |     % assemble context learning (d if present, else D; cell form tried first)
 75 |     %----------------------------------------------------------------------
 76 |     for f = 1:Nf
 77 |         try
 78 |             try
 79 |                 D = MDP(i).d{f};
 80 |             catch
 81 |                 D = MDP(i).D{f};
 82 |             end
 83 |         catch
 84 |             try
 85 |                 D = MDP(i).d;
 86 |             catch
 87 |                 D = MDP(i).D;
 88 |             end
 89 |         end
 90 |         d{f}(:,i) = D/sum(D);
 91 |     end
 92 |     
 93 |     % assemble performance: log softmax(C) at the observed outcomes, averaged
 94 |     % over the Ne epochs and accumulated over outcome modalities
 95 |     p(i)  = 0;
 96 |     for g = 1:Ng
 97 |         try
 98 |             U = spm_softmax(MDP(i).C{g});
 99 |         catch
100 |             U = spm_softmax(MDP(i).C);
101 |         end
102 |         for t = 1:Ne
103 |             p(i) = p(i) + log(U(MDP(i).o(g,t),t))/Ne;
104 |         end
105 |     end
106 |     q(i)   = sum(MDP(i).rt(2:end));
107 |     
108 | end
109 | 
110 | % assemble output structure if required
111 | %--------------------------------------------------------------------------
112 | if nargout
113 |     Q.X  = x;            % gradients of MDP.xn
114 |     Q.R  = act_prob;     % action probabilities, MDP.P(:,:,1)
115 |     Q.S  = s;            % column 2 of MDP.s
116 |     Q.O  = o;            % row 2 of MDP.o
117 |     Q.p  = p;            % performance
118 |     Q.q  = q;            % reaction times
119 |     return
120 | end
121 | 
122 | 
123 | % Initial states and expected policies (habit in red)
124 | %--------------------------------------------------------------------------
125 | col   = {'r.','g.','b.','c.','m.','k.'};
126 | t     = 1:Nt;
127 | subplot(5,1,1)
128 | if Nt < 64
129 |     MarkerSize = 24;
130 | else
131 |     MarkerSize = 16;
132 | end
133 | 
134 | image(64*(1 - act_prob)),  hold on
135 | 
136 | plot(act,col{3},'MarkerSize',MarkerSize)
137 | 
138 | try
139 |     plot(Np*(1 - act_prob(Np,:)),'r')
140 | end
141 | try
142 |     E = spm_softmax(spm_cat({MDP.e}));
143 |     plot(Np*(1 - E(end,:)),'r:')
144 | end
145 | title('Action selection and action probabilities')
146 | xlabel('Trial'),ylabel('Action'), hold off
147 | yticklabels({'Start','Hint','Choose Left','Choose Right'})
148 | % Performance
149 | %--------------------------------------------------------------------------
150 | 
151 | subplot(5,1,2), bar(p,'k'),   hold on
152 | 
153 | for i = 1:size(o,2)
154 | %     j(i,1) = max(o(:,i));
155 |     if MDP(i).o(3,2) == 2              % modality 3 at time 2 equals 2
156 |         j(i,1) = MDP(i).o(2,3)-1;      % read modality 2 at time 3
157 |     else
158 |         j(i,1) = MDP(i).o(2,2)-1;      % otherwise read modality 2 at time 2
159 |     end
160 |     if j(i,1) == 1                     % 1 is the 'Loss' tick labelled below
161 |         jj(i,1) = 1;
162 |     else
163 |         jj(i,1) = -2;
164 |     end
165 | end
166 | 
167 | 
168 | 
169 | plot((j),col{2},'MarkerSize',MarkerSize);
170 | plot((jj),col{6},'MarkerSize',MarkerSize);
171 | 
172 | 
173 | title('Win/Loss and Free energies')
174 | ylabel('Value and Win/Loss'), spm_axis tight, hold off, box off
175 | set(gca,'YTick',[-4:1:3])
176 | yticklabels({'','','','Free Energy','','Loss','Win'})
177 | 
178 | % Initial states (context)
179 | %--------------------------------------------------------------------------
180 | subplot(5,1,3)
181 | col   = {'r','b','g','c','m','k','r','b','g','c','m','k'};
182 | for f = 1:Nf
183 |     if Nf > 1
184 |         plot(spm_cat(x{f}),col{f}), hold on
185 |     else
186 |         plot(spm_cat(x{f}))
187 |     end
188 | end
189 | title('State estimation (ERPs)'), ylabel('Response'), 
190 | spm_axis tight, hold off, box off
191 | 
192 | % Precision (dopamine)
193 | %--------------------------------------------------------------------------
194 | subplot(5,1,4)
195 | w   = spm_vec(w);
196 | if Nt > 8
197 |     fill([1 1:length(w) length(w)],[0; w.*(w > 0); 0],'k'), hold on
198 |     fill([1 1:length(w) length(w)],[0; w.*(w < 0); 0],'k'), hold off
199 | else
200 |     bar(w,1.1,'k')
201 | end
202 | title('Precision (dopamine)')
203 | ylabel('Precision','FontSize',12), spm_axis tight, box off
204 | YLim = get(gca,'YLim'); YLim(1) = 0; set(gca,'YLim',YLim);
205 | set(gca,'XTickLabel',{});
206 | 
207 | % learning - D (only the first factor is drawn)
208 | %--------------------------------------------------------------------------
209 | for f = 1
210 |     subplot(5*Nf,1,Nf*4 + f), image(64*(1 - d{f}))
211 |     if f < 2
212 |         title('Context Learning')
213 |     end
214 |     set(gca,'XTick',1:Nt);
215 | %     if f < Nf
216 | %         set(gca,'XTickLabel',{});
217 | %     end
218 | %     set(gca,'YTick',1);
219 | %     try
220 | %         set(gca,'YTickLabel',MDP(1).label.factor{f});
221 | %     end
222 | %     try
223 | %         set(gca,'YTickLabel',MDP(1).Bname{f});
224 | %     end
225 |     
226 |     yticklabels({'Left-Win','Right-Win'})
227 |     
228 | end
229 | 


--------------------------------------------------------------------------------