├── README.md
├── find_non_empty_cells.m
├── get_CV_R2.m
├── get_f_pvals_reg.m
├── make_predictor_matrix_generalcase.m
├── process_encoding_model.m
└── spline_basis30_int.mat


/README.md:
--------------------------------------------------------------------------------
 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 2 | 
 3 | Ben Engelhard, Princeton University (2019).
 4 | 
 5 | This package is provided free without any warranty; you can redistribute it and/or modify it under the terms of the GNU General Public License version 3 as published by the Free Software Foundation. If this code is used, please cite: B Engelhard et al. Specialized coding of sensory, motor, and cognitive variables in VTA dopamine neurons. Nature, 2019.
 6 | 
 7 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 8 | 
 9 | Encoding Model
10 | 
11 | This package preprocesses joint behavioral and neuronal data and then processes it through an encoding model in order to obtain a quantitative measure of the contributions of the behavioral variables to the activity of single neurons, as described in the following paper: B Engelhard et al. Specialized coding of sensory, motor, and cognitive variables in VTA dopamine neurons. Nature, 2019. Please see the paper for details on the encoding model.
12 | 
13 | Function list:
14 | make_predictor_matrix_generalcase.m
15 | process_encoding_model.m
16 | find_non_empty_cells.m
17 | get_CV_R2.m
18 | 
19 | Data files list:
20 | spline_basis30_int.mat
21 | 
22 | Instructions:
23 | First, the data has to be formatted to be used in the make_predictor_matrix_generalcase function. See the function header for specific details. After the predictor matrix is generated, it can be directly processed with the process_encoding_model function. In order to obtain a measure of significance for the relationship between behavioral variables and the neural activity, the obtained F-statistic for each behavioral variable should then be compared to a distribution of F-statistics obtained from a (reasonably large) number of shuffled data instantiations (i.e. the neural activity shuffled but with the same matrix of predictors). 
24 | 
25 | Notes: 
26 | Currently, event variables are convolved with a basis set composed of 7 splines and 30 timepoints. If you wish to change this, you may use the following package to generate a different spline basis set:
27 | Ramsay JO (2014). fdaM: Functional Data Analysis. MATLAB package, URL http://www.psych.mcgill.ca/misc/fda/downloads/FDAfuns/Matlab/.
28 | The generated spline basis set should be named spline_basis and saved in a matfile named 'spline_basis30_int.mat'.
29 | 
30 | 
31 | 


--------------------------------------------------------------------------------
/find_non_empty_cells.m:
--------------------------------------------------------------------------------
function [inds,inds2]=find_non_empty_cells(x)
% FIND_NON_EMPTY_CELLS  Locate the entries of a cell array that are not empty.
%   inds = find_non_empty_cells(x) returns the linear indices of the
%   non-empty cells of x.
%   [inds,inds2] = find_non_empty_cells(x) returns their row (inds) and
%   column (inds2) subscripts instead.
is_filled = ~cellfun('isempty', x);
if nargout >= 2
    % two outputs requested: row/column subscripts
    [inds,inds2] = find(is_filled);
    return
end
% zero or one output requested: linear indices
inds = find(is_filled);


--------------------------------------------------------------------------------
/get_CV_R2.m:
--------------------------------------------------------------------------------
  1 | % drop_type: 'norefit' - calculate regression weights with the full model, then zero the weights correspoding to the predictors being dropped
  2 | % drop_type: 'refit'   - calculate regression weights without the weights correspoding to the predictors being dropped (partial model)
  3 | 
  4 | function [R2,all_predicted,B_all] = get_CV_R2(full_predmat_cell,cur_neural_act_mat,test_trials_folds,train_trials_folds,trial_length_vec,inds_to_drop,drop_type,trial_types_to_match)
  5 | 
  6 | if nargin<6
  7 |     inds_to_drop=[];
  8 | end
  9 | if nargin<7
 10 |     drop_type='norefit';
 11 | end
 12 | if nargin<8
 13 |     trial_types_to_match = [];
 14 | end
 15 | if ~isempty(trial_types_to_match)
 16 |     for trctr=1:length(cur_neural_act_mat)
 17 |         tr_types_cell{trctr,1} = ones(size(cur_neural_act_mat{trctr},1),1)*trial_types_to_match(trctr);
 18 |     end
 19 | end
 20 | 
 21 | all_predicted = cell(size(cur_neural_act_mat));
 22 | all_neural_act = cell(size(cur_neural_act_mat));
 23 | for k=1:length(test_trials_folds)
 24 |     
 25 |     cur_Xtrain = cell2mat(full_predmat_cell(train_trials_folds{k}));
 26 |     cur_Ytrain = cell2mat(cur_neural_act_mat(train_trials_folds{k}));
 27 |     cur_Xtest  = cell2mat(full_predmat_cell(test_trials_folds{k}));
 28 |     cur_Ytest  = cell2mat(cur_neural_act_mat(test_trials_folds{k}));
 29 |     
 30 |     if ~isempty(inds_to_drop)
 31 |         switch drop_type
 32 |             case 'norefit'
 33 |                 if isempty(trial_types_to_match)
 34 |                     curB = glmfit(cur_Xtrain,cur_Ytrain,'normal','constant','on');
 35 |                 else
 36 |                     
 37 |                     cur_tr_types = cell2mat(tr_types_cell(train_trials_folds{k}));
 38 |                     all_tr_types = unique(cur_tr_types);
 39 |                     tr_types_num = zeros(1,length(all_tr_types));
 40 |                     for l=1:length(all_tr_types)
 41 |                         tr_types_num(l) = sum(cur_tr_types==all_tr_types(l));
 42 |                     end
 43 |                     weights_types = prod(tr_types_num)./tr_types_num/sum(tr_types_num);
 44 |                     weights_timepoints = zeros(size(cur_tr_types));
 45 |                     for l=1:length(all_tr_types)
 46 |                         weights_timepoints(cur_tr_types==all_tr_types(l)) = weights_types(l);
 47 |                     end
 48 |                     
 49 |                     curB = glmfit(cur_Xtrain,cur_Ytrain,'normal','constant','on','weights',weights_timepoints);
 50 |                 end
 51 |                 curB(inds_to_drop+1)=0;
 52 |                 cur_Ypred = [ones(size(cur_Xtest,1),1) cur_Xtest]*curB;
 53 |             case 'refit'
 54 |                 inds_to_use = setdiff(1:size(cur_Xtrain,2),inds_to_drop);
 55 |                 if isempty(trial_types_to_match)
 56 |                     curB = glmfit(cur_Xtrain(:,inds_to_use ),cur_Ytrain,'normal','constant','on');
 57 |                 else
 58 |                     
 59 |                     cur_tr_types = cell2mat(tr_types_cell(train_trials_folds{k}));
 60 |                     all_tr_types = unique(cur_tr_types);
 61 |                     tr_types_num = zeros(1,length(all_tr_types));
 62 |                     for l=1:length(all_tr_types)
 63 |                         tr_types_num(l) = sum(cur_tr_types==all_tr_types(l));
 64 |                     end
 65 |                     weights_types = prod(tr_types_num)./tr_types_num/sum(tr_types_num);
 66 |                     weights_timepoints = zeros(size(cur_tr_types));
 67 |                     for l=1:length(all_tr_types)
 68 |                         weights_timepoints(cur_tr_types==all_tr_types(l)) = weights_types(l);
 69 |                     end
 70 |                     
 71 |                     curB = glmfit(cur_Xtrain(:,inds_to_use ),cur_Ytrain,'normal','constant','on','weights',weights_timepoints);
 72 |                 end
 73 |                 cur_Ypred = [ones(size(cur_Xtest,1),1) cur_Xtest(:,inds_to_use)]*curB;
 74 |             otherwise
 75 |                 error('unknown drop type')
 76 |         end
 77 |     else
 78 |         if isempty(trial_types_to_match)
 79 |             
 80 |             curB = glmfit(cur_Xtrain,cur_Ytrain,'normal','constant','on');
 81 |         else
 82 |             
 83 |             cur_tr_types = cell2mat(tr_types_cell(train_trials_folds{k}));
 84 |             all_tr_types = unique(cur_tr_types);
 85 |             tr_types_num = zeros(1,length(all_tr_types));
 86 |             for l=1:length(all_tr_types)
 87 |                 tr_types_num(l) = sum(cur_tr_types==all_tr_types(l));
 88 |             end
 89 |             weights_types = prod(tr_types_num)./tr_types_num/sum(tr_types_num);
 90 |             weights_timepoints = zeros(size(cur_tr_types));
 91 |             for l=1:length(all_tr_types)
 92 |                 weights_timepoints(cur_tr_types==all_tr_types(l)) = weights_types(l);
 93 |             end
 94 |             curB = glmfit(cur_Xtrain,cur_Ytrain,'normal','constant','on','weights',weights_timepoints);
 95 |             
 96 |         end
 97 |         cur_Ypred = [ones(size(cur_Xtest,1),1) cur_Xtest]*curB;
 98 |     end
 99 |     all_predicted(test_trials_folds{k},1) = mat2cell(cur_Ypred ,trial_length_vec(test_trials_folds{k}),1);
100 |     all_neural_act(test_trials_folds{k},1) = mat2cell(cur_Ytest ,trial_length_vec(test_trials_folds{k}),1);
101 | end
102 | R2 = corr(cell2mat(all_neural_act),cell2mat(all_predicted)).^2;
103 | 
104 | 
105 | % get weights for regression on all data
106 | if nargout>2
107 |     cur_Xall = cell2mat(full_predmat_cell);
108 |     cur_Yall = cell2mat(cur_neural_act_mat);
109 |     
110 |     if isempty(trial_types_to_match)
111 |         B_all = glmfit(cur_Xall ,cur_Yall ,'normal','constant','on');
112 |     else
113 |         
114 |         cur_tr_types = cell2mat(tr_types_cell);
115 |         all_tr_types = unique(cur_tr_types);
116 |         tr_types_num = zeros(1,length(all_tr_types));
117 |         for l=1:length(all_tr_types)
118 |             tr_types_num(l) = sum(cur_tr_types==all_tr_types(l));
119 |         end
120 |         weights_types = prod(tr_types_num)./tr_types_num/sum(tr_types_num);
121 |         weights_timepoints = zeros(size(cur_tr_types));
122 |         for l=1:length(all_tr_types)
123 |             weights_timepoints(cur_tr_types==all_tr_types(l)) = weights_types(l);
124 |         end
125 |         
126 |         B_all = glmfit(cur_Xall ,cur_Yall,'normal','constant','on','weights',weights_timepoints);
127 |     end
128 |     
129 | end
130 | 
131 | 


--------------------------------------------------------------------------------
/get_f_pvals_reg.m:
--------------------------------------------------------------------------------
 1 | % X: the matrix of predictors (without the constant term)
 2 | % y: the dependent variable
 3 | % preds_to_test_cell: a cell with the indexes of predictors to be tested, for example {[1 2],[3 5],6}
 4 | 
 5 | function [Fp_vec,F_vec]=get_f_pvals_reg(X,y,preds_to_test_cell)
 6 | 
 7 | X = [ones(size(X,1),1) X];
 8 | n = size(X,1);  % number of observations
 9 | k = size(X,2);  % number of variables (including constant)
10 | 
11 | b = X \ y;                 % estimate b with least squares
12 | u = y - X * b;             % calculates residuals
13 | s2 = u' * u / (n - k);     % estimate variance of error term 
14 | BCOV = inv(X'*X) * s2;     % get covariance matrix of b 
15 | bse = diag(BCOV).^.5;      % standard errors
16 | 
17 | 
18 | clear Fp_vec
19 | for l=1:length(preds_to_test_cell)
20 |         preds_to_test_cell{l} = preds_to_test_cell{l}+1; %because of the constant
21 | 
22 |     R = zeros(length(preds_to_test_cell{l}),k);
23 |     for l2 = 1:length(preds_to_test_cell{l})
24 |         R(l2, preds_to_test_cell{l}(l2))=1;
25 |     end
26 |     
27 |     r = zeros(length(preds_to_test_cell{l}),1);          % Testing restriction: R * b = r
28 |     
29 |     num_restrictions = size(R, 1);
30 |     F = (R*b - r)'*inv(R * BCOV * R')*(R*b - r) / num_restrictions;   % F-stat 
31 |     F_vec(l) = F;
32 |     Fp_vec(l) = 1 - fcdf(F, num_restrictions, n - k);  % F p-val
33 |     
34 | end
35 | 
36 | 
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/make_predictor_matrix_generalcase.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %
  3 | %    Ben Engelhard, Princeton University (2019).
  4 | %
  5 | %    This program is provided free without any warranty; you can redistribute it and/or modify it under the terms of the GNU General Public License version 3 as published by the Free Software Foundation.
  6 | %    If this code is used, please cite: B Engelhard et al. Specialized coding of sensory, motor, and cognitive variables in VTA dopamine neurons. Nature, 2019.
  7 | %
  8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  9 | 
 10 | %%% make_predictor_matrix_generalcase.m
 11 | %%%
 12 | %%% Description: Make the predictor matrix to be used with the process_encoding_model function
 13 | %
 14 | % arguments: base_variables - a cell array where each term is a base variable which can be either a cell array or a vector. in both cases the division is by trials in a session. For a cell array base variable, 
 15 | %                             each term is a vector that corresponds to one trial in the session, and include the base variable's values in all timepoints of that trial. A vector base variable is necessarily a
 16 | %                             'whole-trial' variable where each term is '0' or '1' and denotes the value of that variable for all timepoints in that trial. At least one base variable needs to be a cell array.          
 17 | %            var_types      - a cell array with the same length as the 'base_variables' argument where each term denotes the type of base variable. possible types are: 
 18 | %                             'event'      : a binary base variable where values of '1' denote the occurence of the event variable.
 19 | %                             'whole-trial': a binary bas e variable that takes the same value in all timepoints of a given trial, corrsponding to wether the variable's occurence was valid in that trial. 
 20 | %                             'continuous' : a base variable that takes different values at different timepoints.
 21 | %            groupings      - a cell array that defines how the base variables are grouped into the behavioral variables used for subsequent processing in the encoding model. each term is a vector of
 22 | %                             indices (corresponding to the base_variables argument). All indices in one term of the cell arary are considered to belong to the same variable when processing the encoding model.
 23 | %                             Currently, only base variables of the same type can be grouped.
 24 | %
 25 | % outputs:   pred_allmat       - cell array corresponding to a matrix of predictors, each term corresponds to one trial where rows are timepoints and columns are the different predictors
 26 | %            pred_inds_cell    - cell array where each term has a vector of indices of the predictors that belong to a specific behavioral variable
 27 | %            grouped_var_types - cell array where each term has the type ('event','whole-trial', or 'continuous') of the corresponding behavioral variable. if no value is given, then each base variable is 
 28 | %                                considered to be a behavioral variable
 29 | 
 30 | function [pred_allmat,pred_inds_cell,grouped_var_types] = make_predictor_matrix_generalcase(base_variables,var_types,groupings)
 31 | 
 32 | if nargin<3
 33 |     groupings = mat2cell(1:length(base_variables),1,ones(1,length(base_variables)));
 34 | end
 35 | 
 36 | num_base_vars = length(base_variables);
 37 | numtrials = length(base_variables{1});
 38 | load('spline_basis30_int.mat'); % this results in the variable 'spline_basis' which is used for event variables. It is curently built using 7 splines and 30 timepoints. 
 39 |                                 % change this if you want a different spline basis set.
 40 | 
 41 | % find the first cell array base variable to get from it the number of timepoints in each trial
 42 | num_points_per_trial = zeros(numtrials,1);
 43 | found_cellarraybasevar = 0;
 44 | for basevarctr = 1:num_base_vars
 45 |     if iscell(base_variables{basevarctr})
 46 |         for k=1:numtrials
 47 |             num_points_per_trial(k) = length(base_variables{basevarctr}{k});
 48 |         end
 49 |         found_cellarraybasevar = 1;
 50 |         continue
 51 |     end
 52 | end
 53 | if ~found_cellarraybasevar
 54 |     error('At least one base variable needs to be a cell array. If necessary, convert a whole trial-variable to a cell array variable')
 55 | end
 56 | 
 57 | for k=1:numtrials
 58 |     clear pred_curmat
 59 |     
 60 |     % process event variables
 61 |     event_vars_inds = find_non_empty_cells(strfind(var_types,'event'));
 62 |     
 63 |     for varctr = 1:length(event_vars_inds)
 64 |         clear cur_event_var
 65 |         for spctr = 1:size(spline_basis,2)
 66 |             w = conv(base_variables{event_vars_inds(varctr)}{k} ,spline_basis(:,spctr));
 67 |             cur_event_var(:,spctr) = w(1:length(base_variables{event_vars_inds(varctr)}{k}));
 68 |         end
 69 |         pred_curmat{1,event_vars_inds(varctr)} = cur_event_var;
 70 |     end
 71 | 
 72 |     % process whole-trial variables. they can be either vectors or cell arrays
 73 |     wholetrial_vars_inds = find_non_empty_cells(strfind(var_types,'whole-trial'));
 74 |     
 75 |     for varctr = 1:length(wholetrial_vars_inds)
 76 |         if iscell(base_variables{wholetrial_vars_inds(varctr)})
 77 |             cur_wt_var = base_variables{wholetrial_vars_inds(varctr)}{k};
 78 |         else
 79 |             cur_wt_var  = zeros(num_points_per_trial(k),1) + base_variables{wholetrial_vars_inds(varctr)}(k);
 80 |         end
 81 |         pred_curmat{1,wholetrial_vars_inds(varctr)} = cur_wt_var;
 82 |     end
 83 | 
 84 |     % process continuous variables. they can be either vectors or cell arrays
 85 |     cont_vars_inds = find_non_empty_cells(strfind(var_types,'continuous'));
 86 |     
 87 |     for varctr = 1:length(cont_vars_inds)
 88 |         pred_curmat{1,cont_vars_inds(varctr)} = base_variables{cont_vars_inds(varctr)}{k};
 89 |     end
 90 | 
 91 |     pred_allmat{k,1} = cell2mat(pred_curmat);
 92 |     
 93 |     % get the indices of the predictors coresponding to the different variables
 94 |     if k==1
 95 |         for varctr = 1:size(pred_curmat,2)
 96 |            preds_inds_basevars{varctr} = (1:size(pred_curmat{1,varctr},2)) + size(cell2mat(pred_curmat(1,1:varctr-1)),2);
 97 |         end
 98 |         for grpctr = 1:length(groupings)
 99 |             pred_inds_cell{grpctr} = cell2mat(preds_inds_basevars(groupings{grpctr}));
100 |             cur_types = var_types(groupings{grpctr});
101 |             grouped_var_types{grpctr} = cur_types{1};
102 |             
103 |             if ~isempty(setdiff(cur_types,cur_types{1}))
104 |                error('Grouped variables must have the same type') 
105 |             end
106 |         end
107 |     end
108 |     
109 | end
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/process_encoding_model.m:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %
  3 | %    Ben Engelhard, Princeton University (2019).
  4 | %
  5 | %    This program is provided free without any warranty; you can redistribute it and/or modify it under the terms of the GNU General Public License version 3 as published by the Free Software Foundation.
  6 | %    If this code is used, please cite: B Engelhard et al. Specialized coding of sensory, motor, and cognitive variables in VTA dopamine neurons. Nature, 2019.
  7 | %
  8 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  9 | 
 10 | %%% process_encoding_model.m
 11 | %%%
 12 | %%% Description: Process the encoding model on a matrix of predictors belonging to different behavioral variables and a matrix of neural activity traces correspoding to one or more neurons. return the relative
 13 | %%% contribution of each behavioral variable to each neuron and the F-statistic derived from a nested model comparison for each behavioral variable and neuron to determine if the behavioral variable is 
 14 | %%% significantly represented in the neuron's activity.
 15 | %
 16 | % arguments: pred_allmat      - cell array correspoding to a matrix of behavioral predictors, each term is a trial and contains a matrix where rows are timepoints and columns are behavioral predictors.
 17 | %            pred_inds_cell   - cell array where each term has a vector of indices of the predictors that belong to a specific behavioral variable
 18 | %            neural_act_mat   - cell array correspoding to a matrix of activity traces, each term is a trial and contains a matrix where rows are timepoints and columns are traces correspoding to different
 19 | %                               neurons. Timepoints where the neuronal activity is not defined (e.g. a the imaging became unstable) are filled with NaNs (Currently it is assumed that before the first NaN
 20 | %                               activity was always defined, and after the first NaN activity is never defined).
 21 | %            pred_types_cell  - cell array where each terms indicates the type of behavioral variable ('event', 'whole-trial', or 'continuous').
 22 | %            approach         - 'norefit': calculate regression weights with the full model, then zero the weights correspoding to the predictors being dropped. 'refit' : calculate regression weights
 23 | %                               without the weights correspoding to the predictors being dropped (partial model).
 24 | %            trial_types_to_match - If it is desired to match the effects on the regression of trials of different types, then this argument should be a vector the same length as pred_allmat where each term is 
 25 | %                                   a number denoting trial type. 
 26 | %
 27 | % outputs:   relative_contrib - a matrix where rows are behavioral variables and columns are neurons. each term is the relative contribution of the behavioral variable to the neural activity.
 28 | %            Fstat_mat        - a matrix where rows are behavioral variables and columns are neurons. each term is the F-statistic associated with the nested model comparison where the predicted variable is the
 29 | %                               activity of the neuron, the full model is the model containing the predictors from al behavioral variables, and the partial model is the model containing the predictors from all
 30 | %                               variables except the one being tested. The value of this statistic shoudl be compared to a distirbution of staitsitc values obtained by erforing the same oprateion on shuffled 
 31 | %                               data, directly using the p-value assocaited with the staitstic is not valid given the autocorrelations in the data.
 32 | %            full_R2_vec      - a vector indicating the R2 of the full model for each neuron.
 33 | %            B_all            - cell array with predictor weights for each neuron calculated by performing the regression on all data (without crossvalidation).
 34 | %            pred_inds_cell_opt - cell array where each term corresponds to a neuron and contains a cell array where each term has a vector of indices of the predictors that belong to a specific behavioral 
 35 | %                                 variable, updated to reflect the optimal model for the given neuron.
 36 | 
 37 | function [relative_contrib,Fstat_mat,full_R2_vec,predicted_gcamp,B_all,pred_inds_cell_opt] = process_encoding_model(pred_allmat, pred_inds_cell, neural_act_mat, pred_types_cell,approach, trial_types_to_match)
 38 | if nargin<5
 39 |     approach = 'norefit';
 40 | end
 41 | if nargin<6
 42 |     trial_types_to_match = [];
 43 | end
 44 | 
 45 | neural_act_mat=neural_act_mat(:); % make it a column cell array in case it's not
 46 | 
 47 | numcells = size(neural_act_mat{1},2);
 48 | numtrials_all = length(pred_allmat);
 49 | 
 50 | % find for each neuron trials where activity is defined, and also the length of each trial
 51 | defined_mat = zeros(numtrials_all,numcells);
 52 | trial_length_vec = zeros(numtrials_all,1);
 53 | 
 54 | for trctr=1:numtrials_all
 55 |     defined_mat(trctr,:) = ~sum(isnan(neural_act_mat{trctr}));
 56 |     trial_length_vec (trctr) = size(neural_act_mat{trctr},1);
 57 | end
 58 | 
 59 | 
 60 | % use crossvalidation to find the best polynomial degree to apply for the continuous variables
 61 | num_cv_folds = 5;
 62 | max_poly_deg = 3;
 63 | 
 64 | % rng(0,'twister')
 65 | full_R2_vec = zeros(numcells,1);
 66 | partial_R2_vec = zeros(numcells,length(pred_inds_cell));
 67 | relative_contrib = zeros(numcells,length(pred_inds_cell));
 68 | for cellctr = 1:numcells
 69 |     cur_good_trials = 1:find(defined_mat(:,cellctr),1,'last');
 70 |     num_trials_per_fold = ceil(length(cur_good_trials)/num_cv_folds);
 71 |     if isempty(cur_good_trials)
 72 |         continue
 73 |     end
 74 |     temp_neural_act = cell2mat(neural_act_mat(cur_good_trials));
 75 |     cur_neural_act_mat = mat2cell(temp_neural_act(:, cellctr),trial_length_vec(cur_good_trials),1);
 76 |     
 77 |     % zscore the predictors
 78 |     cur_pred_allmat_z = mat2cell(zscore(cell2mat(pred_allmat(cur_good_trials))),trial_length_vec(cur_good_trials),size(pred_allmat{1},2));
 79 |     rng('default')
 80 |     cur_random_vector = randperm(length(cur_good_trials));
 81 |     
 82 |     % get indices of test and train trials for CV
 83 |     for foldctr = 1:num_cv_folds
 84 |         kf_inds{foldctr} = num_trials_per_fold*(foldctr-1)+1:min(num_trials_per_fold*foldctr,length(cur_good_trials));
 85 |         test_trials_folds{foldctr} = cur_random_vector(kf_inds{foldctr});
 86 |         train_trials_folds{foldctr} = setdiff(cur_random_vector,test_trials_folds{foldctr});
 87 |     end
 88 |     
 89 |     [~,F_vec] = get_f_pvals_reg(cell2mat(cur_pred_allmat_z),zscore(cell2mat(cur_neural_act_mat)),pred_inds_cell);
 90 |     Fstat_mat(cellctr,:) = F_vec;
 91 |     
 92 |     % make matrix of all possible combinations of polynomial degrees for all continuous variables
 93 |     cont_inds = find_non_empty_cells(strfind(pred_types_cell,'continuous'));
 94 |     non_cont_inds = setdiff(1:length(pred_types_cell),cont_inds);
 95 |     num_cont_inds = length(cont_inds);
 96 |     all_degs_mat = [];
 97 |     clear cur_cont_preds_inds cur_base_preds
 98 |     temp_predmat = cell2mat(cur_pred_allmat_z);
 99 |     
100 |     if num_cont_inds>0
101 |         for cont_var_ctr = 1:num_cont_inds
102 |             all_degs_mat  = ceil([all_degs_mat mod((1:max_poly_deg^num_cont_inds)/(max_poly_deg^(cont_var_ctr-1)),max_poly_deg+.01)']);
103 |             cur_cont_preds_inds{cont_var_ctr} = pred_inds_cell{cont_inds(cont_var_ctr)};
104 |             cur_base_preds{cont_var_ctr} = temp_predmat(:,cur_cont_preds_inds{cont_var_ctr}); % predictors for the current continuous variable before adding any additional polynomial degrees
105 |         end
106 |         all_cont_pred_inds = cell2mat(cur_cont_preds_inds);
107 |         all_non_cont_pred_inds = setdiff(1:size(cur_pred_allmat_z{1},2),all_cont_pred_inds);
108 |         non_cont_predmat = temp_predmat(:,all_non_cont_pred_inds);
109 |         
110 |         
111 |         deg_R2_vec = zeros(1,size(all_degs_mat,1));
112 |         for degctr = 1:size(all_degs_mat,1)
113 |             full_predmat = non_cont_predmat;
114 |             for cont_var_ctr = 1:num_cont_inds
115 |                 cur_cont_pred_add = [];
116 |                 for curdegctr = 1:all_degs_mat(degctr,cont_var_ctr)
117 |                     cur_cont_pred_add = [cur_cont_pred_add cur_base_preds{cont_var_ctr}.^curdegctr];
118 |                 end
119 |                 full_predmat = [full_predmat cur_cont_pred_add];
120 |             end
121 |             
122 |             %get CV R2 for this predictor matrix
123 |             full_predmat_cell = mat2cell(full_predmat,trial_length_vec(cur_good_trials),size(full_predmat,2));
124 |             if nargout<5
125 |                 [cur_R2,cur_predicted] = get_CV_R2(full_predmat_cell,cur_neural_act_mat,test_trials_folds,train_trials_folds,trial_length_vec(cur_good_trials),[],approach,trial_types_to_match);
126 |             else
127 |                 [cur_R2,cur_predicted,curB] = get_CV_R2(full_predmat_cell,cur_neural_act_mat,test_trials_folds,train_trials_folds,trial_length_vec(cur_good_trials),[],approach,trial_types_to_match);
128 |                 weights_reg_cell{degctr} = curB;                
129 |             end
130 |             deg_R2_vec(degctr) = cur_R2;
131 |             deg_predicted_cell{degctr} = cur_predicted;
132 |         end
133 |         
134 |         [cur_full_R2,best_deg_ind] = max(deg_R2_vec);
135 |         full_R2_vec(cellctr,1) = cur_full_R2;
136 |         predicted_gcamp{cellctr} = deg_predicted_cell{best_deg_ind};
137 |         if nargout>=5
138 |             B_all{cellctr} = weights_reg_cell{best_deg_ind};
139 |         end
140 |         % now make the matrix with the optimal poly degree for each continuous variable and update the predictor indices as well
141 |         clear pred_inds_cell_new
142 |         full_predmat = [];
143 |         pred_inds_cell_new = {};
144 |         new_pred_inds_ctr = 1;
145 |         for non_cont_var_ctr = 1:length(non_cont_inds)
146 |             cur_predmatvar = temp_predmat(:,cell2mat(pred_inds_cell(non_cont_inds(non_cont_var_ctr))));
147 |             cur_num_preds = size(full_predmat ,2);
148 |             full_predmat = [full_predmat cur_predmatvar];
149 |             pred_inds_cell_new{non_cont_inds(non_cont_var_ctr)} = (1:size(cur_predmatvar,2))+cur_num_preds ;
150 |         end
151 |         for cont_var_ctr = 1:num_cont_inds
152 |             all_cont_pred_add{cont_var_ctr} = [];
153 |             for curdegctr = 1:all_degs_mat(best_deg_ind,cont_var_ctr)
154 |                 all_cont_pred_add{cont_var_ctr} = [all_cont_pred_add{cont_var_ctr} cur_base_preds{cont_var_ctr}.^curdegctr];
155 |             end
156 |             cur_num_preds = size(full_predmat ,2);
157 |             full_predmat = [full_predmat zscore(all_cont_pred_add{cont_var_ctr})];
158 |             pred_inds_cell_new{cont_inds(cont_var_ctr)} = (1:size(all_cont_pred_add{cont_var_ctr},2))+cur_num_preds ;
159 |         end
160 |         
161 |         full_predmat_cell = mat2cell(full_predmat,trial_length_vec(cur_good_trials),size(full_predmat,2));
162 |         
163 |     else
164 |         full_predmat_cell = cur_pred_allmat_z;
165 |         full_R2_vec(cellctr,1) = get_CV_R2(full_predmat_cell,cur_neural_act_mat,test_trials_folds,train_trials_folds,trial_length_vec(cur_good_trials),[],approach,trial_types_to_match);
166 |         pred_inds_cell_new = pred_inds_cell;
167 |     end
168 |     pred_inds_cell_opt{cellctr} = pred_inds_cell_new;
169 |     % now calculate the relative contributions.  first calculate the R2 when each variable is omitted.
170 |     
171 |     for varctr = 1:length(pred_inds_cell_new)
172 |         partial_R2_vec(cellctr,varctr) =  get_CV_R2(full_predmat_cell,cur_neural_act_mat,test_trials_folds,train_trials_folds,trial_length_vec(cur_good_trials),cell2mat(pred_inds_cell_new(varctr)),approach,trial_types_to_match);
173 |     end
174 |     
175 |     cur_R2_diff = (full_R2_vec(cellctr,1) - partial_R2_vec(cellctr,:))/full_R2_vec(cellctr,1);
176 |     cur_R2_diff(cur_R2_diff<0)=0;
177 |     cur_R2_diff(cur_R2_diff>1)=1;
178 |     relative_contrib(cellctr,:) = cur_R2_diff/sum(cur_R2_diff);
179 |     
180 |     
181 | end
182 | 
183 | 
184 | 
185 | 


--------------------------------------------------------------------------------
/spline_basis30_int.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/benengx/encodingmodel/b557d2db3721e9ede4c82ce95aeabdf75f4ea1cf/spline_basis30_int.mat


--------------------------------------------------------------------------------