├── Functions
    ├── DietConstrain.m
    ├── GenerateMSPInformation.m
    ├── KeepNecessaryRxns.m
    ├── MakeCommunity.m
    ├── MetaGenomicsReactionScore.m
    ├── MetagenomeToReactions.m
    ├── ReactionAbundanceGenerator.m
    ├── ReactobiomeGenerator.m
    ├── RxnRichnessGenerator.m
    ├── USDAcreatingDiet.m
    ├── checkCatalog.m
    ├── contextSpecificModelGenertion.m
    ├── contextSpecificModelTune.m
    ├── convertCatalogAnnotation.m
    ├── fakeModelGenerator.m
    ├── microbiomeGEMgeneration.m
    └── pRSEGenerator.m
├── MIGRENE_pipeline.m
├── README.md
├── data
    ├── BacterialAbundance.xlsx
    ├── MSPgeneProfile.txt
    ├── SubSet_hs_10_4_igc2_annot.txt
    ├── Taxonomy.xlsx
    ├── ec2rn.txt
    ├── ko2rn.txt
    └── pathways.xlsx
├── mat
    ├── DietInput.mat
    ├── Diets.mat
    ├── KBase.mat
    ├── MetInformation.mat
    ├── RefMetabolicModel.mat
    ├── USDAdataset.mat
    ├── bibliome.mat
    └── microbiomeGEM.mat
├── saveDir
    └── test.txt
└── tutorials
    ├── IntegrationCatalogToModel.m
    ├── MAGMAgeneration.m
    └── PersonalizedMicrobiomeMetabolism.m


/Functions/DietConstrain.m:
--------------------------------------------------------------------------------
 1 | function [constrainedModel]= DietConstrain(model,dietOption)
 2 | % this function constrains the lower bounds of a GEM using the dietOption.
 3 | %inputs:
 4 | %   model:              metabolic Model with COBRA or RAVEN format (the
 5 | %                       fields rxns and lb are used),
 6 | %   dietOption:         column of the diet table to apply, 1 to 5
 7 | %                       (1:high Fibre Plant Based, 2:high Fibre omnivore,
 8 | %                       3:high Protein Plant based, 4:high protein omnivore,
 9 | %                       5:UK average.)
10 | %outputs:
11 | %   constrainedModel:   copy of model in which, for every non-zero diet
12 | %                       entry, the lower bound of the matching reaction is
13 | %                       set to the negated diet value
14 | 
15 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
16 | 
17 | % get path to where the MIGRENE Toolbox is located
18 | MIGDIR = fileparts(which('MIGRENE_pipeline'));
19 | if isempty(MIGDIR)
20 |     error('MIGRENE_pipeline was not found on the MATLAB path. please add the MIGRENE Toolbox to the path')
21 | end
22 | % provide the path to the diets. The file is shipped as "Diets.mat"; the
23 | % exact case matters on case-sensitive file systems.
24 | dietFile = [MIGDIR filesep 'mat' filesep 'Diets.mat'];
25 | % load into a structure so that no variable is created implicitly
26 | loaded = load(dietFile,'diets');
27 | if ~isfield(loaded,'diets')
28 |     error('the variable "diets" was not found in %s', dietFile)
29 | end
30 | diets = loaded.diets;
31 | 
32 | % type of diet: must select an existing column of diets.value
33 | nDiets = size(diets.value,2);
34 | if ~isnumeric(dietOption) || ~isscalar(dietOption) || dietOption ~= round(dietOption) ...
35 |         || dietOption < 1 || dietOption > nDiets
36 |     error('dietOption must be an integer between 1 and %d', nDiets)
37 | end
38 | 
39 | for i = 1:length(diets.rxn)
40 |     % uptake is expressed as a negative lower bound
41 |     value = -(diets.value(i,dietOption));
42 |     if value ~= 0
43 |         model.lb(strcmp(model.rxns, diets.rxn{i})) = value;
44 |     end
45 | end
46 | constrainedModel = model;
47 | end


--------------------------------------------------------------------------------
/Functions/GenerateMSPInformation.m:
--------------------------------------------------------------------------------
 1 | function [MSPInformation]= GenerateMSPInformation(taxo,RXNDIR,model)
 2 | % this function gathers the taxonomy information and reaction state for bacteria.
 3 | %inputs:
 4 | %   taxo:               taxonomy file, read with readtable. The first column
 5 | %                       gives the MSP IDs; the columns named genus, family,
 6 | %                       order, class, phylum and species are used if present
 7 | %   RXNDIR:             path to where <MSP ID>.mat, holding the variable
 8 | %                       RxnState, was saved for each bacterium (MSP)
 9 | %   model:              reference metabolic model with COBRA or RAVEN format
10 | %                       (only model.rxns is used)
11 | % output:
12 | %   MSPInformation:     Structure includes:
13 | %                       taxoLevel, the taxonomy level names that were found
14 | %                       (genus to phylum order). taxoInfo, taxonomy
15 | %                       information for each bacteria. taxoGroup: taxonomy
16 | %                       group (integer label per level) for each bacteria.
17 | %                       rxns, the reaction name in reference model.
18 | %                       bacteria, MSP IDs. BacteriaNames, species name
19 | %                       (empty if there is no species column).
20 | %                       RxnStateAll, the reaction state for each bacteria
21 | %                       (reactions x bacteria); biomass reactions are
22 | %                       always set to 1
23 | 
24 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
25 | 
26 | % read taxo info
27 | T = readtable(taxo);
28 | % get the MSP IDs (first column)
29 | mspNames = table2cell(T(:,1));
30 | % provide the taxo info from bottom to top taxonomical levels (genus to
31 | % phylum level)
32 | TaxoAll = T.Properties.VariableNames;
33 | levels = {'genus' 'family' 'order' 'class' 'phylum'};
34 | levelColumns = [];
35 | for i = 1:length(levels)
36 |     levelColumns = [levelColumns find(strcmp(TaxoAll,levels{i}))]; %#ok<AGROW>
37 | end
38 | if isempty(levelColumns)
39 |     error('there is no taxonomy info in the provided excel file. make sure the first row provide the taxonomy name i.e. genus to phylum')
40 | end
41 | Taxo = TaxoAll(levelColumns); % name of sorted levels
42 | infoFile = table2cell(T(:,levelColumns)); % taxonomy info for each species
43 | % dedicate the groups in each taxonomy level
44 | infoFile1 = zeros(size(infoFile));
45 | for i = 1:size(infoFile,2)
46 |     [~,~,ic] = unique(infoFile(:,i));
47 |     infoFile1(:,i) = ic;
48 | end
49 | 
50 | % collect all the info in a structure
51 | MSPInformation.taxoLevel = Taxo;
52 | MSPInformation.taxoInfo = infoFile;
53 | MSPInformation.taxoGroup = infoFile1;
54 | MSPInformation.rxns = model.rxns;
55 | MSPInformation.bacteria = mspNames;
56 | 
57 | speciesColumn = find(strcmp(TaxoAll,'species'));
58 | if ~isempty(speciesColumn)
59 |     MSPInformation.BacteriaNames = table2cell(T(:,speciesColumn));
60 | else
61 |     MSPInformation.BacteriaNames = {};
62 | end
63 | 
64 | % biomass reactions are forced to "present": prefer the exact ID and fall
65 | % back to every reaction whose name contains "biomass" (any case)
66 | biomassRows = strcmp(model.rxns,'Biomass_Bacteria');
67 | if ~any(biomassRows)
68 |     biomassRows = ~cellfun('isempty',strfind(lower(model.rxns),'biomass'));
69 | end
70 | 
71 | % all the RxnStates were collected from reactionProfile directory
72 | nRxns = numel(model.rxns);
73 | nMSP = numel(mspNames);
74 | MSPInformation.RxnStateAll = zeros(nRxns,nMSP);
75 | for i = 1:nMSP
76 |     stateFile = [RXNDIR filesep mspNames{i} '.mat'];
77 |     if exist(stateFile,'file') ~= 2
78 |         error(['there is no information for ' mspNames{i} '. please check the dedicated directory. Besides, you might not generate it by MetagenomeToReactions function'])
79 |     end
80 |     loaded = load(stateFile,'RxnState');
81 |     if ~isfield(loaded,'RxnState')
82 |         error(['the variable RxnState was not found in ' stateFile])
83 |     end
84 |     RxnState = loaded.RxnState(:);
85 |     if numel(RxnState) ~= nRxns
86 |         error(['the length of RxnState in ' stateFile ' does not match the number of reactions in the model'])
87 |     end
88 |     RxnState(biomassRows) = 1;
89 |     MSPInformation.RxnStateAll(:,i) = RxnState;
90 | end
91 | end


--------------------------------------------------------------------------------
/Functions/KeepNecessaryRxns.m:
--------------------------------------------------------------------------------
 1 | function reducedModelTemp = KeepNecessaryRxns(model, score, threshold, min)
 2 | % this function removes the reactions that are neither supported by their
 3 | % score nor carry flux in a penalty-minimising solution.
 4 | %inputs:
 5 | %   model:              metabolic model with COBRA or RAVEN format.
 6 | %   score:              a numeric column vector that shows the score of each
 7 | %                       reaction in the model (-1 is handled as 0)
 8 | %   threshold:          score cut-off: reactions scoring above it are kept,
 9 | %                       reactions scoring below it are penalised by
10 | %                       (threshold - score)
11 | %   min:                minimum fraction of objective (this name hides the
12 | %                       builtin min inside this function)
13 | %outputs:
14 | %   reducedModelTemp:   model without the reactions that were not kept
15 | 
16 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
17 | [irrevModel,~,rev2irrev,irrev2rev] = convertToIrreversible(model);
18 | nIrrev = length(irrevModel.rxns);
19 | 
20 | % every irreversible reaction inherits the score of its parent reaction
21 | irrevScore = zeros(nIrrev,1);
22 | irrevScore(1:nIrrev,1) = score(irrev2rev(1:nIrrev,1),1);
23 | irrevScore(irrevScore == -1) = 0;
24 | 
25 | % keep at least the requested fraction of the optimal objective value
26 | refSolution = optimizeCbModel(model);
27 | irrevModel.lb(irrevModel.c == 1,1) = refSolution.f*min; % minimum fraction of objective
28 | 
29 | % new objective: penalty of the reactions scoring below the threshold
30 | irrevModel.c(:,1) = 0;
31 | penalised = irrevScore < threshold;
32 | irrevModel.c(penalised,1) = threshold - irrevScore(penalised,1);
33 | gimmeSolution = optimizeCbModel(irrevModel,'min');
34 | 
35 | % a reaction is kept if it carries flux or if its score is above the
36 | % threshold; without a usable solution only the score criterion applies
37 | keepIrrev = zeros(length(irrevScore),1);
38 | keepIrrev(gimmeSolution.x > 0,1) = 1;
39 | keepIrrev(irrevScore > threshold) = 1;
40 | 
41 | %Translate reactionActivity to reversible model
42 | nRev = length(model.rxns);
43 | keepRev = zeros(nRev,1);
44 | for k = 1:nRev
45 |     members = rev2irrev{k,1};
46 |     if any(keepIrrev(members) > 0)
47 |         keepRev(k,1) = 1;
48 |     end
49 | end
50 | rxn2remove = model.rxns(keepRev == 0);
51 | reducedModelTemp = removeRxns(model,rxn2remove);
52 | end


--------------------------------------------------------------------------------
/Functions/MakeCommunity.m:
--------------------------------------------------------------------------------
  1 | function [report]= MakeCommunity(modelList,PathToModels,abundance,sampleName,PathToSave,biomass)
  2 | % this function builds one community model per sample from the individual
  3 | % bacterial models and saves it in PathToSave.
  4 | %inputs:
  5 | %   modelList:       list of model names (one per row of abundance).
  6 | %   PathToModels:    a structure with the fields path (directory of the
  7 | %                    <model name>.mat files) and name (name of the model
  8 | %                    variable stored in those files)
  9 | %   abundance:       matrix of microbiome (MSP) abundance profile, rows
 10 | %                    follow modelList and columns follow sampleName
 11 | %   sampleName:      list of sample names
 12 | %   PathToSave:      a string showing the directory to save generated community models
 13 | %OPTIONAL INPUT
 14 | %   biomass:         a structure with the fields mets (biomass metabolite
 15 | %                    ID) and EXrxn (biomass exchange reaction name).
 16 | %                    default: 'cpd11416ee[lu]' and 'Ex_Biomass'
 17 | %outputs:
 18 | %   report:          n-by-2 cell: sample name and true for every sample
 19 | %                    whose community model was saved
 20 | %
 21 | % for every sample the file <PathToSave>/<sample>.mat is written with the
 22 | % variables PmergedModel (community model) and PmergedModel1 (the same
 23 | % model after coupleRxnList2Rxn was applied for every bacterium).
 24 | 
 25 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidhkori@kcl.ac.uk
 26 | if nargin<6
 27 |     biomass={};
 28 | end
 29 | % resolve the biomass identifiers once instead of branching everywhere
 30 | if ~isempty(biomass)
 31 |     biomassMet=char(biomass.mets);
 32 |     biomassEx=char(biomass.EXrxn);
 33 | else
 34 |     biomassMet='cpd11416ee[lu]';
 35 |     biomassEx='Ex_Biomass';
 36 | end
 37 | 
 38 | % keep only the bacteria whose model file exists
 39 | modelList=modelList(:);
 40 | hasModel=false(numel(modelList),1);
 41 | for h1 = 1:numel(modelList)
 42 |     hasModel(h1)=exist([PathToModels.path filesep modelList{h1} '.mat'],'file')==2;
 43 | end
 44 | modelList=modelList(hasModel);
 45 | abundance=abundance(hasModel,:);
 46 | % keep the bacteria with at least one nonzero observation; the mask is
 47 | % computed once so that abundance and modelList stay aligned
 48 | keepRow=sum(abundance,2)~=0;
 49 | abundance=abundance(keepRow,:);
 50 | modelList=modelList(keepRow);
 51 | % keep the samples with at least one nonzero observation
 52 | keepCol=sum(abundance,1)~=0;
 53 | abundance=abundance(:,keepCol);
 54 | sampleName=sampleName(keepCol);
 55 | 
 56 | % collect the exchanged metabolites and the exchange reactions (with their
 57 | % formulas) of all models
 58 | exchangeMetabolites={};
 59 | reactions={};
 60 | for h1 = 1:numel(modelList)
 61 |     model=loadCommunityMember(PathToModels,modelList{h1});
 62 |     exchangeMetabolites=vertcat(exchangeMetabolites,GetExchangeMetabolite(model)); %#ok<AGROW>
 63 |     rxn=model.rxns(~cellfun('isempty',strfind(model.rxns,'Ex_')),1);
 64 |     rxn(:,2)=printRxnFormula(model,rxn);
 65 |     reactions=vertcat(reactions,rxn); %#ok<AGROW>
 66 | end
 67 | exchangeMetabolites=unique(exchangeMetabolites);
 68 | mets_art=exchangeMetabolites;
 69 | mets_art(:,2) = strrep(mets_art(:,1), '[e]', '[lu]');
 70 | mets_art(:,3) = strrep(mets_art(:,1), '[e]', '[fo]');
 71 | mets_art(:,4) = strrep(mets_art(:,1), '[e]', '[fe]');
 72 | [~,idx]=unique(reactions(:,1));
 73 | reactions1=reactions(idx,:);
 74 | 
 75 | % find, for every exchanged metabolite, an exchange reaction whose formula
 76 | % mentions it (the first hit is used when there are several)
 77 | Ex_art=cell(size(mets_art,1),1);
 78 | for h1=1:size(mets_art,1)
 79 |     hit=find(~cellfun('isempty',strfind(reactions1(:,2),mets_art{h1,1})),1);
 80 |     if isempty(hit)
 81 |         error('no exchange reaction (Ex_) was found for the metabolite %s', mets_art{h1,1})
 82 |     end
 83 |     Ex_art{h1,1}=reactions1{hit,1};
 84 | end
 85 | Ex_art(:,2) = strrep(Ex_art(:,1), 'Ex_', 'FoEx_');
 86 | Ex_art(:,3) = strrep(Ex_art(:,1), 'Ex_', 'Fo_');
 87 | Ex_art(:,4) = strrep(Ex_art(:,1), 'Ex_', 'Fe_');
 88 | Ex_art(:,5) = strrep(Ex_art(:,1), 'Ex_', 'FeEx_');
 89 | [fakemodel, fakemodel1]=fakeModelGenerator(mets_art,Ex_art);
 90 | 
 91 | % convert to relative abundance (columns with a zero sum were removed above)
 92 | abundance=bsxfun(@rdivide,abundance,sum(abundance,1));
 93 | 
 94 | report={};
 95 | for h1=1:numel(sampleName)
 96 |     abundanceS=abundance(:,h1);
 97 |     present=abundanceS ~= 0;
 98 |     modelListN=modelList(present);
 99 |     abundanceN=abundanceS(present);
100 |     % every bacterium gets its own "<model name>_" prefix
101 |     modelList1=strcat(modelListN, '_');
102 | 
103 |     modelsM=cell(numel(modelListN),1);
104 |     for h2 = 1:numel(modelListN)
105 |         model=loadCommunityMember(PathToModels,modelListN{h2});
106 |         %remove the constraint from the model
107 |         minLB=min(model.lb);
108 |         model.lb(model.lb > minLB & model.lb < 0)=-1000;
109 |         % replace the exchange reactions by the matching part of the fake model
110 |         exchangeCom=intersect(model.rxns, fakemodel1.rxns);
111 |         fakemodelS = removeRxns(fakemodel1,setdiff(fakemodel1.rxns,exchangeCom));
112 |         model = removeRxns(model,Ex_art(:,1));
113 |         model = mergeTwoModels(model, fakemodelS, 1, false);
114 |         model.lb(strcmp(model.rxns,biomassEx))=0;
115 |         model.rxns = strcat(modelList1{h2, 1}, model.rxns);
116 |         % prefix every metabolite that is not in the shared 'ee[lu]'
117 |         % compartment, plus the biomass metabolite
118 |         Lumen=model.mets(cellfun('isempty',strfind(model.mets,'ee[lu]')));
119 |         Lumen=union(Lumen,biomassMet);
120 |         isOwn=ismember(model.mets,Lumen);
121 |         model.mets(isOwn)=strcat(modelList1{h2, 1}, model.mets(isOwn));
122 |         modelsM{h2,1}=model;
123 |     end
124 | 
125 |     % merge the bacteria one by one (also works for a single bacterium)
126 |     merged=modelsM{1};
127 |     for i = 2:numel(modelsM)
128 |         merged = mergeTwoModels(merged, modelsM{i}, 1, false);
129 |     end
130 |     mergedModelS=mergeTwoModels(merged, fakemodel, 1, false);
131 | 
132 |     % make a global biomass including the biomasses of the bacteria in the community
133 |     BiomassAll=mergedModelS.mets(~cellfun('isempty',strfind(mergedModelS.mets,biomassMet)));
134 |     biomassmodel=struct();
135 |     biomassmodel.mets=BiomassAll;
136 |     biomassmodel.rxns={'BiomassAll'};
137 |     biomassmodel.lb=0.1;
138 |     biomassmodel.ub=1;
139 |     biomassmodel.S=zeros(numel(biomassmodel.mets),numel(biomassmodel.rxns));
140 |     % the unprefixed (community) biomass metabolite gets the coefficient 1
141 |     biomassmodel.S(strcmp(biomassmodel.mets,biomassMet))=1;
142 |     % add bacterial abundance as Stoichiometric Coefficients into the global biomass
143 |     for w12 =1:numel(biomassmodel.mets)
144 |         mgs1 = strrep(biomassmodel.mets{w12}, ['_' biomassMet], '');
145 |         value=abundanceN(strcmp(modelListN,mgs1));
146 |         if ~isempty(value)
147 |             biomassmodel.S(w12,1)=-(value);
148 |         end
149 |     end
150 |     % remove the FoEx_ and Fo_ global biomass
151 |     temp3=strrep(biomassEx, 'Ex_', '');
152 |     Toremove={strcat('FoEx_',temp3) strcat('Fo_',temp3)};
153 |     mergedModelS = removeRxns(mergedModelS,Toremove);
154 |     %add the global biomass to the community model
155 |     PmergedModel=mergeTwoModels(mergedModelS,biomassmodel, 1, false);
156 |     PmergedModel.c(:,1)=0;
157 |     PmergedModel.c(strcmp(PmergedModel.rxns,'BiomassAll'))=1;
158 |     PmergedModel1=PmergedModel;
159 | 
160 |     % couple the reactions of every bacterium to its own biomass reaction;
161 |     % the reactions are selected by the "<model name>_" prefix so that a
162 |     % model name contained in another model name is not picked up
163 |     for jjj=1:numel(modelListN)
164 |         prefix=modelList1{jjj};
165 |         RXNs=PmergedModel1.rxns(strncmp(PmergedModel1.rxns,prefix,length(prefix)));
166 |         ExRXN=unique(RXNs(~cellfun('isempty',strfind(RXNs,'Biomass_Bacteria'))));
167 |         PmergedModel1=coupleRxnList2Rxn(PmergedModel1,RXNs,ExRXN);
168 |     end
169 |     save([PathToSave filesep sampleName{h1} '.mat'],'PmergedModel1','PmergedModel')
170 |     report{h1,1}=sampleName{h1}; %#ok<AGROW>
171 |     report{h1,2}=true; %#ok<AGROW>
172 | end
173 | end
174 | 
175 | function model=loadCommunityMember(PathToModels,modelName)
176 | % load the variable PathToModels.name from <PathToModels.path>/<modelName>.mat
177 | loaded=load([PathToModels.path filesep modelName '.mat'],PathToModels.name);
178 | model=loaded.(PathToModels.name);
179 | end
180 | 
181 | function exchangeMets=GetExchangeMetabolite(model)
182 | % metabolites taking part in at least one reaction whose name contains 'Ex_'
183 | isExchange=~cellfun('isempty',strfind(model.rxns,'Ex_'));
184 | exchangeMets=model.mets(find(any(model.S(:,isExchange),2)));
185 | end


--------------------------------------------------------------------------------
/Functions/MetaGenomicsReactionScore.m:
--------------------------------------------------------------------------------
 1 | function [reactionScore, threshold] = MetaGenomicsReactionScore(BacteriaInformation)
 2 | % this function calculates reaction score and threshold for gap filling.
 3 | %inputs:
 4 | %   BacteriaInformation:    a structure includes:
 5 | %                           taxoLevel, the taxonomy name. taxoInfo, taxonomy information for each
 6 | %                           bacteria. taxoGroup: taxonomy group for each bacteria. rxns, the reaction
 7 | %                           name in reference model. bacteria, MSP IDs. BacteriaNames, species name.
 8 | %                           RxnStateAll, the reaction state (reactions x bacteria) for each bacteria.
 9 | %                           species, the MSP ID (one entry of the field bacteria) to score
10 | %
11 | % output:
12 | %   reactionScore:      a matrix with 3 columns for each reaction:
13 | %                       1) the reaction frequency in the taxonomy groups of the
14 | %                       species, averaged over the taxonomy levels,
15 | %                       2) the reaction state of the species itself,
16 | %                       3) the sum of 1 and 2, capped at 1 and set to -1
17 | %                       where the reaction state is -1
18 | %   threshold:          includes a threshold for each specified taxonomy level.
19 | 
20 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
21 | 
22 | index = find(strcmp(BacteriaInformation.bacteria, BacteriaInformation.species));
23 | if numel(index) ~= 1
24 |     error('BacteriaInformation.species must match exactly one entry of BacteriaInformation.bacteria')
25 | end
26 | % state of the species itself; it still holds the -1 entries
27 | ownState = BacteriaInformation.RxnStateAll(:,index);
28 | 
29 | % negative states are counted as absent in the frequencies
30 | stateAll = BacteriaInformation.RxnStateAll;
31 | stateAll(stateAll < 0) = 0;
32 | nRxns = size(stateAll,1);
33 | nLevels = size(BacteriaInformation.taxoLevel,2);
34 | 
35 | % one column per taxonomy level plus a last column with their average
36 | ScoreMatrix = zeros(nRxns,nLevels+1);
37 | for j = 1:nLevels
38 |     levelName = char(BacteriaInformation.taxoInfo(index,j));
39 |     % an unclassified level gives no information: its column stays zero
40 |     if isempty(strfind(levelName,'unclassified'))
41 |         group = BacteriaInformation.taxoGroup(:,j) == BacteriaInformation.taxoGroup(index,j);
42 |         % calculate the freq of each reaction for the taxonomy level
43 |         ScoreMatrix(:,j) = sum(stateAll(:,group),2)/nnz(group);
44 |     end
45 | end
46 | ScoreMatrix(:,end) = sum(ScoreMatrix(:,1:nLevels),2)/nLevels;
47 | 
48 | % threshold of a level: the value of the sorted average column at the row
49 | % of the last non-zero entry of the sorted level column
50 | sortedScores = sort(ScoreMatrix,1,'descend');
51 | threshold = zeros(1,nLevels);
52 | for j = 1:nLevels
53 |     if sum(sortedScores(:,j)) > 0
54 |         firstZero = find(sortedScores(:,j) == 0,1,'first');
55 |         if isempty(firstZero)
56 |             lastNonZero = nRxns; % every reaction has a non-zero frequency
57 |         else
58 |             lastNonZero = firstZero-1;
59 |         end
60 |         threshold(1,j) = sortedScores(lastNonZero,end);
61 |     end
62 | end
63 | 
64 | reactionScore = zeros(nRxns,3);
65 | reactionScore(:,1) = ScoreMatrix(:,end);
66 | reactionScore(:,2) = ownState;
67 | reactionScore(:,3) = reactionScore(:,1)+reactionScore(:,2);
68 | reactionScore(ownState == -1,3) = -1;
69 | reactionScore(reactionScore(:,3) > 1,3) = 1;
70 | 
71 | end


--------------------------------------------------------------------------------
/Functions/MetagenomeToReactions.m:
--------------------------------------------------------------------------------
  1 | function [RxnState, MSPmodel]= MetagenomeToReactions(model,metagenomeData)
  2 | % creates a reaction state for each species based on absent/present genes in MSP into gut
  3 | % reference model and filter genes and gene rules in reference model for each species
  4 | %inputs:
  5 | %   model:              reference metabolic Model with COBRA or RAVEN format.
  6 | %                       model.grRules is used if present, otherwise it is
  7 | %                       generated from model.rules and model.genes
  8 | %   metagenomeData:     a structure contains two fields "gene" and "value", includes  gene name and
  9 | %                       value, respectively. Genes with value 1 are present
 10 | %outputs:
 11 | %   RxnState:           a vector showing the state of the reaction for the MSP:
 12 | %                       1 at least one gene of the rule is present, 0 no
 13 | %                       gene of the rule is present, -1 the reaction has no
 14 | %                       gene rule
 15 | %   MSPmodel:           reference model with the bacterial genes and gene rules
 16 | %                       (genes, geneNames, grRules and rules are rewritten,
 17 | %                       rxnGeneMat is removed)
 18 | %
 19 | % NOTE: every gene rule is handled as a list of alternative genes joined by
 20 | % "or"; parentheses are ignored.
 21 | % NOTE: the function name was made equal to the file name, which is the
 22 | % name MATLAB uses to call it.
 23 | 
 24 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 25 | 
 26 | if ~isfield(model,'grRules')
 27 |     if ~isfield(model,'rules')
 28 |         error('Either model.rules or model.grRules would be defined in the model. please provide one of them')
 29 |     end
 30 |     model.grRules=rulesToGrRules(model);
 31 | end
 32 | 
 33 | presentGenes=metagenomeData.gene(metagenomeData.value == 1);
 34 | presentGenes=presentGenes(:);
 35 | 
 36 | nRules=length(model.grRules);
 37 | RxnState=zeros(length(model.rxns),1);
 38 | keptGenes=cell(nRules,1); % present genes of every reaction
 39 | totalgenes={};
 40 | for i=1:nRules
 41 |     rule=model.grRules{i,1};
 42 |     if isempty(rule)
 43 |         RxnState(i,1)=-1; % no gene rule
 44 |         keptGenes{i}={};
 45 |         continue
 46 |     end
 47 |     % drop the parentheses only, then split on "or"; trimming keeps the
 48 |     % gene names intact for rules such as "(a or b) or c"
 49 |     genes=strtrim(regexp(regexprep(rule,'[()]',''),'\s+or\s+','split'));
 50 |     genes=genes(~cellfun('isempty',genes));
 51 |     inter=intersect(genes(:),presentGenes);
 52 |     inter=inter(:);
 53 |     if ~isempty(inter)
 54 |         RxnState(i,1)=1;
 55 |     end
 56 |     if numel(inter) == 1
 57 |         model.grRules{i,1}=inter{1};
 58 |     elseif numel(inter) > 1
 59 |         model.grRules{i,1}=['(' strjoin(inter',' or ') ')'];
 60 |     else
 61 |         model.grRules{i,1}='';
 62 |     end
 63 |     keptGenes{i}=inter;
 64 |     totalgenes=vertcat(totalgenes,inter); %#ok<AGROW>
 65 | end
 66 | 
 67 | model.genes=unique(totalgenes);
 68 | model.geneNames=model.genes;
 69 | 
 70 | % rebuild model.rules from the filtered gene lists; x(k) refers to the
 71 | % k-th entry of the new model.genes
 72 | for h=1:nRules
 73 |     kept=keptGenes{h};
 74 |     if isempty(kept)
 75 |         model.rules{h,1}='';
 76 |         continue
 77 |     end
 78 |     [~,position]=ismember(kept,model.genes);
 79 |     terms=arrayfun(@(p) ['x(' num2str(p) ')'],position(:)','UniformOutput',false);
 80 |     if numel(terms) > 1
 81 |         model.rules{h,1}=['(' strjoin(unique(terms),' | ') ')']; % generating genericModel.rules
 82 |     else
 83 |         model.rules{h,1}=terms{1};
 84 |     end
 85 | end
 86 | 
 87 | if isfield(model,'rxnGeneMat')
 88 |     model=rmfield(model,'rxnGeneMat');
 89 | end
 90 | 
 91 | MSPmodel=model;
 92 | end
 93 | 
 94 | function grRules=rulesToGrRules(model)
 95 | % generate model.grRules from model.rules: every number in a rule is taken
 96 | % as an index into model.genes and the genes are joined by "or"
 97 | grRules=cell([numel(model.rxns) 1]);
 98 | for h=1:length(model.rules)
 99 |     geneIndex=str2double(regexp(model.rules{h,1},'\d+','match'));
100 |     if isempty(geneIndex)
101 |         grRules{h,1}='';
102 |         continue
103 |     end
104 |     names=unique(model.genes(geneIndex));
105 |     if numel(geneIndex) > 1
106 |         grRules{h,1}=['(' strjoin(names(:)',' or ') ')'];
107 |     else
108 |         grRules{h,1}=names{1};
109 |     end
110 | end
111 | end


--------------------------------------------------------------------------------
/Functions/ReactionAbundanceGenerator.m:
--------------------------------------------------------------------------------
 1 | function [reactionRelativeAbun, rxnAbunPerSample]= ReactionAbundanceGenerator(modelList,PathToModels,abundance,sampleName)
 2 | % this function sums, per sample, the abundance of the bacteria that carry
 3 | % each reaction.
 4 | %inputs:
 5 | %   modelList:              list of model names (one per row of abundance).
 6 | %   PathToModels:           a structure includes the path where the models are available
 7 | %                           (field path) and the name of model assigned in the .mat files
 8 | %                           (field name)
 9 | %   abundance:              matrix of microbiome (MSP) abundance profile, rows
10 | %                           follow modelList and columns follow sampleName
11 | %   sampleName:             list of sample names
12 | %outputs:
13 | %   reactionRelativeAbun    table: reaction abundance divided by the total
14 | %                           reaction abundance of the sample
15 | %   rxnAbunPerSample        table: reaction abundance scaled to [0,1] per
16 | %                           sample (min-max); 0 when all values are equal
17 | % models without a .mat file in PathToModels.path are ignored.
18 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
19 | 
20 | % keep only the bacteria whose model file exists
21 | modelList=modelList(:);
22 | available=false(numel(modelList),1);
23 | for h1 = 1:numel(modelList)
24 |     available(h1)=exist([PathToModels.path filesep modelList{h1} '.mat'],'file')==2;
25 | end
26 | modelList=modelList(available);
27 | abundance=abundance(available,:);
28 | 
29 | % reaction list of every model
30 | rxnsPerModel=cell(numel(modelList),1);
31 | for h1 = 1:numel(modelList)
32 |     loaded=load([PathToModels.path filesep modelList{h1} '.mat'],PathToModels.name);
33 |     model=loaded.(PathToModels.name);
34 |     rxnsPerModel{h1}=model.rxns(:);
35 | end
36 | rxns=unique(vertcat(cell(0,1),rxnsPerModel{:}));
37 | 
38 | % presence/absence of every reaction in every model
39 | compare=zeros(numel(rxns),numel(modelList));
40 | for w1=1:numel(modelList)
41 |     compare(ismember(rxns,rxnsPerModel{w1}),w1)=1;
42 | end
43 | 
44 | % reactions x samples: summed abundance of the bacteria with the reaction
45 | nSamples=numel(sampleName);
46 | FinalAbundance=compare*abundance(:,1:nSamples);
47 | 
48 | % the sum is taken per column explicitly so that a single reaction does
49 | % not collapse the samples into one total
50 | reactionRelativeAbun=bsxfun(@rdivide,FinalAbundance,sum(FinalAbundance,1));
51 | reactionRelativeAbun = array2table(reactionRelativeAbun);
52 | reactionRelativeAbun.Properties.VariableNames = sampleName(:)';
53 | reactionRelativeAbun=[array2table(rxns) reactionRelativeAbun];
54 | 
55 | rxnAbunPerSample=zeros(size(FinalAbundance));
56 | for i=1:size(FinalAbundance,2)
57 |     x = FinalAbundance(:,i);
58 |     minVal = min(x);
59 |     maxVal = max(x);
60 |     if minVal==maxVal
61 |         rxnAbunPerSample(:,i)=0;
62 |     else
63 |         rxnAbunPerSample(:,i) = (x - minVal) / ( maxVal - minVal);
64 |     end
65 | end
66 | rxnAbunPerSample = array2table(rxnAbunPerSample);
67 | rxnAbunPerSample.Properties.VariableNames = sampleName(:)';
68 | rxnAbunPerSample=[array2table(rxns) rxnAbunPerSample];
69 | end


--------------------------------------------------------------------------------
/Functions/ReactobiomeGenerator.m:
--------------------------------------------------------------------------------
 1 | function [countPerFive]= ReactobiomeGenerator(modelList,PathToModels,abundance,sampleName)
 2 | % this function counts, per sample, the bacteria that carry each reaction
 3 | % and scales the counts to 500 bacteria.
 4 | %inputs:
 5 | %   modelList:              list of model names (one per row of abundance).
 6 | %   PathToModels:           a structure includes the path where the models are available
 7 | %                           (field path) and the name of model assigned in the .mat files
 8 | %                           (field name)
 9 | %   abundance:              matrix of microbiome (MSP) abundance profile, rows
10 | %                           follow modelList and columns follow sampleName
11 | %   sampleName:             list of sample names
12 | %outputs:
13 | %   countPerFive:           reactobiome profile (table): reaction count x 500
14 | %                           divided by the count of 'Biomass_Bacteria', or by
15 | %                           the largest count of the sample when no model
16 | %                           has that reaction
17 | % models without a .mat file in PathToModels.path are ignored.
18 | % NOTE: the function name was made equal to the file name, which is the
19 | % name MATLAB uses to call it (it was declared as CPFGenerator).
20 | 
21 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
22 | 
23 | % keep only the bacteria whose model file exists
24 | modelList=modelList(:);
25 | available=false(numel(modelList),1);
26 | for h1 = 1:numel(modelList)
27 |     available(h1)=exist([PathToModels.path filesep modelList{h1} '.mat'],'file')==2;
28 | end
29 | modelList=modelList(available);
30 | abundance=abundance(available,:);
31 | 
32 | % reaction list of every model
33 | rxnsPerModel=cell(numel(modelList),1);
34 | for h1 = 1:numel(modelList)
35 |     loaded=load([PathToModels.path filesep modelList{h1} '.mat'],PathToModels.name);
36 |     model=loaded.(PathToModels.name);
37 |     rxnsPerModel{h1}=model.rxns(:);
38 | end
39 | rxns=unique(vertcat(cell(0,1),rxnsPerModel{:}));
40 | 
41 | % presence/absence of every reaction in every model
42 | compare=zeros(numel(rxns),numel(modelList));
43 | for w1=1:numel(modelList)
44 |     compare(ismember(rxns,rxnsPerModel{w1}),w1)=1;
45 | end
46 | 
47 | % a bacterium is present in a sample when its abundance is positive
48 | binary=double(abundance>0);
49 | nSamples=numel(sampleName);
50 | % reactions x samples: number of present bacteria with the reaction
51 | count_rxnstions=compare*binary(:,1:nSamples);
52 | 
53 | count_rxnstionsNor=zeros(size(count_rxnstions));
54 | biomassCount=count_rxnstions(strcmp(rxns, 'Biomass_Bacteria'),:);
55 | if ~isempty(biomassCount)
56 |     for i=1:size(count_rxnstionsNor,2)
57 |         count_rxnstionsNor(:,i)=count_rxnstions(:,i)*500/biomassCount(1,i);
58 |     end
59 | else
60 |     for i=1:size(count_rxnstionsNor,2)
61 |         count_rxnstionsNor(:,i)=count_rxnstions(:,i)*500/max(count_rxnstions(:,i));
62 |     end
63 | end
64 | count_rxnstionsNor = array2table(count_rxnstionsNor);
65 | count_rxnstionsNor.Properties.VariableNames = sampleName(:)';
66 | countPerFive=[array2table(rxns) count_rxnstionsNor];
67 | 
68 | end


--------------------------------------------------------------------------------
/Functions/RxnRichnessGenerator.m:
--------------------------------------------------------------------------------
 1 | function [richness]= RxnRichnessGenerator(modelList,PathToModels,abundance,sampleName)
 2 | % this function counts, per sample, the distinct reactions carried by the
 3 | % bacteria present in the sample.
 4 | %inputs:
 5 | %   modelList:          list of model names (one per row of abundance).
 6 | %   PathToModels:       a structure includes the path where the models are available
 7 | %                       (field path) and the name of model assigned in the .mat files
 8 | %                       (field name)
 9 | %   abundance:          matrix of microbiome (MSP) abundance profile, rows
10 | %                       follow modelList and columns follow sampleName
11 | %   sampleName:         list of sample names (row or column)
12 | %outputs:
13 | %   richness:           table with the variables sampleName and rxn_richness:
14 | %                       gut microbiome reaction composition (reaction richness) of all individuals
15 | % models without a .mat file in PathToModels.path are ignored.
16 | 
17 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
18 | 
19 | % keep only the bacteria whose model file exists
20 | modelList=modelList(:);
21 | available=false(numel(modelList),1);
22 | for h1 = 1:numel(modelList)
23 |     available(h1)=exist([PathToModels.path filesep modelList{h1} '.mat'],'file')==2;
24 | end
25 | modelList=modelList(available);
26 | abundance=abundance(available,:);
27 | 
28 | % reaction list of every model; each file is read only once
29 | rxnsPerModel=cell(numel(modelList),1);
30 | for h1 = 1:numel(modelList)
31 |     loaded=load([PathToModels.path filesep modelList{h1} '.mat'],PathToModels.name);
32 |     model=loaded.(PathToModels.name);
33 |     rxnsPerModel{h1}=model.rxns(:);
34 | end
35 | rxns=unique(vertcat(cell(0,1),rxnsPerModel{:}));
36 | 
37 | % presence/absence of every reaction in every model
38 | temporary=zeros(numel(rxns),numel(modelList));
39 | for w1=1:numel(modelList)
40 |     temporary(ismember(rxns,rxnsPerModel{w1}),w1)=1;
41 | end
42 | 
43 | % a bacterium is present in a sample when its abundance is positive
44 | binary=double(abundance>0);
45 | nSamples=numel(sampleName);
46 | % a reaction is present in a sample when at least one present bacterium has it
47 | rxnsBinary=(temporary*binary(:,1:nSamples))>0;
48 | 
49 | richness = sum(rxnsBinary,1);
50 | richness=table(sampleName(:),richness(:));
51 | richness.Properties.VariableNames = {'sampleName','rxn_richness'};
52 | 
53 | end


--------------------------------------------------------------------------------
/Functions/USDAcreatingDiet.m:
--------------------------------------------------------------------------------
 1 | function[micronutrients_diet_mmol, macronutrients_diet]= USDAcreatingDiet(food_id_item,food_grams_item)
 2 | % this function sums the nutrients of the selected USDA food items,
 3 | % weighted by the amount of each item.
 4 | % Input:
 5 | %   food_id_item:       the ID of the food item (specified by USDA food ID)
 6 | %   food_grams_item:    the weight (in grams) for the food item
 7 | %        NOTE: the food_grams_item and food_id_item need to be in the same order.
 8 | %        Also the food_grams_item needs to be specified in grams, so e.g 1.5 kg needs to be 1500 (g).
 9 | %        An ID may be listed more than once; every entry is counted.
10 | 
11 | %output:
12 | % micronutrients_diet_mmol:          table, total amount of micronutrients for the created diet in mmol/gDW
13 | % macronutrients_diet:               table, total amount of macronutrients for the created diet
14 | 
15 | % #Authors Bouchra Ezzamouri.
16 | 
17 | % get path to where the MIGRENE Toolbox is located
18 | MIGDIR = fileparts(which('MIGRENE_pipeline'));
19 | %load USDA dataset from MIGRENE Toolbox.
20 | USDA=[MIGDIR filesep 'mat' filesep 'USDAdataset.mat'];
21 | dataset = load(USDA);
22 | food_item_USDA_id = dataset.food_item_USDA_id;
23 | mets_USDA_name = dataset.mets_USDA_name;
24 | macros_USDA_name = dataset.macros_USDA_name;
25 | 
26 | if numel(food_id_item) ~= numel(food_grams_item)
27 |     error('food_id_item and food_grams_item must have the same number of elements')
28 | end
29 | 
30 | % position of every requested food in the USDA dataset, in the order given
31 | % by the user, so that the i-th weight is applied to the i-th food
32 | [found,position] = ismember(food_id_item(:),food_item_USDA_id(:));
33 | if ~all(found)
34 |     error('%d of the requested food IDs were not found in the USDA dataset', nnz(~found))
35 | end
36 | grams = double(food_grams_item(:));
37 | 
38 | % the nutrient matrices hold one column per food (values per gram); the
39 | % product with the weights gives the totals of the diet
40 | total_food_macros_diet = dataset.query_food_item_macros_values_1gDW(:,position) * grams;
41 | total_food_micro_mmolgDW_diet = dataset.query_food_item_micros_mmol_gDW(:,position) * grams;
42 | 
43 | % the output will be a list of total macros in gDW and micronutrients (in mmol) for the specified diet
44 | micronutrients_diet_mmol = table(mets_USDA_name,(total_food_micro_mmolgDW_diet));
45 | macronutrients_diet = table(macros_USDA_name,(total_food_macros_diet));
46 | end


--------------------------------------------------------------------------------
/Functions/checkCatalog.m:
--------------------------------------------------------------------------------
 1 | function [catalogFileChecked]= checkCatalog(inputFile,numWorkers)
 2 | % checkCatalog  Validate a KO-annotated gene catalog and reshape it to long format.
 3 | %
 4 | % Every KO identifier (the letter K followed by five digits) is extracted from
 5 | % the annotation column. Genes annotated to more than one KO are repeated, one
 6 | % row per KO. Rows without any KO identifier are left out.
 7 | %
 8 | %inputs:
 9 | %   inputFile:             cell array contains two columns, first column is gene name and
10 | %                          second column provides KO annotation
11 | %   numWorkers:            (optional, default 1) integer indicating the number of cores to
12 | %                          use for parallelization
13 | %
14 | %outputs:
15 | %   catalogFileChecked:    cell array contains two columns, first column is gene name and
16 | %                          second column provides one KO identifier. Genes with exactly one
17 | %                          KO come first, followed by the expanded rows of the genes with
18 | %                          several KOs.
19 | %
20 | % An error is raised when no row of inputFile contains a KO identifier.
21 | %
22 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
23 | 
24 | % numWorkers is optional: without it the function runs serially
25 | if nargin < 2 || isempty(numWorkers)
26 |     numWorkers = 1;
27 | end
28 | 
29 | % check the availability of KO annotation for each gene
30 | expression = 'K\d\d\d\d\d';
31 | matchStr = regexp(inputFile(:,2),expression,'match');
32 | % number of KO identifiers per gene. Counting the regular expression matches
33 | % (instead of occurrences of the letter K) keeps rows such as 'Kinase', which
34 | % contain a K but no KO identifier, out of the catalog.
35 | NumberOfKO = cellfun('length',matchStr);
36 | if ~any(NumberOfKO)
37 |     error('the catalog is not annotated by KO or if KO-annotated, check the format of inputFile')
38 | end
39 | 
40 | % find the rows with one and more than one KO annotation
41 | IndexOne = find(NumberOfKO == 1);
42 | IndexAboveOne = find(NumberOfKO > 1);
43 | 
44 | % subset of the gene catalog with exactly one KO: keep the gene name and
45 | % store the extracted KO identifier as annotation
46 | output_2 = inputFile(IndexOne,1:2);
47 | output_2(:,2) = cellfun(@(ko) ko{1}, matchStr(IndexOne), 'UniformOutput', false);
48 | 
49 | % check the number of workers for parallelization
50 | if numWorkers > 1
51 |     poolobj = gcp('nocreate');
52 |     if isempty(poolobj)
53 |         parpool(numWorkers);
54 |     end
55 |     maxWorkers = numWorkers;
56 | else
57 |     disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers')
58 |     % a worker limit of 0 makes parfor run in the client, so no pool is started
59 |     maxWorkers = 0;
60 | end
61 | 
62 | % convert the subset of gene catalog with several annotated KOs to long
63 | % format. The matches found above are reused, and every gene writes its rows
64 | % into its own slot so nothing is re-concatenated inside the loop.
65 | multiGenes = inputFile(IndexAboveOne,1);
66 | multiKO = matchStr(IndexAboveOne);
67 | nMulti = numel(IndexAboveOne);
68 | expanded = cell(nMulti,1);
69 | parfor (w = 1:nMulti, maxWorkers)
70 |     koList = multiKO{w};
71 |     rows = cell(numel(koList),2);
72 |     rows(:,1) = multiGenes(w);
73 |     rows(:,2) = koList(:);
74 |     expanded{w} = rows;
75 | end
76 | output_1updated = vertcat(cell(0,2), expanded{:});
77 | 
78 | % Concatenate the two arrays vertically to make a catalog file
79 | catalogFileChecked = vertcat(output_2,output_1updated);


--------------------------------------------------------------------------------
/Functions/contextSpecificModelGenertion.m:
--------------------------------------------------------------------------------
  1 | function [contextSpecificModel] = contextSpecificModelGenertion(model,metagenomeset,threshold,bibliome)
  2 | % generates species-specific model based on reaction score, threshold and mind the gap.
  3 | %inputs:
  4 | %   model:				reference metabolic model in COBRA or RAVEN format.
  5 | %	metagenomeset:		a matrix includes 3 different scores for each reaction.
  6 | %	threshold:			includes a threshold for each specified taxonomy level.
  7 | %OPTIONAL INPUTS:
  8 | %	bibliome:			any bibliome data on phenotypic features of the species.
  9 | % 						as structure with four fields:
 10 | % 						"bacteria" a cell array listing the name of the bacteria; "rxn" list the
 11 | % 						name of the reactions having bibliome; "value" a matrix of numbers: zero
 12 | % 						means no information, 1 means consumed, 2 means produced, -1 not-consumed
 13 | % 						and -2 means not-produced by the corresponding bacteria. "aerobeInfo" a
 14 | % 						cell array provides the info that the bacteria require oxygen for growth
 15 | % 						or not so specify with "aerobe" or "anaerobe" or "facultative".
 16 | %outputs: 
 17 | %	contextSpecificModel species-specific model	
 18 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 19 | if nargin<4
 20 |     bibliome={};
 21 | end
 22 | % Integrating the bibliome data into the model
 23 | if ~isempty(bibliome)
 24 |     if isfield(bibliome, 'value') && isfield(bibliome, 'rxn')
 25 |         %check all necessaries are provided
 26 |         if size(bibliome.value,2)>1 && ~isfield(bibliome, 'species')
 27 |             error('there are more than one column in bibliome.value and you didnt specify the name of species. please assign it by adding species field or do not dedicate bibliome as input of function')
 28 |         elseif size(bibliome.value,2)>1 && isfield(bibliome, 'species')
 29 |             % find the corresponding bibliome data for the species
 30 |             index=find(strcmp(bibliome.bacteria, bibliome.species));
 31 |             if ~isempty(index)
 32 |                 value=bibliome.value(:,index);
 33 |             else 
 34 |                 bibliome={};
 35 |             end
 36 |         elseif size(bibliome.value,2)==1
 37 |             value=bibliome.value;
 38 |         end
 39 | 
 40 |         if ~isempty(bibliome)
 41 |          model1=model;
 42 |         % find the exchange reactions and constrain the model for consumption 
 43 |         consumed=bibliome.rxn(find(value==1));
 44 |         model1.lb(find(ismember(model1.rxns,consumed)))=-1;
 45 |         model1.ub(find(ismember(model1.rxns,consumed)))=0;
 46 |         % find the exchange reactions and constrain the model for
 47 |         % production
 48 |         produced=bibliome.rxn(find(value==2));
 49 |         model1.lb(find(ismember(model1.rxns,produced)))=0.1;
 50 |         model1.ub(find(ismember(model1.rxns,produced)))=1000;
 51 |         % find the exchange reactions and constrain the model for
 52 |         % metabolites regarding the bibliome data
 53 |         notCon=bibliome.rxn(find(value==-1));
 54 |         model1.lb(find(ismember(model1.rxns,notCon)))=0;
 55 |         notPro=bibliome.rxn(find(value==-1));
 56 |         model1.ub(find(ismember(model1.rxns,notPro)))=0;
 57 |        % check the changes doesnt affect the functionality of the model
 58 |         g2=optimizeCbModel(model1);
 59 |         if g2.f>0
 60 |             disp('the bibliome info was added to the model')
 61 |             model=model1;
 62 |         else
 63 |             disp('the bibliome info could not be added to the model. some changes made the model nonfunctional')
 64 |         end
 65 |         end
 66 |     end
 67 | end
 68 | %check the species requires oxygen for growth
 69 | if ~isempty(bibliome) && isfield(bibliome, 'aerobeInfo')
 70 |     if size(bibliome.aerobeInfo,1)>1 && ~isfield(bibliome, 'species')
 71 |          error('there are more than one row in bibliome.aerobeInfo and you didnt specify the name of species. please assign it by adding species field or do not dedicate bibliome as input of function.')
 72 |     elseif size(bibliome.aerobeInfo,1)>1 && isfield(bibliome, 'species')
 73 |         index=find(strcmp(bibliome.bacteria, bibliome.species));
 74 |         obic=bibliome.aerobeInfo(index,1);
 75 |     else
 76 |         obic=bibliome.aerobeInfo;
 77 |     end
 78 | else
 79 |     obic={'none'};
 80 | end
 81 | 
 82 | %generate the specefic model
 83 | if strcmp(obic,'anaerobe')
 84 | model.ub(find(strcmp(model.rxns, 'Ex_O2')))=0;
 85 | model.lb(find(strcmp(model.rxns, 'Ex_O2')))=0;
 86 | model.ub(find(strcmp(model.rxns, 'rxn08173')))=1000;
 87 | model.lb(find(strcmp(model.rxns, 'rxn08173')))=0;
 88 | end
 89 | 
 90 | if ~ismember(obic,{'aerobe';'anaerobe';'facultative'})
 91 | model.ub(find(strcmp(model.rxns, 'Ex_O2')))=0;
 92 | model.lb(find(strcmp(model.rxns, 'Ex_O2')))=-10;
 93 | model.ub(find(strcmp(model.rxns, 'rxn08173')))=1000;
 94 | model.lb(find(strcmp(model.rxns, 'rxn08173')))=0;
 95 | end
 96 | 
 97 | if strcmp(obic,'aerobe')
 98 | model.ub(find(strcmp(model.rxns, 'Ex_O2')))=-1;
 99 | model.lb(find(strcmp(model.rxns, 'Ex_O2')))=-19.2;
100 | model.ub(find(strcmp(model.rxns, 'rxn08173')))=1000;
101 | model.lb(find(strcmp(model.rxns, 'rxn08173')))=0.5;
102 | if metagenomeset(find(strcmp(reference_model.rxns,'rxn08173')),3) < 1
103 | 	metagenomeset(find(strcmp(reference_model.rxns,'rxn08173')),3)=1
104 | end
105 | end
106 | %get the closest taxonomy level
107 | s=threshold(find(threshold,1,'first'));
108 | if isempty(s)
109 | threshold=0;
110 | else
111 | threshold=s;
112 | end
113 | 
114 | if ~strcmp(obic,'facultative')
115 |     % collect the reactions with score from other levels of taxonomy and check
116 |     % the essentiality of the reactions.
117 |     rxn2remove= model.rxns(find(metagenomeset(:,3)<threshold & metagenomeset(:,3)>=0),1);
118 |     tissueModel= removeRxns(model,rxn2remove);
119 |     cc=optimizeCbModel(tissueModel);
120 |     matrix_General(1,1)=cc.f;
121 |     exp=metagenomeset(:,3);
122 |     TempModel = KeepNecessaryRxns(model, exp, 0.99, 0.1);
123 |     ddd=setdiff(TempModel.rxns,tissueModel.rxns);
124 |     ddd_general=ddd;
125 |     matrix_General(1,2)=length(setdiff(TempModel.rxns,tissueModel.rxns));
126 |     metagenomeset1=metagenomeset;
127 |     for t=1:length(ddd)
128 |         metagenomeset1(find(strcmp(model.rxns, ddd{t})),3)=1;
129 |     end
130 |     % remove the reactions with score from other levels of taxonomy
131 |     rxn2remove=model.rxns(find(metagenomeset1(:,3)<threshold & metagenomeset1(:,3)>=0),1) ;
132 |     tissueModel= removeRxns(model,rxn2remove);
133 | 
134 |     exp3=metagenomeset(:,3);
135 |     tissueModel1=tissueModel;
136 |     %remove deadEnd reactions that are not supported by metagenomics. 
137 |     [~,~, removedRxns] = removeDeadEnds(tissueModel1);
138 |     % exp3(:,2)=1;
139 |     % exp3(find(ismember(tissueModel1.rxns,removedRxns)),2)=0;
140 |     % a1=tissueModel1.rxns(find(exp3(:,1) <= 1));
141 |     % b1=tissueModel1.rxns(find(exp3(:,2) == 0));
142 |     % g1=intersect(a1,b1);
143 |     % tissueModel1= removeRxns(tissueModel,g1);
144 |     tissueModel1= removeRxns(tissueModel1,removedRxns);
145 |     fba=optimizeCbModel(tissueModel1);
146 | 
147 |     % prunnig the reactions without GPR
148 | 
149 |     exp4=zeros(length(tissueModel1.rxns),1);
150 |     for q=1:length(tissueModel1.rxns)
151 |         if	ismember(tissueModel1.rxns(q),model.rxns)
152 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModel1.rxns(q))),3);
153 |         end
154 |     end
155 |     tissueModelF5 = KeepNecessaryRxns(tissueModel1, exp4, 0.0001, 0.1);
156 | 
157 |     AA2=setdiff(tissueModel1.rxns,tissueModelF5.rxns);
158 | 
159 |     % get all the reaction with score 1 excluding the transport and exchange
160 |     % reactions
161 |     FromMetaGenomics=model.rxns(find(exp(:,1) >= 1));
162 |     indexEx=strfind(model.rxns,'Ex');
163 |     IndexEx = find(not(cellfun('isempty',indexEx)));
164 |     indexTr=strfind(model.rxns,'t_');
165 |     IndexTr = find(not(cellfun('isempty',indexTr)));
166 |     indexOfTrEx=union(IndexEx,IndexTr);
167 |     TrEx=model.rxns(indexOfTrEx);
168 |     FromMetaGenomics=setdiff(FromMetaGenomics,TrEx);
169 |     % get the dead end reactions with score 1 
170 |     tissueModelF5=tissueModel1;
171 |     index=1:50:length(AA2);
172 |     for h1=1:length(index)
173 |         if h1 ~= length(index)
174 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
175 |             d=AA2(index(h1):(index(h1)+49));
176 |             T1=tissueModelF5;
177 |             T1= removeRxns(T1,d);
178 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
179 |             removed=setdiff(removedRxns1,removedRxns);
180 |             hhh=intersect(removed,FromMetaGenomics);
181 |             if  length(hhh)<10
182 |                 tissueModelF5=T1;
183 |             else
184 |                 for h2=1:length(d)
185 |                     T1=tissueModelF5;
186 |                     T1= removeRxns(T1,d{h2});
187 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
188 |                     removed=setdiff(removedRxns1,removedRxns);
189 |                     hhh=intersect(removed,FromMetaGenomics);
190 |                     if  length(hhh)<10
191 |                         tissueModelF5=T1;
192 |                     end
193 |                 end
194 |             end
195 |         else
196 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
197 |             d=AA2(index(h1):end);
198 |             for h3=1:length(d)
199 |                     T1=tissueModelF5;
200 |                     T1= removeRxns(T1,d{h3});
201 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
202 |                     removed=setdiff(removedRxns1,removedRxns);
203 |                     hhh=intersect(removed,FromMetaGenomics);
204 |                     if  length(hhh)<10
205 |                         tissueModelF5=T1;
206 |                     end
207 |             end
208 |         end
209 |     end
210 | 
211 |     exp4=zeros(length(tissueModelF5.rxns),1);
212 |     for q=1:length(tissueModelF5.rxns)
213 |         if	ismember(tissueModelF5.rxns(q),model.rxns)
214 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModelF5.rxns(q))),3);
215 |         end
216 |     end
217 | 
218 |     AA1=tissueModelF5.rxns(find(exp4<1 & exp4> -1));
219 |     BB1=[];
220 |     for q=1:length(AA1)
221 |         if	ismember(AA1(q),model.rxns)
222 |             BB1(q,1)=metagenomeset(find(strcmp(model.rxns,AA1{q})),3);
223 |         else 
224 |             BB1(q,1)=1;
225 |         end
226 |     end
227 | 
228 | 
229 |     T=table(AA1,BB1);
230 |     Sort_Table = sortrows(T,'BB1');
231 |     AA2=table2cell(Sort_Table(:,1));
232 |     %prune the reactions if they are not lethal for the network and functionality of the model
233 |     fba=optimizeCbModel(tissueModelF5);
234 |     index=1:50:length(AA2);
235 |     for h1=1:length(index)
236 |         if h1 ~= length(index)
237 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
238 |             d=AA2(index(h1):(index(h1)+49));
239 |             T1=tissueModelF5;
240 |             T1= removeRxns(T1,d);
241 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
242 |             removed=setdiff(removedRxns1,removedRxns);
243 |             hhh=intersect(removed,FromMetaGenomics);
244 |             fbaa=optimizeCbModel(T1);
245 |             if  length(hhh)<10  && fbaa.f > fba.f*0.1
246 |                 tissueModelF5=T1;
247 |             else
248 |                 for h2=1:length(d)
249 |                     T1=tissueModelF5;
250 |                     T1= removeRxns(T1,d{h2});
251 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
252 |                     removed=setdiff(removedRxns1,removedRxns);
253 |                     hhh=intersect(removed,FromMetaGenomics);
254 |                     fbaa=optimizeCbModel(T1);
255 |                     if  length(hhh)<10  && fbaa.f > fba.f*0.1
256 |                         tissueModelF5=T1;
257 |                     end
258 |                 end
259 |             end
260 |         else
261 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
262 |             d=AA2(index(h1):end);
263 |             for h3=1:length(d)
264 |                     T1=tissueModelF5;
265 |                     T1= removeRxns(T1,d{h3});
266 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
267 |                     removed=setdiff(removedRxns1,removedRxns);
268 |                     hhh=intersect(removed,FromMetaGenomics);
269 |                     fbaa=optimizeCbModel(T1);
270 |                     if  length(hhh)<10 && fbaa.f > fba.f*0.1
271 |                         tissueModelF5=T1;
272 |                     end
273 |             end
274 |         end
275 |     end
276 | 
277 |     contextSpecificModel=tissueModelF5;
278 |     %matrix_General(1,3)=length(contextSpecificModel.rxns);
279 |     %matrix_General(1,4)=length(find(exp4>=1));
280 |     %matrix_General(1,5)=length(find(exp4<=0));
281 |     %matrix_General(1,6)=matrix_General(1,3)-(matrix_General(1,5)+matrix_General(1,4));
282 |     contextSpecificModel.lb(find(strcmp(contextSpecificModel.rxns, 'rxn08173')))=0;
283 |     contextSpecificModel.ub(find(strcmp(contextSpecificModel.rxns, 'rxn08173')))=1000;
284 |     if exist('produced','var')
285 |         contextSpecificModel.lb(find(ismember(contextSpecificModel.rxns,produced)))=0;
286 |         contextSpecificModel.ub(find(ismember(contextSpecificModel.rxns,produced)))=1000;
287 |     end
288 | end
289 | 
290 | if strcmp(obic,'facultative')
291 | 	model.ub(find(strcmp(model.rxns, 'Ex_O2')))=0;
292 | 	model.lb(find(strcmp(model.rxns, 'Ex_O2')))=0;
293 | 	model.ub(find(strcmp(model.rxns, 'rxn08173')))=1000;
294 | 	model.lb(find(strcmp(model.rxns, 'rxn08173')))=0;
295 |     % collect the reactions with score from other levels of taxonomy and check
296 |     % the essentiality of the reactions.
297 |     rxn2remove= model.rxns(find(metagenomeset(:,3)<threshold & metagenomeset(:,3)>=0),1);
298 |     tissueModel= removeRxns(model,rxn2remove);
299 |     cc=optimizeCbModel(tissueModel);
300 |     matrix_General(1,1)=cc.f;
301 |     exp=metagenomeset(:,3);
302 |     TempModel = KeepNecessaryRxns(model, exp, 0.99, 0.1);
303 |     ddd=setdiff(TempModel.rxns,tissueModel.rxns);
304 |     ddd_general=ddd;
305 |     matrix_General(1,2)=length(setdiff(TempModel.rxns,tissueModel.rxns));
306 |     metagenomeset1=metagenomeset;
307 |     for t=1:length(ddd)
308 |         metagenomeset1(find(strcmp(model.rxns, ddd{t})),3)=1;
309 |     end
310 |     % remove the reactions with score from other levels of taxonomy
311 |     rxn2remove=model.rxns(find(metagenomeset1(:,3)<threshold & metagenomeset1(:,3)>=0),1) ;
312 |     tissueModel= removeRxns(model,rxn2remove);
313 | 
314 |     exp3=metagenomeset(:,3);
315 |     tissueModel1=tissueModel;
316 |     %remove deadEnd reactions that are not supported by metagenomics. 
317 |     [~,~, removedRxns] = removeDeadEnds(tissueModel1);
318 |     % exp3(:,2)=1;
319 |     % exp3(find(ismember(tissueModel1.rxns,removedRxns)),2)=0;
320 |     % a1=tissueModel1.rxns(find(exp3(:,1) <= 1));
321 |     % b1=tissueModel1.rxns(find(exp3(:,2) == 0));
322 |     % g1=intersect(a1,b1);
323 |     % tissueModel1= removeRxns(tissueModel,g1);
324 |     tissueModel1= removeRxns(tissueModel1,removedRxns);
325 |     fba=optimizeCbModel(tissueModel1);
326 | 
327 |     % prunnig the reactions without GPR
328 | 
329 |     exp4=zeros(length(tissueModel1.rxns),1);
330 |     for q=1:length(tissueModel1.rxns)
331 |         if	ismember(tissueModel1.rxns(q),model.rxns)
332 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModel1.rxns(q))),3);
333 |         end
334 |     end
335 |     tissueModelF5 = KeepNecessaryRxns(tissueModel1, exp4, 0.0001, 0.1);
336 | 
337 |     AA2=setdiff(tissueModel1.rxns,tissueModelF5.rxns);
338 | 
339 |     % get all the reaction with score 1 excluding the transport and exchange
340 |     % reactions
341 |     FromMetaGenomics=model.rxns(find(exp(:,1) >= 1));
342 |     indexEx=strfind(model.rxns,'Ex');
343 |     IndexEx = find(not(cellfun('isempty',indexEx)));
344 |     indexTr=strfind(model.rxns,'t_');
345 |     IndexTr = find(not(cellfun('isempty',indexTr)));
346 |     indexOfTrEx=union(IndexEx,IndexTr);
347 |     TrEx=model.rxns(indexOfTrEx)
348 |     FromMetaGenomics=setdiff(FromMetaGenomics,TrEx);
349 |     % get the dead end reactions with score 1 
350 |     tissueModelF5=tissueModel1;
351 |     index=1:50:length(AA2);
352 |     for h1=1:length(index)
353 |         if h1 ~= length(index)
354 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
355 |             d=AA2(index(h1):(index(h1)+49));
356 |             T1=tissueModelF5;
357 |             T1= removeRxns(T1,d);
358 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
359 |             removed=setdiff(removedRxns1,removedRxns);
360 |             hhh=intersect(removed,FromMetaGenomics);
361 |             if  length(hhh)<10
362 |                 tissueModelF5=T1;
363 |             else
364 |                 for h2=1:length(d)
365 |                     T1=tissueModelF5;
366 |                     T1= removeRxns(T1,d{h2});
367 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
368 |                     removed=setdiff(removedRxns1,removedRxns);
369 |                     hhh=intersect(removed,FromMetaGenomics);
370 |                     if  length(hhh)<10
371 |                         tissueModelF5=T1;
372 |                     end
373 |                 end
374 |             end
375 |         else
376 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
377 |             d=AA2(index(h1):end);
378 |             for h3=1:length(d)
379 |                     T1=tissueModelF5;
380 |                     T1= removeRxns(T1,d{h3});
381 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
382 |                     removed=setdiff(removedRxns1,removedRxns);
383 |                     hhh=intersect(removed,FromMetaGenomics);
384 |                     if  length(hhh)<10
385 |                         tissueModelF5=T1;
386 |                     end
387 |             end
388 |         end
389 |     end
390 | 
391 |     exp4=zeros(length(tissueModelF5.rxns),1);
392 |     for q=1:length(tissueModelF5.rxns)
393 |         if	ismember(tissueModelF5.rxns(q),model.rxns)
394 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModelF5.rxns(q))),3);
395 |         end
396 |     end
397 | 
398 |     AA1=tissueModelF5.rxns(find(exp4<1 & exp4> -1));
399 |     BB1=[];
400 |     for q=1:length(AA1)
401 |         if	ismember(AA1(q),model.rxns)
402 |             BB1(q,1)=metagenomeset(find(strcmp(model.rxns,AA1{q})),3);
403 |         else 
404 |             BB1(q,1)=1;
405 |         end
406 |     end
407 | 
408 | 
409 |     T=table(AA1,BB1);
410 |     Sort_Table = sortrows(T,'BB1');
411 |     AA2=table2cell(Sort_Table(:,1));
412 |     %prune the reactions if they dont collapse the network and functionality of the model
413 |     fba=optimizeCbModel(tissueModelF5);
414 |     index=1:50:length(AA2);
415 |     for h1=1:length(index)
416 |         if h1 ~= length(index)
417 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
418 |             d=AA2(index(h1):(index(h1)+49));
419 |             T1=tissueModelF5;
420 |             T1= removeRxns(T1,d);
421 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
422 |             removed=setdiff(removedRxns1,removedRxns);
423 |             hhh=intersect(removed,FromMetaGenomics);
424 |             fbaa=optimizeCbModel(T1);
425 |             if  length(hhh)<10  && fbaa.f > fba.f*0.1
426 |                 tissueModelF5=T1;
427 |             else
428 |                 for h2=1:length(d)
429 |                     T1=tissueModelF5;
430 |                     T1= removeRxns(T1,d{h2});
431 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
432 |                     removed=setdiff(removedRxns1,removedRxns);
433 |                     hhh=intersect(removed,FromMetaGenomics);
434 |                     fbaa=optimizeCbModel(T1);
435 |                     if  length(hhh)<10  && fbaa.f > fba.f*0.1
436 |                         tissueModelF5=T1;
437 |                     end
438 |                 end
439 |             end
440 |         else
441 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
442 |             d=AA2(index(h1):end);
443 |             for h3=1:length(d)
444 |                     T1=tissueModelF5;
445 |                     T1= removeRxns(T1,d{h3});
446 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
447 |                     removed=setdiff(removedRxns1,removedRxns);
448 |                     hhh=intersect(removed,FromMetaGenomics);
449 |                     fbaa=optimizeCbModel(T1);
450 |                     if  length(hhh)<10 && fbaa.f > fba.f*0.1
451 |                         tissueModelF5=T1;
452 |                     end
453 |             end
454 |         end
455 |     end
456 | 
457 |     model_anaerobic=tissueModelF5;
458 |     %matrix_General(1,3)=length(model_anaerobic.rxns);
459 |     %matrix_General(1,4)=length(find(exp4>=1));
460 |     %matrix_General(1,5)=length(find(exp4<=0));
461 |     %matrix_General(1,6)=matrix_General(1,3)-(matrix_General(1,5)+matrix_General(1,4));
462 | 	model.ub(find(strcmp(model.rxns, 'Ex_O2')))=-1;
463 | 	model.lb(find(strcmp(model.rxns, 'Ex_O2')))=-19.2;
464 | 	model.ub(find(strcmp(model.rxns, 'rxn08173')))=1000;
465 | 	model.lb(find(strcmp(model.rxns, 'rxn08173')))=0.5;
466 | 	if metagenomeset(find(strcmp(reference_model.rxns,'rxn08173')),3) < 1
467 | 	metagenomeset(find(strcmp(reference_model.rxns,'rxn08173')),3)=1;
468 | 	end 
469 |     % collect the reactions with score from other levels of taxonomy and check
470 |     % the essentiality of the reactions.
471 |     rxn2remove= model.rxns(find(metagenomeset(:,3)<threshold & metagenomeset(:,3)>=0),1);
472 |     tissueModel= removeRxns(model,rxn2remove);
473 |     cc=optimizeCbModel(tissueModel);
474 |     matrix_General(1,1)=cc.f;
475 |     exp=metagenomeset(:,3);
476 |     TempModel = KeepNecessaryRxns(model, exp, 0.99, 0.1);
477 |     ddd=setdiff(TempModel.rxns,tissueModel.rxns);
478 |     ddd_general=ddd;
479 |     matrix_General(1,2)=length(setdiff(TempModel.rxns,tissueModel.rxns));
480 |     metagenomeset1=metagenomeset;
481 |     for t=1:length(ddd)
482 |         metagenomeset1(find(strcmp(model.rxns, ddd{t})),3)=1;
483 |     end
484 |     % remove the reactions with score from other levels of taxonomy
485 |     rxn2remove=model.rxns(find(metagenomeset1(:,3)<threshold & metagenomeset1(:,3)>=0),1) ;
486 |     tissueModel= removeRxns(model,rxn2remove);
487 | 
488 |     exp3=metagenomeset(:,3);
489 |     tissueModel1=tissueModel;
490 |     %remove deadEnd reactions that are not supported by metagenomics. 
491 |     [~,~, removedRxns] = removeDeadEnds(tissueModel1);
492 |     % exp3(:,2)=1;
493 |     % exp3(find(ismember(tissueModel1.rxns,removedRxns)),2)=0;
494 |     % a1=tissueModel1.rxns(find(exp3(:,1) <= 1));
495 |     % b1=tissueModel1.rxns(find(exp3(:,2) == 0));
496 |     % g1=intersect(a1,b1);
497 |     % tissueModel1= removeRxns(tissueModel,g1);
498 |     tissueModel1= removeRxns(tissueModel1,removedRxns);
499 |     fba=optimizeCbModel(tissueModel1);
500 | 
501 |     % prunnig the reactions without GPR
502 | 
503 |     exp4=zeros(length(tissueModel1.rxns),1);
504 |     for q=1:length(tissueModel1.rxns)
505 |         if	ismember(tissueModel1.rxns(q),model.rxns)
506 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModel1.rxns(q))),3);
507 |         end
508 |     end
509 |     tissueModelF5 = KeepNecessaryRxns(tissueModel1, exp4, 0.0001, 0.1);
510 | 
511 |     AA2=setdiff(tissueModel1.rxns,tissueModelF5.rxns);
512 | 
513 |     % get all the reaction with score 1 excluding the transport and exchange
514 |     % reactions
515 |     FromMetaGenomics=model.rxns(find(exp(:,1) >= 1));
516 |     indexEx=strfind(model.rxns,'Ex');
517 |     IndexEx = find(not(cellfun('isempty',indexEx)));
518 |     indexTr=strfind(model.rxns,'t_');
519 |     IndexTr = find(not(cellfun('isempty',indexTr)));
520 |     indexOfTrEx=union(IndexEx,IndexTr);
521 |     TrEx=model.rxns(indexOfTrEx)
522 |     FromMetaGenomics=setdiff(FromMetaGenomics,TrEx);
523 |     % get the dead end reactions with score 1 
524 |     tissueModelF5=tissueModel1
525 |     index=1:50:length(AA2);
526 |     for h1=1:length(index)
527 |         if h1 ~= length(index)
528 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
529 |             d=AA2(index(h1):(index(h1)+49));
530 |             T1=tissueModelF5;
531 |             T1= removeRxns(T1,d);
532 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
533 |             removed=setdiff(removedRxns1,removedRxns);
534 |             hhh=intersect(removed,FromMetaGenomics);
535 |             if  length(hhh)<10
536 |                 tissueModelF5=T1;
537 |             else
538 |                 for h2=1:length(d)
539 |                     T1=tissueModelF5;
540 |                     T1= removeRxns(T1,d{h2});
541 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
542 |                     removed=setdiff(removedRxns1,removedRxns);
543 |                     hhh=intersect(removed,FromMetaGenomics);
544 |                     if  length(hhh)<10
545 |                         tissueModelF5=T1;
546 |                     end
547 |                 end
548 |             end
549 |         else
550 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
551 |             d=AA2(index(h1):end);
552 |             for h3=1:length(d)
553 |                     T1=tissueModelF5;
554 |                     T1= removeRxns(T1,d{h3});
555 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
556 |                     removed=setdiff(removedRxns1,removedRxns);
557 |                     hhh=intersect(removed,FromMetaGenomics);
558 |                     if  length(hhh)<10
559 |                         tissueModelF5=T1;
560 |                     end
561 |             end
562 |         end
563 |     end
564 | 
565 |     exp4=zeros(length(tissueModelF5.rxns),1);
566 |     for q=1:length(tissueModelF5.rxns)
567 |         if	ismember(tissueModelF5.rxns(q),model.rxns)
568 |             exp4(q,1)=metagenomeset(find(strcmp(model.rxns,tissueModelF5.rxns(q))),3);
569 |         end
570 |     end
571 | 
572 |     AA1=tissueModelF5.rxns(find(exp4<1 & exp4> -1));
573 |     BB1=[];
574 |     for q=1:length(AA1)
575 |         if	ismember(AA1(q),model.rxns)
576 |             BB1(q,1)=metagenomeset(find(strcmp(model.rxns,AA1{q})),3);
577 |         else 
578 |             BB1(q,1)=1;
579 |         end
580 |     end
581 | 
582 | 
583 |     T=table(AA1,BB1);
584 |     Sort_Table = sortrows(T,'BB1');
585 |     AA2=table2cell(Sort_Table(:,1));
586 |     %prune the reactions if they dont collapse the network and functionality of the model
587 |     fba=optimizeCbModel(tissueModelF5);
588 |     index=1:50:length(AA2);
589 |     for h1=1:length(index)
590 |         if h1 ~= length(index)
591 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
592 |             d=AA2(index(h1):(index(h1)+49));
593 |             T1=tissueModelF5;
594 |             T1= removeRxns(T1,d);
595 |             [~, ~, removedRxns1] = removeDeadEnds(T1);
596 |             removed=setdiff(removedRxns1,removedRxns);
597 |             hhh=intersect(removed,FromMetaGenomics);
598 |             fbaa=optimizeCbModel(T1);
599 |             if  length(hhh)<10  && fbaa.f > fba.f*0.1
600 |                 tissueModelF5=T1;
601 |             else
602 |                 for h2=1:length(d)
603 |                     T1=tissueModelF5;
604 |                     T1= removeRxns(T1,d{h2});
605 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
606 |                     removed=setdiff(removedRxns1,removedRxns);
607 |                     hhh=intersect(removed,FromMetaGenomics);
608 |                     fbaa=optimizeCbModel(T1);
609 |                     if  length(hhh)<10  && fbaa.f > fba.f*0.1
610 |                         tissueModelF5=T1;
611 |                     end
612 |                 end
613 |             end
614 |         else
615 |             [~, ~, removedRxns] = removeDeadEnds(tissueModelF5);
616 |             d=AA2(index(h1):end);
617 |             for h3=1:length(d)
618 |                     T1=tissueModelF5;
619 |                     T1= removeRxns(T1,d{h3});
620 |                     [~, ~, removedRxns1] = removeDeadEnds(T1);
621 |                     removed=setdiff(removedRxns1,removedRxns);
622 |                     hhh=intersect(removed,FromMetaGenomics);
623 |                     fbaa=optimizeCbModel(T1);
624 |                     if  length(hhh)<10 && fbaa.f > fba.f*0.1
625 |                         tissueModelF5=T1;
626 |                     end
627 |             end
628 |         end
629 |     end
630 | 
631 |     model_aerobe=tissueModelF5;
632 | 	
633 | 	reactionInModel=union(model_anaerobic.rxns,model_aerobe.rxns);
634 | 	removeRxnsF=setdiff(model.rxns,reactionInModel);
635 | 	contextSpecificModel= removeRxns(model,removeRxnsF)
636 | 	contextSpecificModel.lb(find(strcmp(contextSpecificModel.rxns, 'rxn08173')))=0;
637 |     contextSpecificModel.ub(find(strcmp(contextSpecificModel.rxns, 'rxn08173')))=1000;
638 |     if exist('produced','var')
639 |         contextSpecificModel.lb(find(ismember(contextSpecificModel.rxns,produced)))=0;
640 |         contextSpecificModel.ub(find(ismember(contextSpecificModel.rxns,produced)))=1000;
641 |     end
642 | end
643 |     
644 | end
645 | 
function reducedModelTemp = KeepNecessaryRxns(model, score, threshold, min)
% KeepNecessaryRxns prunes a model down to the reactions needed to keep
% the objective feasible, preferring reactions with a high score.
%
% The model is converted to irreversible form, the objective reaction(s)
% are forced to carry at least min times the optimal objective value, and
% the flux through every reaction scoring below threshold is minimised,
% weighted by (threshold - score). Reactions that carry flux in that
% solution, or whose score is above threshold, are kept; every other
% reaction is removed from the original (reversible) model.
%
%inputs:
%   model:              metabolic model accepted by convertToIrreversible,
%                       optimizeCbModel and removeRxns
%   score:              reaction scores, one row per reaction of model
%                       (first column is used); a score of -1 is treated as 0
%   threshold:          score below which a reaction is penalised
%   min:                fraction of the optimal objective value imposed as
%                       lower bound on the objective reaction(s).
%                       NOTE: this name shadows the builtin min inside the
%                       function body; do not call min() here.
%outputs:
%   reducedModelTemp:   model without the reactions judged unnecessary

[tempModel,~,IndexRev2irrev,IndexIrrev2rev] = convertToIrreversible(model);
nIrrev = length(tempModel.rxns);

% carry the score of each reversible reaction over to its irreversible parts
expressionRxnsIrrev = score(IndexIrrev2rev(1:nIrrev,1),1);
expressionRxnsIrrev = expressionRxnsIrrev(:);
expressionRxnsIrrev(expressionRxnsIrrev==-1) = 0;

% minimum fraction of objective
cc = optimizeCbModel(model);
tempModel.lb(tempModel.c==1,1) = cc.f*min;

% penalise low-scoring reactions, using the distance to the threshold as weight
tempModel.c(:,1) = 0;
lowScore = expressionRxnsIrrev < threshold;
tempModel.c(lowScore,1) = threshold - expressionRxnsIrrev(lowScore);

gimmeSolution = optimizeCbModel(tempModel,'min');

% only trust the flux vector when the solver reports an optimal solution;
% otherwise no reaction is marked as flux-carrying
reactionScoreTransition = zeros(nIrrev,1);
if gimmeSolution.stat == 1
    reactionScoreTransition(gimmeSolution.x>0,1) = 1;
end
% reactions scoring above the threshold are always kept
reactionScoreTransition(expressionRxnsIrrev>threshold) = 1;

% translate the keep/remove decision back to the reversible model: a
% reaction is kept when any of its irreversible parts is kept
reactionScoreRev = zeros(length(model.rxns),1);
for i=1:length(model.rxns)
    parts = IndexRev2irrev{i,1};
    if any(reactionScoreTransition(parts(:)) > 0)
        reactionScoreRev(i,1) = 1;
    end
end
rxn2remove = model.rxns(reactionScoreRev == 0);
reducedModelTemp = removeRxns(model,rxn2remove);
end
function reducedModelTemp = KeepNecessaryRxnsLikeGIMME(model, score, threshold, min)
% KeepNecessaryRxnsLikeGIMME prunes a model in a GIMME-like fashion.
%
% The model is converted to irreversible form, the objective reaction(s)
% are forced to carry at least min times the optimal objective value, and
% the flux through reactions whose score lies in (-1, threshold) is
% minimised, weighted by (threshold - score). Reactions scoring -1 or
% lower receive no penalty. Reactions that carry flux in an optimal
% solution, or whose score is above threshold, are kept; the rest are
% removed from the original (reversible) model.
%
%inputs:
%   model:              metabolic model accepted by convertToIrreversible,
%                       optimizeCbModel and removeRxns
%   score:              reaction scores, one row per reaction of model
%                       (first column is used)
%   threshold:          score below which a reaction is penalised
%   min:                fraction of the optimal objective value imposed as
%                       lower bound on the objective reaction(s).
%                       NOTE: this name shadows the builtin min inside the
%                       function body; do not call min() here.
%outputs:
%   reducedModelTemp:   model without the reactions judged unnecessary

[modelIrrev,~,rev2irrev,irrev2rev] = convertToIrreversible(model);
% carry the score of each reversible reaction over to its irreversible parts
expressionRxnsIrrev = zeros(length(modelIrrev.rxns),1);
for i1=1:length(modelIrrev.rxns)
    expressionRxnsIrrev(i1,1) = score(irrev2rev(i1,1),1);
end
cc=optimizeCbModel(model);
modelIrrev.lb(find(modelIrrev.c ==1),1)=cc.f*min; % minimum fraction of objective
modelIrrev.c(:,1)=0;
for i1=1:length(modelIrrev.rxns)
    if (expressionRxnsIrrev(i1,1) > -1)   %if not absent reaction
        if (expressionRxnsIrrev(i1,1) < threshold)
            modelIrrev.c(i1,1) = threshold-expressionRxnsIrrev(i1,1); % use expression level as weight
        end
    end
end
gimmeSolution = optimizeCbModel(modelIrrev,'min');
reactionScoreTransition=zeros(length(expressionRxnsIrrev),1);
% only use the flux vector when the solver reports an optimal solution
if (gimmeSolution.stat ~= 1)
    reactionScoreTransition(:,1) = 0;
else
    reactionScoreTransition(find(gimmeSolution.x>0),1)=1;
end
% reactions scoring above the threshold are always kept
reactionScoreTransition(find(expressionRxnsIrrev>threshold))=1;

%Translate reactionActivity to reversible model
% nRxns was previously undefined here, which made the function error out
nRxns = length(model.rxns);
reactionActivity = zeros(nRxns,1);
for i=1:nRxns
    for j=1:size(rev2irrev{i,1},2)
        if (reactionScoreTransition(rev2irrev{i,1}(1,j)) > reactionActivity(i,1))
            reactionActivity(i,1) = reactionScoreTransition(rev2irrev{i,1}(1,j));
        end
    end
end
remove = model.rxns(reactionActivity == 0);
reducedModelTemp = removeRxns(model,remove);
end


--------------------------------------------------------------------------------
/Functions/contextSpecificModelTune.m:
--------------------------------------------------------------------------------
  1 | function [modelTuned,modelInfo]= contextSpecificModelTune(model,MSPInformation,reactionScore,threshold,modelseed)
  2 | % this function tune the species-specefic model and gather the model information.
  3 | %inputs:
  4 | %	model:				species-specefic model
  5 | %   MSPInformation:     a structure includes:
  6 | %						taxoLevel, the taxonomy names. taxoInfo, taxonomy information for each
  7 | % 						species. taxoGroup: taxonomy group fot bacteria. rxns, the reaction
  8 | % 						name in reference model. bacteria, a list of MSP IDs. BacteriaNames,
  9 | %						list of species names. species, names of the species.
 10 | % 						RxnStateAll, the reaction state (absent/present) for each bacteria
 11 | %   reactionScore:      a matrix includes 3 different scores for each reaction.
 12 | %	threshold:			includes a threshold for each specified taxonomy level.	
 13 | %	modelseed			true or false
 14 | %                       
 15 | % output:
 16 | %   modelTuned:     species-specefic model
 17 | %	modelInfo: 		provides the gap filling percentage at different levels: i.e. taxonomy 
 18 | %					proximity based, further taxonomy level, not annotated (without
 19 | %					gene-protein-reaction relation) and the total gap filling percentage
 20 | 
 21 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 22 | 
 23 | 
 24 | %find metabolites of exchange reactions
 25 | exchangeMets=GetExchangeMetabolite(model)
 26 | %remove dead end exchange reactions 
 27 | if ~isempty(exchangeMets)
 28 |     for hh1=1:size(exchangeMets,1)
 29 |         expression = exchangeMets{hh1};
 30 |         matchStr = regexp(model.mets,expression,'match');
 31 |         Index = find(~(cellfun('isempty',matchStr)));
 32 |         len=find(any(model.S(Index,:),1))';
 33 |         if length(len) <= 2
 34 |             model=removeRxns(model,model.rxns(len));
 35 |         end
 36 |     end
 37 | end
 38 | 
 39 | model.description='by MIGRENE toolbox';
 40 | if ~isempty(MSPInformation.BacteriaNames)
 41 |     index2=find(ismember(MSPInformation.bacteria,MSPInformation.species))
 42 |     model.modelName=MSPInformation.BacteriaNames{index2,1};  
 43 | end
 44 | model.modelID=MSPInformation.species; 
 45 | model.compNames={'Extracellular';'Cytosol';'ExtracellularForElectronTransportChain';'boundary'};
 46 | model.comps={'C_e';'C_c';'C_pe';'e'};
 47 | 
 48 | if modelseed  
 49 |     MIGDIR = fileparts(which('MIGRENE_pipeline'));
 50 |     METPATH=[MIGDIR filesep 'mat' filesep 'MetInformation.mat'];
 51 |     load(METPATH)
 52 |     for w1 =1:size(MetInformation,1)
 53 |         if ismember(MetInformation{w1,1},model.mets)
 54 |             model.metNames(find(strcmp(model.mets, MetInformation{w1,1})),1)=MetInformation(w1,3);
 55 |             model.metKEGG(find(strcmp(model.mets, MetInformation{w1,1})),1)=MetInformation(w1,4);
 56 |             model.metFormulas(find(strcmp(model.mets, MetInformation{w1,1})),1)=MetInformation(w1,5); 
 57 |         end
 58 |     end
 59 | end
 60 | modelTuned=model
 61 | % Assign an empty table to gather all the info of the model as below
 62 | modelInfo = array2table(zeros(1,8))
 63 | modelInfo.Properties.VariableNames = {'number_of_rxns',...
 64 |     'number_of_rxns_without_transport_rxns','number_of_rxns_with_genes','level_of_gapfilling','percentage_of_gapfillig'...
 65 |     'gapfillig_at_the_level','gapfillig_at_other_level',...
 66 |     'gapfillig_at_nonAnnotatedRxns'};
 67 | modelInfo.number_of_rxns=length(model.rxns);
 68 | indexEx=strfind(model.rxns,'Ex');
 69 | IndexEx = find(cellfun('isempty',indexEx));
 70 | indexTr=strfind(model.rxns,'t_');
 71 | IndexTr = find(cellfun('isempty',indexTr));
 72 | indexOfTrExRxns=intersect(IndexEx,IndexTr);
 73 | modelInfo.number_of_rxns_without_transport_rxns=length(indexOfTrExRxns);
 74 | %get the level of gapfilling
 75 | UsedThreshold=threshold(find(threshold,1,'first'));
 76 | if isempty(UsedThreshold)
 77 | UsedThreshold1=0;
 78 | UsedThreshold=0;
 79 | else
 80 | UsedThreshold1=find(threshold,1,'first');
 81 | end
 82 | %
 83 | if UsedThreshold1 == 0
 84 |     level='not classified';
 85 | else
 86 |     level=MSPInformation.taxoLevel{UsedThreshold1};
 87 | end
 88 | 
 89 | modelInfo.level_of_gapfilling=level;
 90 | rxns=model.rxns(indexOfTrExRxns)
 91 | g=find(ismember(MSPInformation.rxns,rxns));
 92 | metagenomesetMSP=reactionScore(g,3);
 93 | % number of reactions with corresponding genes in the species
 94 | modelInfo.number_of_rxns_with_genes=length(find(metagenomesetMSP>=1)); 
 95 | % gapfilling info
 96 | % percentage of gapfilling
 97 | modelInfo.percentage_of_gapfillig=(1-length(find(metagenomesetMSP>=1))/...
 98 |     length(indexOfTrExRxns))*100;
 99 | % percentage of number of reactions added by gap filling at the lowest classified level
100 | modelInfo.gapfillig_at_the_level=length(find(metagenomesetMSP>=UsedThreshold...
101 |     & metagenomesetMSP <1))/length(indexOfTrExRxns)*100;
102 | % percentage of number of reactions added by gap filling at the other level 
103 | modelInfo.gapfillig_at_other_level=length(find(metagenomesetMSP>=0 ...
104 |     & metagenomesetMSP < UsedThreshold))/length(indexOfTrExRxns)*100; 
105 | % percentage of number of reactions added by gap filling without any info in the catalog 
106 | modelInfo.gapfillig_at_nonAnnotatedRxns=length(find(metagenomesetMSP==-1))...
107 |     /length(indexOfTrExRxns)*100;
108 | end
109 | 
function exchangeMets=GetExchangeMetabolite(model)
% Returns the metabolites taking part in exchange reactions (reaction IDs
% containing 'Ex_'), with the extracellular suffix stripped from each name.
isExchange = ~cellfun('isempty',strfind(model.rxns,'Ex_'));
% metabolites with a non-zero coefficient in at least one exchange reaction
takesPart = any(model.S(:,isExchange),2);
exchangeMets = model.mets(takesPart);
% suffixes are stripped in this exact order.
% NOTE(review): once '[e]' has been removed the last pattern 'e[e]' can no
% longer match; confirm whether it was meant to run before '[e]'.
suffixes = {'ee[e]','[e]','e[e]'};
for k=1:numel(suffixes)
    exchangeMets = strrep(exchangeMets,suffixes{k},'');
end
end


--------------------------------------------------------------------------------
/Functions/convertCatalogAnnotation.m:
--------------------------------------------------------------------------------
 1 | function [catalogFileConverted]= convertCatalogAnnotation(inputFile,mapping,numWorkers)
 2 | % this function automatically download and convert KO to KEGG reaction IDs in catalog.
 3 | %inputs:
 4 | %   inputFile:             cell array contains two columns, first column is gene name and 
 5 | %                          second column provides KO annotation
 6 | % OPTIONAL INPUTS:
 7 | %   mapping:               either cell array contains KOs (column 1) and their corresponding kegg 
 8 | %                          reaction ID (column 2) or empty cell array
 9 | 
10 | %outputs: 
11 | %   catalogFileConverted:   cell array contains two columns, first column is gene name and 
12 | %                           second column provides kegg reaction annotation
13 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
14 | 
15 | % check the number of workers for parallelization 
16 | if numWorkers > 1
17 |     poolobj = gcp('nocreate');
18 |     if isempty(poolobj)
19 |         parpool(numWorkers);
20 |     end
21 | else
22 |     disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers')
23 | end
24 | % if the mapping file for converting KO to reaction ID is not provided, it
25 | % automatically download it from KEGG API. 
26 | if isempty(mapping)
27 | %   get path to where the MIGRENE Toolbox is located
28 |     MIG = fileparts(which('MIGRENE_pipeline'));
29 |     DATADIR=[MIG filesep 'data'];
30 | %   save ko2rn file in the directory "data" of MIGRENE Toolbox
31 |     urlwrite('http://rest.kegg.jp/link/reaction/ko',[DATADIR filesep 'ko2rn.txt']);
32 |     Transition1 = readtable([DATADIR filesep 'ko2rn.txt'],'Format','%s%s','ReadVariableNames', false);
33 |     ko2rn=table2cell(Transition1) ;
34 | else
35 |     ko2rn=mapping;
36 | end
37 | 
38 | %remove prefix ko: and rn: from ko2rn if present 
39 | ko2rn(:,1)=strrep(ko2rn(:,1),'ko:','');
40 | ko2rn(:,2)=strrep(ko2rn(:,2),'rn:','');
41 | 
42 | %group ko2rn by KO so that the rn ID for the same KO were summerized. 
43 | [~,~,ind]=unique(ko2rn(:,1));
44 | uni=unique(ind);
45 | 
46 | File={};
47 | for w=1:numel(uni)
48 |     Index=find(ind == uni(w));
49 |     transition2=unique(ko2rn(Index,2));
50 |     str =strjoin(transition2,',');
51 |     File(w,1)=unique(ko2rn(Index,1));
52 |     File{w,2}=str;
53 | end
54 | inputFile1=inputFile;
55 | for w=1:size(inputFile,1)
56 |     transition1=inputFile{w,2};
57 |     connection=File(find(strcmp(File(:,1),transition1)),2);
58 |     if ~isempty(connection)
59 |     inputFile1(w,2)=connection;
60 |     end
61 | end
62 | 
63 | expression = 'K\d\d\d\d\d';
64 | matchStr = regexp(inputFile1(:,2),expression,'match');
65 | Index = find(cellfun('isempty',matchStr));
66 | catalogFileConverted=inputFile1(Index,:);
67 | end
68 | 
69 | 


--------------------------------------------------------------------------------
/Functions/fakeModelGenerator.m:
--------------------------------------------------------------------------------
 1 | function [fakemodel, fakemodel1]=fakeModelGenerator(mets_art,Ex_art)
 2 | %inputs:
 3 | %   mets_art: 			list of exchange metabolites and suffix of the compartments
 4 | %   Ex_art:				list of exchange reactions and suffix of the compartments
 5 | %outputs:
 6 | %   fakemodel: 			a metabolic model
 7 | %   fakemodel1: 		a metabolic model
 8 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 9 | fakemodel.mets={mets_art{:,2} mets_art{:,3} mets_art{:,4}}';
10 | fakemodel.rxns={Ex_art{:,2} Ex_art{:,3} Ex_art{:,4} Ex_art{:,5}}';
11 | fakemodel.S=zeros(length(fakemodel.mets),length(fakemodel.rxns));
12 | fakemodel.lb=zeros(length(fakemodel.rxns),1);
13 | fakemodel.ub=zeros(length(fakemodel.rxns),1);
14 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,2))))=-1000;
15 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,2))))=0;
16 | 
17 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,3))),1)=0;
18 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,3))),1)=1000;
19 | 
20 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,4))),1)=0;
21 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,4))),1)=100000;
22 | 
23 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,5))),1)=0;
24 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,5))),1)=100000;
25 | 
26 | for w1=1:size(mets_art,1)
27 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,3})),find(strcmp(fakemodel.rxns,Ex_art{w1,2})))=-1;
28 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,3})),find(strcmp(fakemodel.rxns,Ex_art{w1,3})))=-1;
29 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,2})),find(strcmp(fakemodel.rxns,Ex_art{w1,3})))=1;
30 |     
31 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,2})),find(strcmp(fakemodel.rxns,Ex_art{w1,4})))=-1;
32 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,4})),find(strcmp(fakemodel.rxns,Ex_art{w1,4})))=1;
33 |     
34 |     fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,4})),find(strcmp(fakemodel.rxns,Ex_art{w1,5})))=-1;
35 | end
36 | 
37 | fakemodel1.mets={mets_art{:,1} mets_art{:,2}}';
38 | fakemodel1.rxns=Ex_art(:,1);
39 | fakemodel1.S=zeros(length(fakemodel1.mets),length(fakemodel1.rxns));
40 | fakemodel1.lb=zeros(length(fakemodel1.rxns),1);
41 | fakemodel1.ub=zeros(length(fakemodel1.rxns),1);
42 | 
43 | fakemodel1.lb(find(ismember(fakemodel1.rxns,Ex_art(:,1))),1)=-1000;
44 | fakemodel1.ub(find(ismember(fakemodel1.rxns,Ex_art(:,1))),1)=1000;
45 | 
46 | for w1=1:size(mets_art,1)  
47 |     fakemodel1.S(find(strcmp(fakemodel1.mets,mets_art{w1,2})),find(strcmp(fakemodel1.rxns,Ex_art{w1,1})))=1;
48 |     fakemodel1.S(find(strcmp(fakemodel1.mets,mets_art{w1,1})),find(strcmp(fakemodel1.rxns,Ex_art{w1,1})))=-1;
49 | end
50 | end
51 | 
52 | 


--------------------------------------------------------------------------------
/Functions/microbiomeGEMgeneration.m:
--------------------------------------------------------------------------------
  1 | function [microbiomeGEM]= microbiomeGEMgeneration(genericModel,cataloginfo,annotationFile,numWorkers)
  2 | % this function automatically download and convert KO to KEGG reaction IDs in your catalog.
  3 | %inputs:
  4 | %   genericModel:        metabolic Model with COBRA or RAVEN format, 
  5 | %   cataloginfo:          cell array contains two columns, first column is gene name and 
  6 | %                        second column provides kegg reaction ID
  7 | % OPTIONAL INPUTS:
  8 | %   annotationFile:       either cell array contains reaction IDs of the model(column 1),
  9 | %                         and column(s) for KO, EC and kegg RN annotations or empty cell array
 10 | %   numWorkers:           integer indicating the number of cores to use for parallelization
 11 | 
 12 | %outputs: 
 13 | %   microbiomeGEM:   cell array contains two columns, first column is gene name and 
 14 | %                           second column provides kegg reaction anntation
 15 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 16 | 
 17 | if nargin<4
 18 |     numWorkers=1;
 19 | end
 20 | % get path to where the MIGRENE Toolbox is located
 21 | MIG = fileparts(which('MIGRENE_pipeline'));
 22 | DATADIR=[MIG filesep 'data'];
 23 | % check the number of workers for parallelization 
 24 | if numWorkers > 1
 25 |     poolobj = gcp('nocreate');
 26 |     if isempty(poolobj)
 27 |         parpool(numWorkers);
 28 |     end
 29 | else
 30 |     disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers')
 31 | end
 32 | %% 
 33 | % if annotationFile is not provided, it automatically finds the annotations in the model. 
 34 | if isempty(annotationFile)
 35 |     disp('collecting the annotations from the model')
 36 |     %find fields including KO, rn or EC IDs
 37 |     KOexpression = 'K\d\d\d\d\d'; KOs={};
 38 |     RNexpression = 'R\d\d\d\d\d'; RNs={};
 39 |     ECexpression = 'EC:\d.\d.'; ECs={};
 40 |     fnames = fieldnames(genericModel);
 41 |     for w=1:numel(fnames)
 42 |         transition=strcat('genericModel.',fnames(w));
 43 |         transition=eval(char(transition));
 44 |         if iscellstr(transition)
 45 |             % find fields providing KO annotation for the reactions
 46 |             matchStr = regexp(transition,KOexpression,'match');
 47 |             Index = find(not(cellfun('isempty',matchStr)));
 48 |             if length(Index)/length(genericModel.rxns)*100>1
 49 |                 KOs= vertcat(KOs,fnames(w));
 50 |             end
 51 |             % find fields providing kegg reaction IDs annotation for the reactions
 52 |             matchStr = regexp(transition,RNexpression,'match');
 53 |             Index = find(not(cellfun('isempty',matchStr)));
 54 |             if length(Index)/length(genericModel.rxns)*100>1
 55 |                 RNs= vertcat(RNs,fnames(w));
 56 |             end
 57 |             % find fields providing EC annotation for the reactions
 58 |             matchStr = regexp(transition,ECexpression,'match');
 59 |             Index = find(not(cellfun('isempty',matchStr)));
 60 |             if length(Index)/length(genericModel.rxns)*100>1
 61 |                 ECs= vertcat(ECs,fnames(w));
 62 |             end
 63 |         end
 64 |     end
 65 | else
 66 |    %find fields including KO, rn or EC IDs in provided annotationFile
 67 |    disp('collecting the annotations from the annotationFile')
 68 |    disp('as annotationFile is provided, the function ignores the available annotations in the model.')
 69 |    disp('please make sure annotationFile follows the following format:')
 70 |    disp('cell array contains reaction IDs of the model(column 1),and column(s) for KO, EC and kegg RN annotations')
 71 |     KOexpression = 'K\d\d\d\d\d'; KOs=[];
 72 |     RNexpression = 'R\d\d\d\d\d'; RNs=[];
 73 |     ECexpression = 'EC:\d.\d.'; ECs=[];
 74 |     numberOfannotation=size(annotationFile,2);
 75 |     for w=2:numberOfannotation
 76 |         transition=annotationFile(:,w);
 77 |         if iscellstr(transition)
 78 |             % find fields providing KO annotation for the reactions
 79 |             matchStr = regexp(transition,KOexpression,'match');
 80 |             Index = find(not(cellfun('isempty',matchStr)));
 81 |             if length(Index)/length(genericModel.rxns)*100>1
 82 |                 KOs= [KOs w];
 83 |             end
 84 |             % find fields providing kegg reaction IDs annotation for the reactions
 85 |             matchStr = regexp(transition,RNexpression,'match');
 86 |             Index = find(not(cellfun('isempty',matchStr)));
 87 |             if length(Index)/length(genericModel.rxns)*100>1
 88 |                 RNs=  [RNs w];
 89 |             end
 90 |             % find fields providing EC annotation for the reactions
 91 |             matchStr = regexp(transition,ECexpression,'match');
 92 |             Index = find(not(cellfun('isempty',matchStr)));
 93 |             if length(Index)/length(genericModel.rxns)*100>1
 94 |                 ECs=  [ECs w];
 95 |             end
 96 |         end
 97 |     end
 98 | end
 99 | 
100 | if length(RNs)>1 && isempty(annotationFile)
101 |     error('there are more than one field in the metabolic model including kegg reaction IDs. It must be one cell array with kegg reaction ID') 
102 | end
103 | 
104 | if length(ECs)>1 && isempty(annotationFile)
105 |     error('there are more than one field in the metabolic model including EC annotation. It must be one cell array with EC annotation') 
106 | end
107 | 
108 | if length(KOs)>1 && isempty(annotationFile)
109 |     error('there are more than one field in the metabolic model including KO annotation. It must be one cell array with KO annotation') 
110 | end
111 | 
112 | if length(RNs)>1 && ~isempty(annotationFile)
113 |     error('there are more than one column in annotationFile including kegg reaction IDs. It must be one cell array with kegg reaction ID') 
114 | end
115 | 
116 | if length(ECs)>1 && ~isempty(annotationFile)
117 |     error('there are more than one column in annotationFile including EC annotation. It must be one cell array with EC annotation') 
118 | end
119 | 
120 | if length(KOs)>1 && ~isempty(annotationFile)
121 |     error('there are more than one column in annotationFile including KO annotation. It must be one cell array with KO annotation') 
122 | end
123 | 
124 | if isempty(KOs)
125 |     if isempty(RNs)
126 |         if isempty(ECs)
127 |             error('there are not any info in the metabolic model or annotationFile for KO,EC or kegg reaction ID annotation. It must be at least one cell array with one of the annotations') 
128 |         else
129 |             disp('EC annotation was found for integration of genes in catalog into model')
130 |         end
131 |     else
132 |         if isempty(ECs)
133 |             disp('kegg reaction annotation was found for integration of genes in catalog into model')
134 |         else
135 |             disp('kegg reaction and EC annotation were found for integration of genes in catalog into model')
136 |         end
137 |     end
138 | else
139 |      if isempty(RNs)
140 |         if isempty(ECs)
141 |             disp('KO annotation was found for integration of genes in catalog into model') 
142 |         else
143 |             disp('EC and KO annotations were found for integration of genes in catalog into model')
144 |         end
145 |     else
146 |         if isempty(ECs)
147 |             disp('KO and kegg reaction annotations were found for integration of genes in catalog into model')
148 |         else
149 |             disp('KO, kegg reaction and EC annotation were found for integration of genes in catalog into model')
150 |         end
151 |     end
152 | end
153 | 
154 | % make the model ready for integration
155 | %collect all the annotations in the model in one temporary field
156 | genericModel.temporary=cell([numel(genericModel.rxns) 1]);
157 | if ~isempty(RNs) && isempty(annotationFile)
158 |     transition=strcat('genericModel.',RNs(1));
159 |     genericModel.temporary=eval(char(transition));
160 | elseif ~isempty(RNs) && ~isempty(annotationFile)
161 |     for q=1:numel(genericModel.rxns)
162 |       if ismember(genericModel.rxns{q},annotationFile(:,1))      
163 |         genericModel.temporary(q,1)=annotationFile(find(strcmp(model.rxns,genericModel.rxns{q})),RNs);
164 |       end
165 |     end
166 | end
167 | 
168 | if ~isempty(ECs)
169 |     urlwrite('http://rest.kegg.jp/link/reaction/enzyme',[DATADIR filesep 'ec2rn.txt']);
170 |     Transition1 = readtable([DATADIR filesep 'ec2rn.txt'],'Format','%s%s','ReadVariableNames', false);
171 |     ec2rn=table2cell(Transition1);
172 | end
173 | if ~isempty(KOs)
174 |     urlwrite('http://rest.kegg.jp/link/reaction/ko',[DATADIR filesep 'ko2rn.txt']);
175 |     Transition1 = readtable([DATADIR filesep 'ko2rn.txt'],'Format','%s%s','ReadVariableNames', false);
176 |     ko2rn=table2cell(Transition1) ;
177 | end
178 | check=who;
179 | 
180 | if ismember('ko2rn',check)
181 |     ko2rn(:,1)=strrep(ko2rn(:,1),'ko:','');
182 |     ko2rn(:,2)=strrep(ko2rn(:,2),'rn:','');
183 |     %find not annotated reactions in the temporary file and fill in regarding available KO
184 |     index=strfind(genericModel.temporary,'R');
185 |     Index = find(cellfun('isempty',index));
186 |     if isempty(annotationFile)
187 |         transition=strcat('genericModel.',KOs(1));
188 |         koInfo=eval(char(transition));
189 |         koInfo=strrep(koInfo,' ',',');
190 |         koInfo=strrep(koInfo,';',',');
191 |         for qq=1:length(Index)     
192 |             ec=char(koInfo(Index(qq)));
193 |             if ~isempty(ec)
194 |                 tra1=strsplit(ec,',')';
195 |                 expression = 'K\d\d\d\d\d';
196 |                 matchStr = regexp(tra1,expression,'match');
197 |                 tra1=tra1(find(not(cellfun('isempty',matchStr))));
198 |                 anno={};
199 |                 if ~isempty(tra1)
200 |                     for ee=1:length(tra1)
201 |                          if ismember(tra1{ee},ec2rn(:,1))
202 |                              % get all the kegg reaction IDs related for the KO
203 |                              dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2);
204 |                              anno=vertcat(anno,dd);
205 |                          end
206 |                     end
207 |                     genericModel.temporary{Index(qq),1}=strjoin(unique(anno),',');
208 |                 end
209 |             end
210 |         end
211 |     else
212 |         koInfo=annotationFile(:,KOs);
213 |         koInfo=strrep(koInfo,' ',',');
214 |         koInfo=strrep(koInfo,';',',');
215 |         for qq=1:length(Index)
216 | 			tran1=koInfo(find(strcmp(annotationFile(:,1),genericModel.rxns(Index(qq)))));
217 |             ec=char(tran1);
218 |             if ~isempty(ec)
219 |                 tra1=strsplit(ec,',')';
220 |                 expression = 'K\d\d\d\d\d';
221 |                 matchStr = regexp(tra1,expression,'match');
222 |                 tra1=tra1(find(not(cellfun('isempty',matchStr))));
223 |                 if ~isempty(tra1)
224 |                     anno={};
225 |                     for ee=1:length(tra1)
226 |                          if ismember(tra1{ee},ec2rn(:,1))
227 |                              dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2);
228 |                              anno=vertcat(anno,dd);
229 |                          end
230 |                     end
231 |                     genericModel.temporary{Index(qq),1}=strjoin(unique(anno),',');
232 |                 end
233 |             end
234 |         end 
235 |     end
236 | end
237 | 
238 |    
239 | if ismember('ec2rn',check)
240 |     %remove prefix rn: from ko2rn if present 
241 |     ec2rn(:,2)=strrep(ec2rn(:,2),'rn:','');
242 |     %find not annotated reactions in the temporary file and fill in regarding available EC
243 |     index=strfind(genericModel.temporary,'R');
244 |     Index = find(cellfun('isempty',index));
245 |     if isempty(annotationFile)
246 |         transition=strcat('genericModel.',ECs(1));
247 |         ecInfo=eval(char(transition));
248 |         ecInfo=strrep(ecInfo,' ',',');
249 |         ecInfo=strrep(ecInfo,';',',');
250 |         ecInfo=strrep(ecInfo,'EC','ec');
251 |         for qq=1:length(Index)     
252 |             ec=char(ecInfo(Index(qq)));
253 |             if ~isempty(ec)
254 |                 tra1=strsplit(ec,',')';
255 |                 expression = 'ec:\d.\d.';
256 |                 matchStr = regexp(tra1,expression,'match');
257 |                 tra1=tra1(find(not(cellfun('isempty',matchStr))));
258 |                 anno={};
259 |                 if ~isempty(tra1)
260 |                     for ee=1:length(tra1)
261 |                          if ismember(tra1{ee},ec2rn(:,1))
262 |                              % get all the kegg reaction IDs related for the ec
263 |                              dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2);
264 |                              anno=vertcat(anno,dd);
265 |                          end
266 |                     end
267 |                     genericModel.temporary{Index(qq),1}=strjoin(unique(anno),',');
268 |                 end
269 |             end
270 |         end
271 |     else
272 |         ecInfo=annotationFile(:,ECs);
273 |         ecInfo=strrep(ecInfo,' ',',');
274 |         ecInfo=strrep(ecInfo,';',',');
275 |         ecInfo=strrep(ecInfo,'EC','ec');
276 |         for qq=1:length(Index)
277 | 			tran1=ecInfo(find(strcmp(annotationFile(:,1),genericModel.rxns(Index(qq)))));
278 |             ec=char(tran1);
279 |             if ~isempty(ec)
280 |                 tra1=strsplit(ec,',')';
281 |                 expression = 'ec:\d.\d.';
282 |                 matchStr = regexp(tra1,expression,'match');
283 |                 tra1=tra1(find(not(cellfun('isempty',matchStr))));
284 |                 if ~isempty(tra1)
285 |                     anno={};
286 |                     for ee=1:length(tra1)
287 |                          if ismember(tra1{ee},ec2rn(:,1))
288 |                              dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2);
289 |                              anno=vertcat(anno,dd);
290 |                          end
291 |                     end
292 |                     genericModel.temporary{Index(qq),1}=strjoin(unique(anno),',');
293 |                 end
294 |             end
295 |         end 
296 |     end
297 | end
298 | 
299 | %% summerize the catalog based on kegg reaction IDs
300 | disp('start getting the catalog info for integration')
301 | cataloginfoLong=ConvertTOLongFormat(cataloginfo,numWorkers);
302 | [~,~,ind]=unique(cataloginfoLong(:,2));
303 | uni=unique(ind);
304 | catalogForInteg={};
305 | for w=1:numel(uni)
306 |     Index=find(ind == uni(w));
307 |     transition2=unique(cataloginfoLong(Index,1));
308 |     str =strjoin(transition2,',');
309 |     catalogForInteg(w,1)=unique(cataloginfoLong(Index,2));
310 |     catalogForInteg{w,2}=str;
311 | end
312 | 
313 | %% add GPRs in the model based on catalog data and generate the GEM
314 | disp('start adding GPR into the model')
315 | % this section fill all the following empty cell arrays
316 | genericModel.grRules=cell([numel(genericModel.rxns) 1]);
317 | genericModel.genes={};
318 | genericModel.rules=cell([numel(genericModel.rxns) 1]);
319 | genericModel.geneNames={};
320 | genericModel.rxnGeneMat={};
321 | 
322 | for w=1:numel(genericModel.temporary)
323 |     rnInfo=genericModel.temporary{w,1};
324 |     if ~isempty(rnInfo)
325 |         rnInfo=strrep(rnInfo,' ',',');
326 |         rnInfo=strrep(rnInfo,';',',');
327 |         tra1=strsplit(rnInfo,',')';
328 |         expression = 'R\d\d\d\d\d';
329 |         matchStr = regexp(tra1,expression,'match');
330 |         tra1=tra1(find(not(cellfun('isempty',matchStr))));
331 |         if ~isempty(tra1)
332 |             annotation={};
333 |             for ee=1:length(tra1)
334 |                  if ismember(tra1{ee},catalogForInteg(:,1))
335 |                      dd=catalogForInteg(ismember(catalogForInteg(:,1),tra1{ee}),2);
336 |                      annotation=vertcat(annotation,dd);
337 |                  end
338 |             end
339 |             genericModel.grRules(w,1)={strjoin(unique(annotation),',')};
340 |         end
341 |     else
342 |         genericModel.grRules(w,1)={''};    
343 |     end
344 | end
345 | 
346 | temp1={};
347 | for i=1:length(genericModel.grRules)
348 |     if ~isempty(genericModel.grRules{i})
349 |      tra2=strsplit(genericModel.grRules{i},',')';
350 |      temp1=vertcat(temp1,tra2);
351 |     end
352 | end
353 | genericModel.genes=unique(temp1); % genericModel.genes generated
354 | genericModel.geneNames=genericModel.genes; % genericModel.geneNames generated
355 | 
356 | c=[];
357 | for h=1:length(genericModel.grRules)
358 |     if  ~isempty(genericModel.grRules{h,1})
359 |         tra2=strsplit(genericModel.grRules{h,1},',')';
360 |         for k=1:length(tra2)
361 |         index=find(strcmp(genericModel.genes,tra2{k}));        
362 |         c=[c;[h index 1]];
363 |         end
364 |     end
365 | end
366 | checkTheSize=intersect(find(c(:,1)== length(genericModel.rxns)),find(c(:,2)== length(genericModel.genes)));
367 | if isempty(checkTheSize)
368 |     c=[c;[length(genericModel.rxns) length(genericModel.genes) 0]];
369 | end
370 | temp1= sparse(c(:,1),c(:,2),c(:,3));
371 | genericModel.rxnGeneMat=temp1; % genericModel.rxnGeneMat generated
372 | 
373 | 
374 | for h=1:length(genericModel.grRules)
375 |     if  ~isempty(genericModel.grRules{h,1})
376 |         tra2=strsplit(genericModel.grRules{h,1},',')';
377 |         collect={};
378 |         for k=1:length(tra2)
379 |             index=find(strcmp(genericModel.genes,tra2{k})) ;
380 |             converted=['x(' num2str(index) ')'];
381 |             collect=vertcat(collect,converted);
382 |         end
383 |         genericModel.rules(h,1)={strjoin(unique(collect),',')}; % generating genericModel.rules
384 |     else
385 |         genericModel.rules(h,1)={''}; 
386 |     end
387 | end
388 | 
389 | for i=1:length(genericModel.rules)
390 |     if ~isempty(regexp(genericModel.rules{i},',','match'))
391 |      genericModel.rules{i}=['( ' genericModel.rules{i} ' )'];
392 |     end
393 | end
394 | genericModel.rules=strrep(genericModel.rules,',',' | '); % genericModel.rules generated
395 | 
396 | 
397 | for i=1:length(genericModel.grRules)
398 |     if ~isempty(regexp(genericModel.grRules{i},',','match'))
399 |      genericModel.grRules{i}=['(' genericModel.grRules{i} ')'];
400 |     end
401 | end
402 | genericModel.grRules=strrep(genericModel.grRules,',',' or '); % genericModel.grRules generated
403 | 
404 | genericModel.rxnAnnotation=genericModel.temporary; %consider temporary file as extended annotation file for the model
405 | genericModel = rmfield( genericModel,'temporary'); 
406 | 
407 | microbiomeGEM=genericModel;
408 | end
409 | %%
410 | function [cataloginfoLong]= ConvertTOLongFormat(cataloginfo,numWorkers)
411 | % ConvertTOLongFormat rewrites the catalog in long format, i.e. one KEGG
412 | % reaction ID per row.
413 | %inputs:
414 | %   cataloginfo:        cell array, column 1: gene name, column 2: KEGG
415 | %                       reaction annotation(s) of the gene (char)
416 | %   numWorkers:         a parallel pool of this size is started when it is
417 | %                       larger than 1 and no pool is running yet
418 | %outputs:
419 | %   cataloginfoLong:    rows of cataloginfo holding exactly one 'R' in
420 | %                       column 2 (copied unchanged), followed by one
421 | %                       {gene, reaction ID} row per R\d\d\d\d\d match of the
422 | %                       rows holding several 'R'. Rows without any 'R' are
423 | %                       dropped; the output is empty when no row is left.
424 | 
425 | % the number of 'R' characters in column 2 is taken as the number of
426 | % reactions linked to the gene
427 | hits=strfind(cataloginfo(:,2),'R');
428 | nHits=cellfun('length',hits);
429 | singleRows=cataloginfo(nHits == 1,:);
430 | multiRows=cataloginfo(nHits > 1,:);
431 | 
432 | % check the number of workers for parallelization
433 | if numWorkers > 1
434 |     poolobj = gcp('nocreate');
435 |     if isempty(poolobj)
436 |         parpool(numWorkers);
437 |     end
438 | else
439 |     disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers')
440 | end
441 | 
442 | % no row with several reactions: nothing has to be expanded. This also
443 | % covers a catalog without any annotated gene, for which the output used
444 | % to stay unassigned (run-time error); an empty cell array is returned now.
445 | if isempty(multiRows)
446 |     cataloginfoLong=singleRows;
447 |     return
448 | end
449 | 
450 | % expand the rows with several reactions. The two columns are copied to
451 | % separate variables so that each of them is indexed in one single way
452 | % inside parfor; "expanded" is a parfor reduction variable.
453 | geneCol=multiRows(:,1);
454 | annoCol=multiRows(:,2);
455 | expanded=cell([0 2]);
456 | parfor w=1:size(multiRows,1)
457 |     rxnIDs=regexp(annoCol{w},'R\d\d\d\d\d','match');
458 |     block=cell(numel(rxnIDs),2);
459 |     block(:,1)=geneCol(w);
460 |     block(:,2)=rxnIDs';
461 |     expanded=[expanded; block];
462 | end
463 | 
464 | if isempty(singleRows)
465 |     cataloginfoLong=expanded;
466 | else
467 |     cataloginfoLong=vertcat(singleRows,expanded);
468 | end
469 | end
470 | 


--------------------------------------------------------------------------------
/Functions/pRSEGenerator.m:
--------------------------------------------------------------------------------
  1 | function [coverage,enrichment]= pRSEGenerator(modelList,PathToModels,abundance,sampleName,IDmap,GSE,significance)
  2 | % pRSEGenerator calculates the coverage and the enrichment p value of each
  3 | % reaction set (pathway) in each sample.
  4 | %inputs:
  5 | %   modelList:              list of model names.
  6 | %   PathToModels:           a structure includes the path where the models are
  7 | %                           available (field "path") and the name of model
  8 | %                           assigned in the .mat files (field "name")
  9 | %   abundance:              matrix of microbiome (MSP) abundance profile; the
 10 | %                           rows follow modelList, the columns follow sampleName
 11 | %   sampleName:             list of sample names
 12 | %   IDmap:                  ID mapping between reaction names in the models
 13 | %                           (column 1) and the IDs in the pathway file (column 2)
 14 | %   GSE:                    the pathway profile including reaction sets; column 1
 15 | %                           is the term, column 2 its comma separated IDs
 16 | %OPTIONAL INPUT
 17 | %   significance:           p value cut-off (default 1, i.e. no filtering); the
 18 | %                           coverage is set to zero where the p value is larger
 19 | %outputs:
 20 | %   coverage:               a table shows the coverage of each pathway in samples
 21 | %   enrichment:             a table shows the p value of the pathways in samples
 22 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 23 | if nargin<7
 24 |     significance=1;
 25 | end
 26 | 
 27 | % keep only the models whose .mat file is found in PathToModels.path
 28 | nListed=size(modelList,1);
 29 | modelFiles=cell(nListed,1);
 30 | isAvailable=false(nListed,1);
 31 | for h1=1:nListed
 32 |     modelFiles{h1}=[PathToModels.path filesep modelList{h1} '.mat'];
 33 |     isAvailable(h1)=exist(modelFiles{h1},'file')==2;
 34 | end
 35 | modelFiles=modelFiles(isAvailable);
 36 | abundance=abundance(isAvailable,:);
 37 | 
 38 | % load every model once and keep its reaction list. load is called with an
 39 | % output argument so that the content of the file cannot overwrite the
 40 | % variables of this function.
 41 | nModels=numel(modelFiles);
 42 | modelRxns=cell(nModels,1);
 43 | for h1=1:nModels
 44 |     loaded=load(modelFiles{h1},PathToModels.name);
 45 |     modelRxns{h1}=loaded.(PathToModels.name).rxns;
 46 | end
 47 | rxns=unique(vertcat({},modelRxns{:}));
 48 | 
 49 | % reaction-by-model matrix: 1 when the model includes the reaction
 50 | inModel=zeros(numel(rxns),nModels);
 51 | for w1=1:nModels
 52 |     inModel(ismember(rxns,modelRxns{w1}),w1)=1;
 53 | end
 54 | 
 55 | % a reaction is present in a sample when at least one model including it
 56 | % has a positive abundance in that sample
 57 | isPresent=double(abundance(:,1:numel(sampleName))>0);
 58 | input=double((inModel*isPresent)>0);
 59 | 
 60 | % convert rxns ID to provided ID for enrichment analysis. The result is
 61 | % pre-allocated, so no indexing error occurs if no reaction is mapped.
 62 | mappedID=cell(numel(rxns),1);
 63 | for ii=1:numel(rxns)
 64 |     hit=find(strcmp(IDmap(:,1),rxns{ii}),1); % first match if duplicated
 65 |     if ~isempty(hit)
 66 |         mappedID{ii}=IDmap{hit,2};
 67 |     end
 68 | end
 69 | isMapped=not(cellfun('isempty',mappedID));
 70 | input=input(isMapped,:);
 71 | rxns=mappedID(isMapped);
 72 | 
 73 | % split the reaction sets once and collect the unique IDs of the GSE
 74 | nTerms=size(GSE,1);
 75 | termIDs=cell(nTerms,1);
 76 | for i=1:nTerms
 77 |     termIDs{i}=transpose(strsplit(GSE{i,2},','));
 78 | end
 79 | IDs=unique(vertcat({},termIDs{:}));
 80 | N=size(IDs,1); % total number of unique IDs in the provided GSE
 81 | 
 82 | nSamples=size(input,2);
 83 | enrichment=zeros(nTerms,nSamples);
 84 | coverage=zeros(nTerms,nSamples);
 85 | for j=1:nSamples
 86 |     % the IDs of the sample are the same for all terms, so they are
 87 |     % computed once per sample
 88 |     sampleIDs=rxns(input(:,j)==1);
 89 |     k=length(intersect(IDs,sampleIDs)); % IDs of the sample that are in the GSE
 90 |     for i=1:nTerms
 91 |         m=size(termIDs{i},1); % number of IDs associated to the term
 92 |         x=length(intersect(termIDs{i},sampleIDs)); % IDs of the term in the sample
 93 |         % upper tail of the hypergeometric distribution, P(X > x-1)
 94 |         enrichment(i,j)=hygecdf(x-1,N,m,k,'upper');
 95 |         coverage(i,j)=x/m;
 96 |     end
 97 | end
 98 | if significance ~=1
 99 |     coverage(enrichment>significance)=0;
100 | end
101 | % convert both matrices to tables and add the sample names and the terms
102 | termColumn=array2table(GSE(:,1));
103 | coverage = array2table(coverage);
104 | coverage.Properties.VariableNames = sampleName;
105 | coverage=[termColumn coverage];
106 | enrichment = array2table(enrichment);
107 | enrichment.Properties.VariableNames = sampleName;
108 | enrichment=[termColumn enrichment];
109 | 
110 | end


--------------------------------------------------------------------------------
/MIGRENE_pipeline.m:
--------------------------------------------------------------------------------
 1 | 
 2 | %% -----------------------MIGRENE Pipeline--------------------------
 3 | % The toolbox for Microbial and personalized GEM, REactiobiome and 
 4 | % community NEtwork modelling
 5 | 
 6 | % Three tutorials show the steps by which the MIGRENE Toolbox automatically
 7 | % generates and simulates MAGMA (MSP Associated Genome scale MetAbolic)
 8 | % models and personalized metabolic microbiome data using bacterial
 9 | % gene catalog, metagenome species (MSP) and metagenomic data integration.
10 | 
11 | % The three tutorials are provided in the toolbox:
12 | % note: if you have your GEMs and you need to create the personalized metabolic
13 | % microbiome data, i.e. reaction richness, reactobiome, reaction abundance,
14 | % community models and pRSE (personalized reaction set enrichment), go to Box-c.
15 | 
16 | % IntegrationCatalogToModel.m: Box-a| integration of bacterial gene catalog 
17 | % into metabolic model to generate a microbiome reference genome scale metabolic
18 | % model (GEM).
19 | 
20 | % MAGMAgeneration.m: Box-b| calculation of reactionScore, constraining the model
21 | % based on diet, species specific GEMs or MAGMA generation
22 | 
23 | % PersonalizedMicrobiomeMetabolism.m: Box-c| generating personalized metabolic
24 | % microbiome
25 | 
26 | %-------
27 | %Gholamreza Bidkhori,
28 | %email: gholamreza.bidkhori@kcl.ac.uk
29 | %       gbidkhori@gmail.com,
30 | % Aug 2020 
31 | %%
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MIGRENE toolbox,
 2 | ## Description 
 3 | MIGRENE toolbox is an integrated pipeline for Microbial and personalized GEM (Genome-scale metabolic model), REactiobiome, and community NEtwork modeling. It enables the generation of species and community-level models from any reference gene catalogs and metagenome species to be applied to personalized microbiome studies. Using the toolbox, GEMs could be generated based on the gut microbial gene catalogs and metagenomic species pan-genomes (MSPs). This toolbox also contains functions for performing community modelling using GEMs, determining reaction abundance and richness and reaction set enrichment (RSE), and reactobiome that describes an aggregate of the metabolic repertoires of an individual gut microbiome, or the biochemical state of the microbiome.
 4 | 
 5 | ## Download and installation
 6 | 1. Download this repository. You can clone the repository using:
 7 | ```
 8 | git clone https://github.com/sysbiomelab/MIGRENE.git
 9 | ```
10 | Or you can download this repository as a <a href="https://codeload.github.com/sysbiomelab/MIGRENE/zip/refs/heads/master">compressed archive</a>.
11 | 
12 | 2. Change to the folder MIGRENE/ and run from Matlab
13 | ```
14 | addpath(genpath("MIGRENE"))
15 | ```
16 | Or you can use the <a href="https://uk.mathworks.com/help/matlab/matlab_env/add-remove-or-reorder-folders-on-the-search-path.html">link</a> to learn how to set path in MATLAB to the directory.
17 | ## Tutorials
18 | <a href="https://github.com/sysbiomelab/MIGRENE/wiki/generation-of-microbiome-GEM"> Generation of microbiome GEM using a generic metabolic model and a microbiome catalog</a>: This tutorial shows how to integrate a bacterial gene catalog 
19 | into the metabolic model to generate a microbiome reference genome-scale metabolic model (GEM).
20 | 
21 | <a href="https://github.com/sysbiomelab/MIGRENE/wiki/generation-of-microbiome-GEM"> Generation of Bacterial (species-specific) GEM </a>: This tutorial shows how to calculate the reaction score and threshold for bacteria, to constrain the model and to generate species-specific bacterial GEMs. 
22 | 
23 | <a href="https://github.com/sysbiomelab/MIGRENE/wiki/generation-of-microbiome-GEM"> Generation of Personalized Microbiome Metabolism </a>: It shows how to calculate reaction richness, reactobiome, reaction abundance, community models and iRSE (individualized reaction set enrichment)
24 | 
25 | <a href="https://github.com/sysbiomelab/LiverCirrhosis_MS"> Reactobiome and reaction richness for Liver Cirrhosis gut microbiome samples </a>
26 | 
27 | 
28 | ## Integration of a gene catalog into a metabolic model.
29 | ### Data usage
30 | * `<catalog>`: is a txt file containing gene names and KO (KEGG orthology) such as [SubSet_hs_10_4_igc2_annot.txt](data/SubSet_hs_10_4_igc2_annot.txt)
31 | * `<mapping file>`: (optional) a txt file contains the mapping information for KO to KEGG reaction ID.
32 | * `<Metabolic_model>`: a mat file containing a metabolic model in either COBRA or RAVEN format, such as [RefMetabolicModel.mat](mat/RefMetabolicModel.mat)
33 | ### functions
34 | * [checkCatalog](Functions/checkCatalog.m): check the `<catalog>` to make sure it is ready for integration.
35 | * [convertCatalogAnnotation](Functions/convertCatalogAnnotation.m): Convert KO annotations to KEGG reaction IDs in the  `<catalog>`. If no mapping file `<mapping file>` is provided, the latest information is automatically downloaded from the KEGG API. the output is a `<converted catalog>`.
36 | * [microbiomeGEMgeneration](Functions/microbiomeGEMgeneration.m): integrate the `<converted catalog>` into the `<Metabolic_model>` to generate a microbiome `<reference_GEM>`
37 | 
38 | 
39 | ## Generation of Bacterial (species‐specific) GEM
40 | ### Data usage
41 | * `<reference_GEM>`: the genome scale metabolic Model with COBRA or RAVEN format such as the `<reference_GEM>` produced above by [microbiomeGEMgeneration](Functions/microbiomeGEMgeneration.m)
42 | * `<bacterial_info>`: a structure built from a binary matrix containing gene-level data for bacterial species (example [MSPgeneProfile.txt](data/MSPgeneProfile.txt)).
43 |     
44 |   ```
45 |   T = readtable('MSPgeneProfile.txt');
46 |   bacterial_info = struct();
47 |   bacterial_info.genes=table2cell(T(:,1));
48 |   bacterial_info.msp=T.Properties.VariableNames;
49 |   bacterial_info.msp=transpose(bacterial_info.msp(1,2:end));
50 |   bacterial_info.expression=table2array(T(1:end ,2:end));
51 |   ```
52 | * `<taxonomy>`: an Excel file containing the taxonomy classification info, i.e. Kingdom, Phylum, Class, Order, Family, Genus, Species (example [Taxonomy.xlsx](data/Taxonomy.xlsx)).
53 | * `<Bibliome_Data>`: (optional, example [here](mat/bibliome.mat)) any bibliome data about phenotypic features of the bacteria can be provided as a structure with four fields: "bacteria" is a cell array listing the names of the bacteria. "rxn" lists the name of the reactions having bibliome. "value" is a matrix of numbers: zero means no information, 1 means consumed, 2 means produced, -1 means not consumed and -2 means not produced by the corresponding bacteria. "aerobeInfo" a cell array provides the info that the bacteria require oxygen for growth or not, specifying with "aerobe", "anaerobe" or "facultative".
54 |   
55 | ### functions
56 | * [DietConstrain](Functions/DietConstrain.m): (optional) this function constrains `<reference_GEM>` based on the provided diet `<diet_number>` (1 to 5). Five diets have been provided by the toolbox: 1: high Fibre Plant Based, 2: high Fibre omnivore, 3: high Protein Plant based, 4: high protein omnivore, 5:UK average. Set the number of the diet for constraining the model.
57 | 
58 | * [MetagenomeToReactions](Functions/MetagenomeToReactions.m): This function needs `<reference_GEM>`and `<bacterial_info>` as inputs and must be seperately run for each bacterial species in the `<bacterial_info.msp>` using a loop, as below:
59 | ```
60 | for h=1:length(bacterial_info.msp)
61 |             metagenomeData=struct();
62 |             metagenomeData.gene=bacterial_info.genes;
63 |             metagenomeData.value=bacterial_info.expression(:,h);
64 |             [Reaction_State, bacterial_model] = MetagenomeToReactions(microbiomeGEM, metagenomeData);
65 |             save(['save\to\directory\' bacterial_info.msp{h} '.mat'],'Reaction_State','bacterial_model');
66 | end
67 | ```
68 | `<bacterial_model>` and `<Reaction_State>` for each species are the outputs that must be saved in the output directory, together in one `mat` file named after the bacterium.
69 | `<bacterial_model>` is a model with the bacterial genes and gene rules and `<Reaction_State>` is a vector showing the state of the reaction (zero or one) for the bacterial species.
70 | * [GenerateMSPInformation](Functions/GenerateMSPInformation.m): this function generates `<bacterial_Information>`, a structure that includes the following fields: taxoLevel, the taxonomy names. taxoInfo, taxonomy information for each species. taxoGroup, taxonomy group for bacteria. rxns, the reaction name in the reference model. bacteria, list of MSP IDs. BacteriaNames, list of species names. RxnStateAll, the reaction state (absent/present) for each bacteria. The inputs are the address to the directory including the saved `<Reaction_State>` and `<bacterial_model>` for each bacterium (MSP), the `<reference_GEM>` and the address to the `<taxonomy>` file.
71 | * [MetaGenomicsReactionScore](Functions/MetaGenomicsReactionScore.m): This function uses `<bacterial_Information>` to convert reaction states to reaction scores (`<reaction_Score>`) and to calculate a threshold (`<threshold>`) for each bacterial species. `<reaction_Score>` and `<threshold>` must be added to the `mat` file that includes `<bacterial_model>` and `<Reaction_State>`.
72 | * [contextSpecificModelTune](Functions/contextSpecificModelTune.m): uses `<bacterial_model>`, `<reaction_Score>`, `<threshold>` and `<Bibliome_Data>` to generate a context-specific species genome-scale metabolic model (`<bacterial_GEM>`) as the output. The [contextSpecificModelTune](Functions/contextSpecificModelTune.m) function tunes `<bacterial_GEM>` and also provides the level and the details of gap filling.
73 | 
74 | ## Reactobiome and reaction richness Generation
75 | 
76 | ### Data usage
77 | 
78 | * `<modelList>`: an array to provide the names of bacterial GEM, such as row names of this [file]
79 | * `<sampleName>`: an array to provide the names of samples or subject, such as column names of this [file]
80 | * `<bacterial_abundance>`: the bacterial abundance matrix showing the abundance of bacteria in `<modelList>` in each subject in `<sampleName>`  note: for example go to [link1](https://github.com/sysbiomelab/LiverCirrhosis_MS) or [link2](https://github.com/sysbiomelab/MIGRENE/wiki/Generation-of-Personalized-Microbiome-Metabolism). 
81 | * `<PathToModels>`: Provides the path where the [bacterial GEMs](https://github.com/sysbiomelab/LiverCirrhosis_MS/blob/main/GEMmodels.zip) are saved in `PathToModels.path` field and the name of the model assigned in the .mat files in `PathToModels.name`. note: if you use the provided [bacterial GEMs](https://github.com/sysbiomelab/LiverCirrhosis_MS/blob/main/GEMmodels.zip), unzip it and then ```PathToModels.name='model'```.
82 | 
83 | ### Functions
84 | * [RxnRichnessGenerator](Functions/RxnRichnessGenerator.m): uses `<modelList>`,`<PathToModels>`,`<abundance>` and `<sampleName>` and generates reaction richness for all the subjects regarding the provided GEMs.
85 | * [ReactobiomeGenerator](Functions/ReactobiomeGenerator.m): utilizes `<modelList>`,`<PathToModels>`,`<abundance>` and `<sampleName>` and generates reactobiome for all the subjects regarding the provided GEMs.
86 | 
87 | ## Contact
88 | gholamreza.bidkhori@kcl.ac.uk,
89 | gbidkhori@gmail.com,
90 | # Citation
91 | in preparation, 2024
92 | 
93 | 


--------------------------------------------------------------------------------
/data/BacterialAbundance.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/BacterialAbundance.xlsx


--------------------------------------------------------------------------------
/data/Taxonomy.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/Taxonomy.xlsx


--------------------------------------------------------------------------------
/data/pathways.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/pathways.xlsx


--------------------------------------------------------------------------------
/mat/DietInput.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/DietInput.mat


--------------------------------------------------------------------------------
/mat/Diets.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/Diets.mat


--------------------------------------------------------------------------------
/mat/KBase.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/KBase.mat


--------------------------------------------------------------------------------
/mat/MetInformation.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/MetInformation.mat


--------------------------------------------------------------------------------
/mat/RefMetabolicModel.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/RefMetabolicModel.mat


--------------------------------------------------------------------------------
/mat/USDAdataset.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/USDAdataset.mat


--------------------------------------------------------------------------------
/mat/bibliome.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/bibliome.mat


--------------------------------------------------------------------------------
/mat/microbiomeGEM.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/microbiomeGEM.mat


--------------------------------------------------------------------------------
/saveDir/test.txt:
--------------------------------------------------------------------------------
1 | test file 


--------------------------------------------------------------------------------
/tutorials/IntegrationCatalogToModel.m:
--------------------------------------------------------------------------------
 1 | %% Box-A| generation of microbiome GEM using a generic metabolic model and a microbiome catalog
 2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
 3 | %% start
 4 | % get the path to where the MIGRENE Toolbox is located
 5 | MIGDIR = fileparts(which('MIGRENE_pipeline'));
 6 | % provide the path to the microbiome catalog.
 7 | CATDIR=[MIGDIR filesep 'data'];
 8 | % provide the path to the reference metabolic model.
 9 | MATDIR=[MIGDIR filesep 'mat'];
10 | % define a directory to save microbiomeGEM; here it is saved in the mat
11 | % directory of the toolbox
12 | SAVEDIR=[MIGDIR filesep 'mat'];
13 | % number of cores specified for parallelization. it can be a positive
14 | % integer or a range specified as a 2-element vector of integers
15 | numWorkers=4
16 | 
17 | %% integration
18 | % integration of the bacterial gene catalog into the metabolic model to
19 | % generate a generic genome scale metabolic model.
20 | % First, the catalog data is read from the text file.
21 | % here, a subset of the updated gut catalog is used to run the pipeline and
22 | % generate MAGMA. this small annotated catalog includes the genes
23 | % of 10 Bacteroides. their taxonomy information is also provided for MAGMA
24 | % generation
25 | catalog=[CATDIR filesep 'SubSet_hs_10_4_igc2_annot.txt'];
26 | T = readtable(catalog);
27 | catalogData=table2cell(T) ;
28 | % before using the catalog, the following function rearranges the catalog
29 | % for the genes with more than one KO linked, checks the structure, and
30 | % provides the format compatible with the downstream functions
31 | [catalogFileChecked]= checkCatalog(catalogData,numWorkers);
32 | 
33 | % convert KO in the catalog to KEGG reaction IDs. if there is no ID mapping
34 | % file, assign an empty cell array. then, the latest information is
35 | % automatically downloaded from the KEGG API and saved in the directory "data"
36 | % where the MIGRENE Toolbox is located (make sure you are connected to internet)
37 | mapping={};
38 | inputFile=catalogFileChecked;
39 | [catalogConverted]= convertCatalogAnnotation(inputFile,mapping,numWorkers);
40 | 
41 | % gene-protein-reaction (GPR) association is assigned by integrating
42 | % the catalog genes into the metabolic model to generate a generic genome
43 | % scale metabolic model. it will be compatible with both COBRA and RAVEN
44 | % toolboxes so you can use any functions provided by both toolboxes.
45 | 
46 | % first, the annotated metabolic model is loaded. you can load your own
47 | % generic model.
48 | load([MATDIR filesep 'RefMetabolicModel.mat']);
49 | genericModel=model;
50 | % if you have an annotation file, please load or import it. otherwise, leave
51 | % annotationFile empty. then, the function automatically finds all types
52 | % of annotations in your model and downloads the corresponding info from
53 | % the KEGG API (make sure you are connected to internet). here a generic
54 | % metabolic model is used. the reactions in the model are annotated by KO
55 | % and KEGG RN ID.
56 | annotationFile={};
57 | 
58 | [microbiomeGEM]=microbiomeGEMgeneration(genericModel,catalogConverted,...
59 |     annotationFile,numWorkers);
60 | 
61 | % save microbiomeGEM to a MAT-file
62 | save([SAVEDIR filesep 'microbiomeGEM.mat'],'microbiomeGEM')
63 | % done, congrats. go to MAGMAgeneration.m in the tutorials directory


--------------------------------------------------------------------------------
/tutorials/MAGMAgeneration.m:
--------------------------------------------------------------------------------
  1 | %% Box-B| calculation of reactionScore, constraining the model based on diet, MAGMA generation
  2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
  3 | %% start
  4 | % Locate the folder of the MIGRENE Toolbox (where MIGRENE_pipeline lives).
  5 | MIGDIR = fileparts(which('MIGRENE_pipeline'));
  6 | % Paths to the bacterial species (MSP) gene info and to the taxonomy
  7 | % information, both in the "data" folder.
  8 | CATDIR = [MIGDIR, filesep, 'data'];
  9 | taxo = [CATDIR, filesep, 'Taxonomy.xlsx'];
 10 | speciesGeneInfo = [CATDIR, filesep, 'MSPgeneProfile.txt'];
 11 | % Paths to microbiomeGEM (generated in IntegrationCatalogToModel.m) and
 12 | % to the bibliome data, both in the "mat" folder.
 13 | MATDIR = [MIGDIR, filesep, 'mat'];
 14 | MODEL = [MATDIR, filesep, 'microbiomeGEM.mat'];
 15 | BIBLIOME = [MATDIR, filesep, 'bibliome.mat'];
 16 | % Directory where this script saves its per-bacterium (MSP) MAT-files.
 17 | SAVEDIR = [MIGDIR, filesep, 'saveDir'];
 18 | % Number of cores used for parallelization: either a positive integer or
 19 | % a range given as a 2-element vector of integers.
 20 | numWorkers = 4
 21 | 
 22 | % Some functions, such as the FBA simulation, need the COBRA Toolbox.
 23 | % Installation instructions are available at
 24 | % https://opencobra.github.io/cobratoolbox/latest/installation.html
 25 | % Once it is installed, initiate COBRA:
 26 | initCobraToolbox()
 27 | 
 28 | %% 
 29 | % Load the microbiomeGEM model; it serves as the reference GEM.
 30 | load(MODEL)
 31 | 
 32 | % The reference model can be constrained before the bacterial GEMs are
 33 | % generated. In this example it is constrained with diet option 2 of the
 34 | % list below. Five diets are already provided; DietConstrain applies the
 35 | % selected one to the model.
 36 | 
 37 | dietNumber = 2; % 1:high Fibre Plant Based, 2:high Fibre omnivore, 3:high Protein Plant based
 38 |                 % 4:high protein omnivore, 5:UK average.
 39 | microbiomeGEM = DietConstrain(microbiomeGEM, dietNumber);
 40 | % Acetate and lactate are added as popular carbon sources for bacteria.
 41 | microbiomeGEM.lb(strcmp(microbiomeGEM.rxns, 'Ex_Acetate')) = -2.597426442;
 42 | microbiomeGEM.lb(strcmp(microbiomeGEM.rxns, 'Ex_L-Lactate')) = -0.074862638;
 43 | 
 44 | % Optional: generate a new diet based on the USDA dataset.
 45 | newDiet = false
 46 | % If you have your own diet and want to use it for modeling, call
 47 | % USDAcreatingDiet as shown below. According to the original tutorial,
 48 | % your compounds can also be matched with USDA IDs using a searchfood
 49 | % function. The example is a high protein plant based diet; the diet is
 50 | % converted to mmol/gDW for modeling.
 51 | if newDiet
 52 |     % Example: high protein plant based diet
 53 |     load([MATDIR, filesep, 'DietInput.mat']);
 54 |     % DietInput.mat includes food_id_item, the IDs of the food items
 55 |     % present in a high protein plant based diet, and food_grams_item,
 56 |     % the amount (grams) of each food ID item.
 57 |     [micronutrients_diet_mmol, macronutrients_diet] = USDAcreatingDiet(food_id_item, food_grams_item)
 58 | end
 59 | 
 60 | % Fill a structure with the species (MSP) gene info.
 61 | MSPinfo = struct();
 62 | % First, read the species (MSP) gene info from the text file: column 1 is
 63 | % stored as the genes, every further column is one MSP (10 Bacteroides).
 64 | T = readtable(speciesGeneInfo);
 65 | MSPinfo.genes=table2cell(T(:,1));
 66 | MSPinfo.msp=transpose(T.Properties.VariableNames(1,2:end));
 67 | MSPinfo.expression=table2array(T(1:end ,2:end));
 68 | 
 69 | % Second, generate the reaction state (absent/present reaction) for the
 70 | % bacteria and prune the genes based on the profile of each species (MSP).
 71 | for h=1:length(MSPinfo.msp)
 72 |     % result file of this MSP; 'file' limits exist() to files and folders
 73 |     mspFile=[SAVEDIR filesep MSPinfo.msp{h} '.mat'];
 74 |     if ~exist(mspFile,'file')
 75 |         metagenomeData=struct();
 76 |         metagenomeData.gene=MSPinfo.genes;
 77 |         metagenomeData.value=MSPinfo.expression(:,h);
 78 |         [RxnState, modelforMSP] = MetagenomeToReactions(microbiomeGEM, metagenomeData);
 79 |         % save RxnState and modelforMSP in a MAT-file named after the MSP
 80 |         save(mspFile,'RxnState','modelforMSP');
 81 |     else
 82 |         disp(['the reaction state and modelforMSP for ' MSPinfo.msp{h} ' are already generated. see your directory'])
 83 |     end
 84 | end
 85 | 
 86 | % Collect the MSP (bacterial) information.
 87 | % RXNDIR is the folder where RxnState and modelforMSP of each bacterium
 88 | % (MSP) were saved.
 89 | RXNDIR = SAVEDIR;
 90 | % GenerateMSPInformation returns MSPInformation, a structure with the
 91 | % fields: taxoLevel, the taxonomy names; taxoInfo, the taxonomy
 92 | % information of each species; taxoGroup, the taxonomy group of the
 93 | % bacteria; rxns, the reaction names in the reference model; bacteria,
 94 | % the list of MSP IDs; BacteriaNames, the list of species names;
 95 | % RxnStateAll, the reaction state (absent/present) of each bacterium.
 96 | % (No semicolon below: the structure is echoed to the command window.)
 97 | 
 98 | MSPInformation = GenerateMSPInformation(taxo, RXNDIR, microbiomeGEM)
 99 | 
100 | % Convert the reaction state to a reaction score and calculate the
101 | % threshold for gap filling.
102 | for h = 1:length(MSPInformation.bacteria)
103 |     % put the current MSP name into MSPInformation
104 |     MSPInformation.species = MSPInformation.bacteria{h};
105 |     [reactionScore, threshold] = MetaGenomicsReactionScore(MSPInformation);
106 |     % append the new variables to the MAT-file of this MSP
107 |     save([RXNDIR, filesep, MSPInformation.bacteria{h}, '.mat'], 'reactionScore', 'threshold', '-append');
108 | end
109 | %% collecting the bibliome data and constraining the model
110 | 
111 | % If you have bibliome data on phenotypic features of the bacteria you
112 | % are building GEMs for, provide them as a structure with four fields:
113 | % "bacteria": cell array with the names of the bacteria; "rxn": names of
114 | % the reactions that have bibliome data; "value": numeric matrix where 0
115 | % means no information, 1 consumed, 2 produced, -1 not consumed and -2
116 | % not produced by the corresponding bacterium; "aerobeInfo": cell array
117 | % stating whether each bacterium requires oxygen for growth, given as
118 | % "aerobe", "anaerobe" or "facultative". Without bibliome data the models
119 | % are generated and tuned based on reactionScore and threshold only.
120 | load(BIBLIOME)
121 | 
122 | % collect all exchange and transport reactions
123 | % NOTE(review): strfind matches 'Ex' / 't_' anywhere in the reaction name
124 | indexEx=strfind(microbiomeGEM.rxns,'Ex');
125 | IndexEx = find(~cellfun('isempty',indexEx));
126 | indexTr=strfind(microbiomeGEM.rxns,'t_');
127 | IndexTr = find(~cellfun('isempty',indexTr));
128 | indexOfTrEx=union(IndexEx,IndexTr);
129 | modelseed='true' % char flag (not a logical): reference model is based on KBase or modelSEED
130 | for h=1:length(MSPInformation.bacteria)
131 |     load([RXNDIR filesep MSPInformation.bacteria{h} '.mat'])
132 |     % keep all exchange and transport reactions in the model for now by
133 |     % setting their score to 1; they are pruned later, before the
134 |     % species-specific GEMs (MAGMA) are generated
135 |     reactionScore(indexOfTrEx,:)=1;
136 |     % use bibliome only if that variable is in the workspace ('var' stops
137 |     % exist matching files/folders); the species field selects its entry
138 |     if exist('bibliome','var')
139 |         bibliome.species=MSPInformation.bacteria{h};
140 |         [contextSpecificModel] = contextSpecificModelGenertion(modelforMSP,reactionScore,threshold,bibliome);
141 |     else
142 |         [contextSpecificModel] = contextSpecificModelGenertion(modelforMSP,reactionScore,threshold);
143 |     end
144 |     MSPInformation.species=MSPInformation.bacteria{h};
145 |     [contextSpecificModel, modelInfo] = contextSpecificModelTune(contextSpecificModel,MSPInformation,reactionScore,threshold,modelseed);
146 |     % append the new variables to the MAT-file of this MSP
147 |     save([RXNDIR filesep MSPInformation.bacteria{h} '.mat'],'contextSpecificModel','modelInfo','-append');
148 | end
149 | 
150 | % Gather the modelInfo tables of all bacteria and save them as an Excel
151 | % file. Based on the level and percentage of gap filling you can decide
152 | % which models rest on enough gene info and discard the poor models.
153 | modelInfoFinal = table()
154 | for h = 1:length(MSPInformation.bacteria)
155 |     load([RXNDIR, filesep, MSPInformation.bacteria{h}, '.mat'], 'modelInfo')
156 |     modelInfo = horzcat(table(MSPInformation.bacteria(h)), modelInfo);
157 |     modelInfoFinal = vertcat(modelInfoFinal, modelInfo);
158 | end
159 | 
160 | % Write the collected table to an Excel spreadsheet.
161 | filename = [RXNDIR, filesep, 'modelInfoFinal.xlsx']
162 | writetable(modelInfoFinal, filename, 'Sheet', 1, 'Range', 'A1')
163 | 
164 | % Done, congratulations. Continue with PersonalizedMicrobiomeMetabolism.m
165 | % in the tutorials directory to integrate the bacterial profile into the
166 | % models and investigate microbiome metabolism at personalized level.


--------------------------------------------------------------------------------
/tutorials/PersonalizedMicrobiomeMetabolism.m:
--------------------------------------------------------------------------------
  1 | %% Box-C| generating personalized metabolic microbiome
  2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
  3 | %% start
  4 | % Locate the folder of the MIGRENE Toolbox (where MIGRENE_pipeline lives).
  5 | MIGDIR = fileparts(which('MIGRENE_pipeline'));
  6 | % Paths to the bacterial abundance obtained from metagenomics analysis
  7 | % and to the pathway file, both in the "data" folder.
  8 | CATDIR = [MIGDIR, filesep, 'data'];
  9 | ABUNDANCE = [CATDIR, filesep, 'BacterialAbundance.xlsx'];
 10 | PATHWAY = [CATDIR, filesep, 'pathways.xlsx'];
 11 | % Paths to microbiomeGEM (generated in IntegrationCatalogToModel.m) and
 12 | % to the bibliome data, both in the "mat" folder.
 13 | MATDIR = [MIGDIR, filesep, 'mat'];
 14 | MODEL = [MATDIR, filesep, 'microbiomeGEM.mat'];
 15 | BIBLIOME = [MATDIR, filesep, 'bibliome.mat'];
 16 | % Directory used below as the model location and for community output.
 17 | SAVEDIR = [MIGDIR, filesep, 'saveDir'];
 18 | % Number of cores used for parallelization: either a positive integer or
 19 | % a range given as a 2-element vector of integers.
 20 | numWorkers = 4;
 21 | % Some functions, such as the FBA simulation, need the COBRA Toolbox to
 22 | % be installed; initiate it here.
 23 | initCobraToolbox()
 24 | 
 25 | %% 
 26 | % load the microbiome (MSP) abundance profile; it could be metagenomics or 16S based
 27 | [abundance,infoFile,~]=xlsread(ABUNDANCE);
 28 | % names of the models: first text column, below the header row
 29 | modelList = infoFile(2:end,1);
 30 | % names of the samples: header row, right of the first column
 31 | sampleName=infoFile(1,2:end);
 32 | % check the samples:
 33 | % drop MSPs with zero abundance in all samples; names first, mask must match
 34 | modelList=modelList(sum(abundance,2)~=0,:);
 35 | abundance=abundance(sum(abundance,2)~=0,:);
 36 | % drop samples without any bacterial abundance (names first, same reason)
 37 | sampleName=sampleName(sum(abundance,1)~=0);
 38 | abundance=abundance(:,sum(abundance,1)~=0);
 39 | % number of bacteria (bacterial richness) in each sample
 40 | temp1=abundance;
 41 | temp1(temp1>0)=1;
 42 | BactrialRichness=table(sampleName',sum(temp1,1)');
 43 | BactrialRichness.Properties.VariableNames = {'sampleName','BactrialRichness'};
 44 | 
 45 | % Folder holding the models and the variable name they have inside the .mat files.
 46 | PathToModels.path = SAVEDIR;
 47 | PathToModels.name = 'contextSpecificModel';
 48 | % Gut microbiome reaction composition (reaction richness) of all individuals.
 49 | richness = RxnRichnessGenerator(modelList, PathToModels, abundance, sampleName);
 50 | 
 51 | % Reaction abundance of all individuals; the function returns both the
 52 | % reaction abundance and the relative reaction abundance.
 53 | [reactionRelativeAbun, rxnAbunPerSample] = ReactionAbundanceGenerator(modelList, PathToModels, abundance, sampleName);
 54 | % Reactobiome of all individuals.
 55 | countPerFiveBacteria = ReactobiomeGenerator(modelList, PathToModels, abundance, sampleName);
 56 | 
 57 | % The enrichment analysis needs two inputs:
 58 | % 1) a file with pathway terms and their IDs, which could be KO, EC, KEGG
 59 | % reaction IDs, etc. Here the KEGG pathway terms are used together with
 60 | % KEGG reaction IDs.
 61 | % Read the pathway profiles and drop the header row.
 62 | [~, terms, ~] = xlsread(PATHWAY);
 63 | terms = terms(2:end, :);
 64 | % 2) an ID mapping between the reaction names in the models and the IDs
 65 | % in the pathway file. Here the info in the reference model is used.
 66 | load(MODEL)
 67 | IDmap = [microbiomeGEM.rxns, microbiomeGEM.rxnRN];
 68 | Index = find(~cellfun('isempty', IDmap(:, 2)));
 69 | IDmap = IDmap(Index, :);
 70 | % If a p-value is assigned, the coverage of non-significant terms is set
 71 | % to zero. If no p-value is defined, all terms are returned.
 72 | p_value = 0.05;
 73 | [coverageRSE, pRSE] = pRSEGenerator(modelList, PathToModels, abundance, sampleName, IDmap, terms, p_value);
 74 | % coverageRSE is a table with the coverage of each pathway in the samples;
 75 | % pRSE is a table with the p value of the pathways in the samples.
 76 | %%
 77 | % community modeling
 78 | % define the number of top abundant bacteria kept per sample for community
 79 | % modeling; here communities are generated for the top 5 bacteria
 80 | top=5;
 81 | thre=zeros(size(abundance,2),1); % per-sample abundance cut-off
 82 | for i=1:size(abundance,2)
 83 |     t1=sort(abundance(:,i),1,'descend');
 84 |     thre(i,1)=t1(min(top,numel(t1)),1); % clamp so fewer than "top" bacteria cannot index past the end
 85 |     abundance(abundance(:,i) < thre(i,1),i)=0; % zero everything below the cut-off (ties are kept)
 86 | end
 87 | boxplot(thre)
 88 | median(thre)
 89 | 
 90 | % specify the metabolite ID and exchange reaction for biomass (optional)
 91 | biomass.EXrxn='Ex_Biomass';
 92 | biomass.mets='cpd11416ee[lu]';
 93 | % make a directory to save the generated community models
 94 | PathToSave=[SAVEDIR filesep 'community'];
 95 | if ~exist(PathToSave,'dir')
 96 |     mkdir(PathToSave);
 97 | end
 98 | % in report, "one" next to a sample name shows that the community model
 99 | % has been generated for that individual in the PathToSave directory
100 | [report]= MakeCommunity(modelList,PathToModels,abundance,sampleName,PathToSave,biomass);
101 | 


--------------------------------------------------------------------------------