├── Functions
    ├── DietConstrain.m
    ├── GenerateMSPInformation.m
    ├── KeepNecessaryRxns.m
    ├── MakeCommunity.m
    ├── MetaGenomicsReactionScore.m
    ├── MetagenomeToReactions.m
    ├── ReactionAbundanceGenerator.m
    ├── ReactobiomeGenerator.m
    ├── RxnRichnessGenerator.m
    ├── USDAcreatingDiet.m
    ├── checkCatalog.m
    ├── contextSpecificModelGenertion.m
    ├── contextSpecificModelTune.m
    ├── convertCatalogAnnotation.m
    ├── fakeModelGenerator.m
    ├── microbiomeGEMgeneration.m
    └── pRSEGenerator.m
├── MIGRENE_pipeline.m
├── README.md
├── data
    ├── BacterialAbundance.xlsx
    ├── MSPgeneProfile.txt
    ├── SubSet_hs_10_4_igc2_annot.txt
    ├── Taxonomy.xlsx
    ├── ec2rn.txt
    ├── ko2rn.txt
    └── pathways.xlsx
├── mat
    ├── DietInput.mat
    ├── Diets.mat
    ├── KBase.mat
    ├── MetInformation.mat
    ├── RefMetabolicModel.mat
    ├── USDAdataset.mat
    ├── bibliome.mat
    └── microbiomeGEM.mat
├── saveDir
    └── test.txt
└── tutorials
    ├── IntegrationCatalogToModel.m
    ├── MAGMAgeneration.m
    └── PersonalizedMicrobiomeMetabolism.m

/Functions/DietConstrain.m:
--------------------------------------------------------------------------------
function [constrainedModel]= DietConstrain(model,dietOption)
% DietConstrain constrains a GEM with one of the diets shipped with MIGRENE.
%inputs:
%   model:      metabolic model in COBRA or RAVEN format (the fields rxns
%               and lb are used)
%   dietOption: column of the diet table to apply, 1 to 5
%               (1: high fibre plant based, 2: high fibre omnivore,
%                3: high protein plant based, 4: high protein omnivore,
%                5: UK average)
%outputs:
%   constrainedModel: copy of model whose lower bounds were overwritten with
%               the negated, non-zero diet values of the selected column
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

% get path to where the MIGRENE Toolbox is located
MIGDIR = fileparts(which('MIGRENE_pipeline'));

% The toolbox ships the file as 'Diets.mat'. The lower-case spelling used
% previously only resolves on case-insensitive file systems, so it is kept
% purely as a fallback.
dietFile = [MIGDIR filesep 'mat' filesep 'Diets.mat'];
if ~exist(dietFile,'file')
    dietFile = [MIGDIR filesep 'mat' filesep 'diets.mat'];
end
% load into a structure so nothing is silently injected into the workspace
loaded = load(dietFile,'diets');
if ~isfield(loaded,'diets')
    error('DietConstrain:missingDiets', ...
        'the diet file %s does not provide the variable "diets"', dietFile)
end
diets = loaded.diets;

nDiets = size(diets.value,2);
if ~isscalar(dietOption) || dietOption ~= fix(dietOption) || dietOption < 1 || dietOption > nDiets
    error('DietConstrain:dietOption', ...
        'dietOption must be an integer between 1 and %d', nDiets)
end

for i = 1:length(diets.rxn)
    % uptake is expressed as a negative lower bound
    value = -(diets.value(i,dietOption));
    if value ~= 0
        model.lb(strcmp(model.rxns, diets.rxn{i})) = value;
    end
end
constrainedModel = model;
end
--------------------------------------------------------------------------------
/Functions/GenerateMSPInformation.m:
--------------------------------------------------------------------------------
function [MSPInformation]= GenerateMSPInformation(taxo,RXNDIR,model)
% GenerateMSPInformation gathers the taxonomy information and the reaction
% state of every bacterium (MSP) in one structure.
%inputs:
%   taxo:   taxonomy profile, a file readable by readtable. The first column
%           holds the MSP IDs; columns named genus, family, order, class or
%           phylum are used as taxonomy levels and an optional column named
%           species provides the species names.
%   RXNDIR: directory holding one <MSP ID>.mat file per bacterium; each file
%           must contain the variable RxnState
%   model:  reference metabolic model in COBRA or RAVEN format (field rxns)
%
% output:
%   MSPInformation: structure with the fields
%       taxoLevel     names of the taxonomy levels found (genus to phylum)
%       taxoInfo      taxonomy information for each bacterium
%       taxoGroup     numeric group index of each bacterium per level
%       rxns          reaction names of the reference model
%       bacteria      MSP IDs
%       BacteriaNames species names ({} if there is no species column)
%       RxnStateAll   reaction state of each bacterium, one column per MSP;
%                     the biomass reaction(s) are always set to 1
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

% read taxo info
T = readtable(taxo);
% get the MSP IDs
mspNames = table2cell(T(:,1));
% provide the taxo info from bottom to top taxonomical levels (genus to
% phylum level)
TaxoAll = T.Properties.VariableNames;
levels = {'genus' 'family' 'order' 'class' 'phylum'};
Index = [];
for i = 1:length(levels)
    Index = [Index find(ismember(TaxoAll,levels{i}))]; %#ok<AGROW>
end
if isempty(Index)
    error('there is no taxonomy info in the provided excel file. make sure the first row provide the taxonomy name i.e. genus to phylum')
end
Taxo = TaxoAll(Index);              % name of sorted levels
infoFile = table2cell(T(:,Index));  % taxonomy info for each species
% dedicate the groups in each taxonomy level
infoFile1 = zeros(size(infoFile));
for i = 1:size(Taxo,2)
    [~,~,ic] = unique(infoFile(:,i));
    infoFile1(:,i) = ic;
end

% collect all the info in a structure
MSPInformation.taxoLevel = Taxo;
MSPInformation.taxoInfo = infoFile;
MSPInformation.taxoGroup = infoFile1;
MSPInformation.rxns = model.rxns;
MSPInformation.bacteria = mspNames;

index1 = find(ismember(TaxoAll,'species'));
if ~isempty(index1)
    MSPInformation.BacteriaNames = table2cell(T(:,index1));
else
    MSPInformation.BacteriaNames = {};
end

% Locate the biomass reaction(s) once. The exact name is preferred; only if
% it is absent, every reaction whose name contains "biomass" (any case) is
% used. (The former test ~isempty(ismember(...)) was always true, so this
% fallback could never run.)
isBiomass = strcmp(model.rxns,'Biomass_Bacteria');
if ~any(isBiomass)
    isBiomass = ~cellfun('isempty',strfind(lower(model.rxns),'biomass'));
end

% all the RxnStates were collected from reactionProfile directory
nBacteria = numel(MSPInformation.bacteria);
states = cell(1,nBacteria);
for i = 1:nBacteria
    stateFile = [RXNDIR filesep MSPInformation.bacteria{i} '.mat'];
    if ~exist(stateFile,'file')
        error(['there is no information for ' MSPInformation.bacteria{i} '. please check the dedicated directory. Besides, you might not generate it by MetagemenomeToReactions function'])
    end
    % loading into a structure guarantees that a file without RxnState is
    % reported instead of silently reusing the state of the previous MSP
    loaded = load(stateFile,'RxnState');
    if ~isfield(loaded,'RxnState')
        error('GenerateMSPInformation:missingRxnState', ...
            'the file %s does not contain the variable RxnState', stateFile)
    end
    RxnState = loaded.RxnState;
    RxnState(isBiomass) = 1;
    states{i} = RxnState;
end
MSPInformation.RxnStateAll = [states{:}];
--------------------------------------------------------------------------------
/Functions/KeepNecessaryRxns.m:
--------------------------------------------------------------------------------
function reducedModelTemp = KeepNecessaryRxns(model, score, threshold, min)
% KeepNecessaryRxns removes the reactions that are neither supported by
% their score nor needed to keep the objective of the model feasible.
%inputs:
%   model:     metabolic model with COBRA or RAVEN format.
%   score:     a numeric column vector that shows the score of each reaction
%              in the model (-1 is treated as 0)
%   threshold: reactions scoring above this value are always kept; reactions
%              scoring below it are penalised by (threshold - score) when
%              the total flux is minimised
%   min:       minimum fraction of objective that has to be retained
%              (note: shadows the builtin min inside this function)
%outputs:
%   reducedModelTemp: model without the reactions that carry no flux in the
%              penalised solution and do not score above threshold
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

[tempModel,~,IndexRev2irrev,IndexIrrev2rev] = convertToIrreversible(model);
nIrrev = length(tempModel.rxns);

% every irreversible reaction inherits the score of its reversible parent
expressionRxnsIrrev = zeros(nIrrev,1);
expressionRxnsIrrev(:) = score(IndexIrrev2rev(1:nIrrev,1),1);
expressionRxnsIrrev(expressionRxnsIrrev==-1) = 0;

% force the objective to keep at least the requested fraction of its optimum
cc = optimizeCbModel(model);
tempModel.lb(tempModel.c==1,1) = cc.f*min; % minimum fraction of objective

% minimise the flux through the poorly scored reactions
tempModel.c(:,1) = 0;
penalised = expressionRxnsIrrev < threshold;
tempModel.c(penalised,1) = threshold - expressionRxnsIrrev(penalised);
gimmeSolution = optimizeCbModel(tempModel,'min');
if (gimmeSolution.stat ~= 1)
    % without a solution only the reactions scoring above threshold survive
    warning('KeepNecessaryRxns:noSolution', ...
        'the penalised flux minimisation did not return an optimal solution; only reactions scoring above the threshold are kept')
end

reactionScoreTransition = zeros(nIrrev,1);
reactionScoreTransition(gimmeSolution.x>0,1) = 1;
reactionScoreTransition(expressionRxnsIrrev>threshold) = 1;

% Translate reactionActivity to reversible model: a reaction is kept as
% soon as one of its irreversible halves is kept
reactionScoreRev = zeros(length(model.rxns),1);
for i = 1:length(model.rxns)
    halves = IndexRev2irrev{i,1};
    if any(reactionScoreTransition(halves) > 0)
        reactionScoreRev(i,1) = 1;
    end
end
rxn2remove = model.rxns(reactionScoreRev == 0);
reducedModelTemp = removeRxns(model,rxn2remove);
end
--------------------------------------------------------------------------------
/Functions/MakeCommunity.m:
--------------------------------------------------------------------------------
function [report]= MakeCommunity(modelList,PathToModels,abundance,sampleName,PathToSave,biomass)
% MakeCommunity builds one community model per sample and saves it to disk.
%inputs:
%   modelList:    list (column cell array) of model names.
%   PathToModels: a structure with the fields "path" (directory where the
%                 <model name>.mat files are available) and "name" (name of
%                 the model variable stored in the .mat files)
%   abundance:    matrix of microbiome (MSP) abundance profile, one row per
%                 entry of modelList and one column per sample
%   sampleName:   list of sample names
%   PathToSave:   a string showing the directory to save generated community models
%OPTIONAL INPUT
%   biomass:      a structure that specify the metabolite ID (field "mets")
%                 and exchange reaction name (field "EXrxn") for biomass;
%                 defaults to 'cpd11416ee[lu]' and 'Ex_Biomass'
%outputs:
%   report:       one row per sample: sample name and true once the
%                 community model has been saved
%
% For every sample the file <PathToSave>/<sample>.mat is written with the
% variables PmergedModel (community model) and PmergedModel1 (the same
% model after coupling the reactions of each member to its biomass).
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidhkori@kcl.ac.uk
if nargin<6
    biomass={};
end
% resolve the biomass identifiers once instead of branching at every use
if ~isempty(biomass)
    biomassMet = biomass.mets;
    biomassEx = biomass.EXrxn;
else
    biomassMet = 'cpd11416ee[lu]';
    biomassEx = 'Ex_Biomass';
end
if iscell(biomassMet)
    biomassMet = biomassMet{1};
end

% keep the bacteria for which a model file is available
hasFile = false(size(modelList,1),1);
for h1 = 1:size(modelList,1)
    hasFile(h1) = exist([PathToModels.path filesep modelList{h1} '.mat'],'file') == 2;
end
modelList = modelList(hasFile,:);
abundance = abundance(hasFile,:);
% keep the bacteria with at least one nonzero observation; the mask is
% computed once so that modelList and abundance stay aligned
keepRows = sum(abundance,2) ~= 0;
abundance = abundance(keepRows,:);
modelList = modelList(keepRows,:);
% keep the sample with at least one nonzero observation
keepCols = sum(abundance,1) ~= 0;
abundance = abundance(:,keepCols);
sampleName = sampleName(keepCols);
if isempty(modelList)
    error('MakeCommunity:noModel', ...
        'no bacterium is left: check that the model files exist in %s and that abundance is not all zero', PathToModels.path)
end

% collect the exchange metabolites and exchange reactions of all members
exchangeMetabolites = {};
reactions = {};
for h1 = 1:size(modelList,1)
    model = loadNamedModel(PathToModels,modelList{h1});
    exchangeMetabolites = vertcat(exchangeMetabolites,GetExchangeMetabolite(model)); %#ok<AGROW>
    isExchange = ~cellfun('isempty',strfind(model.rxns,'Ex_'));
    rxn = model.rxns(isExchange,1);
    rxn(:,2) = printRxnFormula(model,rxn);
    reactions = vertcat(reactions,rxn); %#ok<AGROW>
end
exchangeMetabolites = unique(exchangeMetabolites);
% the same metabolite in the lumen [lu], food [fo] and feces [fe] compartments
mets_art = exchangeMetabolites;
mets_art(:,2) = strrep(mets_art(:,1), '[e]', '[lu]');
mets_art(:,3) = strrep(mets_art(:,1), '[e]', '[fo]');
mets_art(:,4) = strrep(mets_art(:,1), '[e]', '[fe]');
[~,idx] = unique(reactions(:,1));
reactions1 = reactions(idx,:);

% name of the exchange reaction belonging to every exchange metabolite
Ex_art = cell(size(mets_art,1),1);
for h1 = 1:size(mets_art,1)
    hit = find(~cellfun('isempty',strfind(reactions1(:,2),mets_art{h1,1})));
    if numel(hit) ~= 1
        error('MakeCommunity:exchangeLookup', ...
            'expected exactly one exchange reaction for metabolite %s but found %d', mets_art{h1,1}, numel(hit))
    end
    Ex_art{h1,1} = reactions1{hit,1};
end
Ex_art(:,2) = strrep(Ex_art(:,1), 'Ex_', 'FoEx_');
Ex_art(:,3) = strrep(Ex_art(:,1), 'Ex_', 'Fo_');
Ex_art(:,4) = strrep(Ex_art(:,1), 'Ex_', 'Fe_');
Ex_art(:,5) = strrep(Ex_art(:,1), 'Ex_', 'FeEx_');
[fakemodel, fakemodel1] = fakeModelGenerator(mets_art,Ex_art);

% convert to relative abundance unless every sample already sums to one
SumOfAbun = sum(abundance,1);
if any(SumOfAbun ~= 1)
    abundance = bsxfun(@rdivide,abundance,SumOfAbun);
end

report = {};
for h1 = 1:numel(sampleName)
    abundanceS = abundance(:,h1);
    present = abundanceS ~= 0;
    modelListN = modelList(present);
    abundanceN = abundanceS(present);
    modelList1 = strcat(modelListN, '_');   % prefix identifying each member

    modelsM = cell(numel(modelListN),1);
    for h2 = 1:numel(modelListN)
        model = loadNamedModel(PathToModels,modelListN{h2});
        minLB = min(model.lb);
        % remove the constraint from the model
        model.lb(model.lb > minLB & model.lb < 0) = -1000;
        % swap the member's own exchange reactions for the shared ones
        exchangeCom = intersect(model.rxns, fakemodel1.rxns);
        fakemodelS = removeRxns(fakemodel1,setdiff(fakemodel1.rxns,exchangeCom));
        model = removeRxns(model,Ex_art(:,1));
        model = mergeTwoModels(model, fakemodelS, 1, false);
        model.lb(strcmp(model.rxns,biomassEx)) = 0;
        % prefix the reactions, the non-lumen metabolites and the biomass
        % metabolite with the member name so they stay private to it
        model.rxns = strcat(modelList1{h2, 1}, model.rxns);
        Lumen = model.mets(cellfun('isempty',strfind(model.mets,'ee[lu]')));
        Lumen = union(Lumen,biomassMet);
        toRename = ismember(model.mets,Lumen);
        model.mets(toRename) = strcat(modelList1{h2, 1}, model.mets(toRename));
        modelsM{h2,1} = model;
    end

    % merge all members; a community of a single member is valid as well
    merged = modelsM{1};
    for i = 2:size(modelsM,1)
        merged = mergeTwoModels(merged, modelsM{i}, 1, false);
    end
    mergedModelS = mergeTwoModels(merged, fakemodel, 1, false);

    % make a global biomass including the biomasses of the bacteria in the community
    BiomassAll = mergedModelS.mets(~cellfun('isempty',strfind(mergedModelS.mets,biomassMet)));
    biomassmodel = struct();
    biomassmodel.mets = BiomassAll;
    biomassmodel.rxns = {'BiomassAll'};
    biomassmodel.lb = 0.1;
    biomassmodel.ub = 1;
    biomassmodel.S = zeros(numel(biomassmodel.mets),numel(biomassmodel.rxns));
    % the un-prefixed biomass metabolite is the product of the reaction
    % (this used to be a comparison "==1" that left the coefficient at 0)
    biomassmodel.S(strcmp(biomassmodel.mets,biomassMet)) = 1;
    % add bacterial abundance as Stoichiometric Coefficients into the global biomass
    for w12 = 1:numel(biomassmodel.mets)
        mgs1 = strrep(biomassmodel.mets{w12}, ['_' biomassMet], '');
        value = abundanceN(strcmp(modelListN,mgs1));
        if ~isempty(value)
            biomassmodel.S(w12,1) = -(value);
        end
    end
    % remove the FoEx_ and Fo_ global biomass
    temp3 = strrep(biomassEx, 'Ex_', '');
    Toremove = {strcat('FoEx_',temp3) strcat('Fo_',temp3)};
    mergedModelS = removeRxns(mergedModelS,Toremove);
    %add the global biomass to the community model
    PmergedModel = mergeTwoModels(mergedModelS,biomassmodel, 1, false);
    PmergedModel.c(:,1) = 0;
    PmergedModel.c(strcmp(PmergedModel.rxns,'BiomassAll')) = 1;
    PmergedModel1 = PmergedModel;

    % couple the reactions of every member to its own biomass reaction.
    % Members are matched on the "<name>_" prefix: a plain substring search
    % would make e.g. MSP1 also capture the reactions of MSP10.
    for jjj = 1:numel(modelListN)
        prefix = modelList1{jjj};
        RXNs = PmergedModel1.rxns(strncmp(PmergedModel1.rxns,prefix,numel(prefix)));
        ExRXN = PmergedModel1.rxns(~cellfun('isempty',strfind(PmergedModel1.rxns,'Biomass_Bacteria')));
        ExRXN = intersect(ExRXN,RXNs);
        PmergedModel1 = coupleRxnList2Rxn(PmergedModel1,RXNs,ExRXN);
    end
    save([PathToSave filesep sampleName{h1} '.mat'],'PmergedModel1','PmergedModel')
    report{h1,1} = sampleName{h1}; %#ok<AGROW>
    report{h1,2} = true; %#ok<AGROW>
end
end

function model = loadNamedModel(PathToModels,modelName)
% loads the variable PathToModels.name from <PathToModels.path>/<modelName>.mat
loaded = load([PathToModels.path filesep modelName],PathToModels.name);
if ~isfield(loaded,PathToModels.name)
    error('MakeCommunity:missingModel', ...
        'the file of %s does not contain the variable %s', modelName, PathToModels.name)
end
model = loaded.(PathToModels.name);
end

function exchangeMets=GetExchangeMetabolite(model)
% returns the metabolites taking part in the reactions whose name contains 'Ex_'
isExchange = ~cellfun('isempty',strfind(model.rxns,'Ex_'));
S = model.S(:,isExchange);
exchangeMets = model.mets(any(S,2));
end
--------------------------------------------------------------------------------
/Functions/MetaGenomicsReactionScore.m:
--------------------------------------------------------------------------------
function [reactionScore, threshold] = MetaGenomicsReactionScore(BacteriaInformation)
% this function calculate reaction score and threshold for gap filling.
%inputs:
%   BacteriaInformation: a structure includes:
%       taxoLevel, the taxonomy name. taxoInfo, taxonomy information for each
%       bacteria. taxoGroup: taxonomy group for each bacteria. rxns, the reaction
%       name in reference model. bacteria, MSP IDs. BacteriaNames, species name.
%       RxnStateAll, the reaction state (absent/present) for each bacteria.
%       species, the MSP ID (an entry of "bacteria") to score
%
% output:
%   reactionScore: a matrix includes 3 different scores for each reaction:
%       column 1, mean over the taxonomy levels of the frequency of the
%       reaction among the bacteria sharing the taxon of the species;
%       column 2, the reaction state of the species itself;
%       column 3, the sum of both, set to -1 where the state is -1 and
%       capped at 1.
%   threshold: includes a threshold for each specified taxonomy level.
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

index = find(strcmp(BacteriaInformation.bacteria, BacteriaInformation.species));
if isempty(index)
    error('MetaGenomicsReactionScore:unknownSpecies', ...
        'the species %s is not listed in BacteriaInformation.bacteria', char(BacteriaInformation.species))
end
index = index(1);
% keep the untouched state (including -1) of the species itself
BacteriaInformation.value = BacteriaInformation.RxnStateAll(:,index);

% for the frequencies a negative state counts as absent
BacteriaInformation.RxnStateAll(BacteriaInformation.RxnStateAll < 0) = 0;
nRxns = size(BacteriaInformation.RxnStateAll,1);
nLevels = size(BacteriaInformation.taxoLevel,2);
ScoreMatrix = zeros(nRxns,nLevels);
for j = 1:nLevels
    taxon = char(BacteriaInformation.taxoInfo(index,j));
    if isempty(strfind(taxon,'unclassified'))
        group = BacteriaInformation.taxoGroup(:,j) == BacteriaInformation.taxoGroup(index,j);
        expression_Group = BacteriaInformation.RxnStateAll(:,group);
        % calculate the freq of each reaction for the taxonomy level
        ScoreMatrix(:,j) = sum(expression_Group,2)/size(expression_Group,2);
    else
        % an unclassified level carries no information (this branch used
        % to read the non-existent field "expressionset" and crashed)
        ScoreMatrix(:,j) = 0;
    end
end

% last column: average over the taxonomy levels
ScoreMatrix(:,nLevels+1) = sum(ScoreMatrix,2)/size(ScoreMatrix,2);
t1 = sort(ScoreMatrix,1,'descend');
threshold = zeros(1,nLevels);
for j = 1:nLevels
    if sum(t1(:,j)) > 0
        % the number of non-zero frequencies of the level selects the row
        % of the sorted average score; nnz also covers a level without any
        % zero, for which the former find(...==0)-1 returned empty
        threshold(1,j) = t1(nnz(t1(:,j)),end);
    end
end

reactionScore = [ScoreMatrix(:,end), BacteriaInformation.value];
reactionScore(:,3) = sum(reactionScore,2);
reactionScore(reactionScore(:,2) == -1,3) = -1;
reactionScore(reactionScore(:,3) > 1,3) = 1;

end
--------------------------------------------------------------------------------
/Functions/MetagenomeToReactions.m:
--------------------------------------------------------------------------------
function [RxnState, MSPmodel]= MetagemenomeToReactions(model,metagenomeData)
% creates a reaction state for each species based on absent/present genes in MSP into gut
% reference model and filter genes and gene rules in reference model for each species
%inputs:
%   model: reference metabolic Model with COBRA or RAVEN format. Either
%       model.grRules or model.rules has to be defined; the gene rules are
%       read as a flat list of alternatives separated by " or ".
%   metagenomeData: a structure contains two fields "gene" and "value", includes gene name and
%       value, respectively. Genes with value 1 are regarded as present.
%outputs:
%   RxnState: a vector showing the state of the reaction for the MSP:
%       1, at least one gene of the rule is present; 0, none is present;
%       -1, the reaction has no gene rule
%   MSPmodel: reference model with the bacterial genes and gene rules
%
% NOTE(review): the declared function name differs from the file name
% (MetagenomeToReactions.m) - confirm which spelling the callers rely on.
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

if isfield(model,'rules') && ~isfield(model,'grRules')
    % generating model.grRules from the x(<gene index>) terms of model.rules
    model.grRules = cell([numel(model.rxns) 1]);
    for h = 1:length(model.rules)
        if ~isempty(model.rules{h,1})
            tokens = regexp(model.rules{h,1},'\d+','match');
            if isempty(tokens)
                names = {};
            else
                % str2double only parses numbers, str2num would evaluate the text
                names = unique(model.genes(str2double(tokens)));
            end
            if isempty(names)
                joined = '';
            else
                joined = strjoin(reshape(names,1,[]),' or ');
            end
            if length(tokens) > 1
                model.grRules{h,1} = ['(' joined ')'];
            else
                model.grRules{h,1} = joined;
            end
        else
            model.grRules(h,1) = {''};
        end
    end
elseif ~isfield(model,'rules') && ~isfield(model,'grRules')
    error('Either model.rules or model.grRules would be defined in the model. please provide one of them')
end

presentGenes = metagenomeData.gene(metagenomeData.value == 1);
presentGenes = presentGenes(:);

RxnState = zeros(length(model.rxns),1);
RxnState(cellfun('isempty',model.grRules),1) = -1;

% keep, for every reaction, only the genes that are present in the MSP
nRules = length(model.grRules);
rxnGenes = repmat({cell(0,1)},nRules,1);
for i = 1:nRules
    rule = model.grRules{i,1};
    if isempty(rule)
        continue
    end
    inter = intersect(splitOrRule(rule),presentGenes);
    inter = inter(:);
    if ~isempty(inter)
        RxnState(i,1) = 1;
    end
    if length(inter) == 1
        model.grRules(i,1) = inter(1,1);
    elseif length(inter) > 1
        model.grRules{i,1} = ['(' strjoin(reshape(inter,1,[]),' or ') ')'];
    else
        model.grRules{i,1} = '';
    end
    rxnGenes{i} = inter;
end

totalgenes = vertcat(rxnGenes{:});
if isempty(totalgenes)
    model.genes = cell(0,1);
else
    model.genes = unique(totalgenes);
end
model.geneNames = model.genes;

% regenerate model.rules so that x(k) refers to the reduced gene list
for h = 1:nRules
    members = rxnGenes{h};
    if isempty(members)
        model.rules(h,1) = {''};
    else
        [~,loc] = ismember(members,model.genes);
        terms = arrayfun(@(k) ['x(' num2str(k) ')'],loc,'UniformOutput',false);
        if length(terms) > 1
            model.rules{h,1} = ['(' strjoin(reshape(unique(terms),1,[]),' | ') ')'];
        else
            model.rules{h,1} = terms{1,1};
        end
    end
end

% the matrix no longer matches the reduced gene list
if isfield(model,'rxnGeneMat')
    model = rmfield(model,'rxnGeneMat');
end

MSPmodel = model;
end

function genes = splitOrRule(rule)
% Splits a gene rule into its gene names. Parentheses are replaced by blanks
% before splitting: deleting ') ' as a unit (the former approach) glued
% neighbouring tokens together, e.g. '(a or b) or c' became 'a or bor c'.
flat = strtrim(regexprep(rule,'[()]',' '));
genes = strtrim(regexp(flat,'\s+or\s+','split'));
genes = genes(~cellfun('isempty',genes));
end

--------------------------------------------------------------------------------
/Functions/ReactionAbundanceGenerator.m:
--------------------------------------------------------------------------------
function [reactionRelativeAbun, rxnAbunPerSample]= ReactionAbundanceGenerator(modelList,PathToModels,abundance,sampleName)
% ReactionAbundanceGenerator sums, for every reaction and sample, the
% abundance of the bacteria whose model contains the reaction.
%inputs:
%   modelList:    list (column cell array) of model names.
%   PathToModels: a structure includes the path where the models are available
%                 (field "path") and the name of model assigned in the .mat
%                 files (field "name")
%   abundance:    matrix of microbiome (MSP) abundance profile, one row per
%                 entry of modelList and one column per sample
%   sampleName:   list of sample names (used as table variable names)
%outputs:
%   reactionRelativeAbun: table, reaction abundance divided by the total
%                 reaction abundance of the sample
%   rxnAbunPerSample: table, reaction abundance min-max scaled to [0,1]
%                 within each sample (all zero if the sample is constant)
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

% keep the bacteria for which a model file is available
hasFile = false(size(modelList,1),1);
for h1 = 1:size(modelList,1)
    hasFile(h1) = exist([PathToModels.path filesep modelList{h1} '.mat'],'file') == 2;
end
modelList = modelList(hasFile,:);
abundance = abundance(hasFile,:);

% reaction list of every model (loaded into a structure, no eval needed)
nModels = size(modelList,1);
modelRxns = cell(nModels,1);
for h1 = 1:nModels
    loaded = load([PathToModels.path filesep modelList{h1}],PathToModels.name);
    model = loaded.(PathToModels.name);
    modelRxns{h1} = model.rxns(:);
end
rxns = unique(vertcat({},modelRxns{:}));

% presence (1) / absence (0) of every reaction in every model
compare = zeros(numel(rxns),nModels);
for w1 = 1:nModels
    compare(ismember(rxns,modelRxns{w1}),w1) = 1;
end

FinalAbundance = zeros(numel(rxns),numel(sampleName));
for i = 1:numel(sampleName)
    FinalAbundance(:,i) = sum(bsxfun(@times,compare,abundance(:,i)'),2);
end

% explicit dimension: the default would sum across the samples when there
% is only a single reaction row
SumAbundancy = sum(FinalAbundance,1);
reactionRelativeAbun = bsxfun(@rdivide,FinalAbundance,SumAbundancy);
reactionRelativeAbun = array2table(reactionRelativeAbun);
reactionRelativeAbun.Properties.VariableNames = sampleName;
reactionRelativeAbun = [array2table(rxns) reactionRelativeAbun];

rxnAbunPerSample = zeros(size(FinalAbundance));
for i = 1:size(FinalAbundance,2)
    x = FinalAbundance(:,i);
    minVal = min(x);
    maxVal = max(x);
    if minVal == maxVal
        rxnAbunPerSample(:,i) = 0;
    else
        rxnAbunPerSample(:,i) = (x - minVal) / (maxVal - minVal);
    end
end
rxnAbunPerSample = array2table(rxnAbunPerSample);
rxnAbunPerSample.Properties.VariableNames = sampleName;
rxnAbunPerSample = [array2table(rxns) rxnAbunPerSample];
end
--------------------------------------------------------------------------------
/Functions/ReactobiomeGenerator.m:
--------------------------------------------------------------------------------
function [countPerFive]= CPFGenerator(modelList,PathToModels,abundance,sampleName)
% CPFGenerator counts, for every reaction and sample, the bacteria detected
% in the sample whose model contains the reaction, and rescales the counts
% so that the biomass reaction (or, without it, the most frequent reaction)
% equals 500.
%inputs:
%   modelList:    list (column cell array) of model names.
%   PathToModels: a structure includes the path where the models are available
%                 (field "path") and the name of model assigned in the .mat
%                 files (field "name")
%   abundance:    matrix of microbiome (MSP) abundance profile, one row per
%                 entry of modelList and one column per sample
%   sampleName:   list of sample names (used as table variable names)
%outputs:
%   countPerFive: reactobiome profile
%
% NOTE(review): the declared function name differs from the file name
% (ReactobiomeGenerator.m) - confirm which spelling the callers rely on.
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

% keep the bacteria for which a model file is available
hasFile = false(size(modelList,1),1);
for h1 = 1:size(modelList,1)
    hasFile(h1) = exist([PathToModels.path filesep modelList{h1} '.mat'],'file') == 2;
end
modelList = modelList(hasFile,:);
abundance = abundance(hasFile,:);

% reaction list of every model (loaded into a structure, no eval needed)
nModels = size(modelList,1);
modelRxns = cell(nModels,1);
for h1 = 1:nModels
    loaded = load([PathToModels.path filesep modelList{h1}],PathToModels.name);
    model = loaded.(PathToModels.name);
    modelRxns{h1} = model.rxns(:);
end
rxns = unique(vertcat({},modelRxns{:}));

% presence (1) / absence (0) of every reaction in every model
compare = zeros(numel(rxns),nModels);
for w1 = 1:nModels
    compare(ismember(rxns,modelRxns{w1}),w1) = 1;
end

% a bacterium counts as soon as its abundance is positive
binary = abundance;
binary(binary > 0) = 1;
count_rxnstions = zeros(numel(rxns),numel(sampleName));
for i = 1:numel(sampleName)
    count_rxnstions(:,i) = sum(bsxfun(@times,compare,binary(:,i)'),2);
end

count_rxnstionsNor = zeros(size(count_rxnstions));
biomassCount = count_rxnstions(strcmp(rxns, 'Biomass_Bacteria'),:);
if ~isempty(biomassCount)
    for i = 1:size(count_rxnstionsNor,2)
        count_rxnstionsNor(:,i) = count_rxnstions(:,i)*500/biomassCount(1,i);
    end
else
    for i = 1:size(count_rxnstionsNor,2)
        count_rxnstionsNor(:,i) = count_rxnstions(:,i)*500/max(count_rxnstions(:,i));
    end
end
count_rxnstionsNor = array2table(count_rxnstionsNor);
count_rxnstionsNor.Properties.VariableNames = sampleName;
countPerFive = [array2table(rxns) count_rxnstionsNor];

end
--------------------------------------------------------------------------------
/Functions/RxnRichnessGenerator.m:
--------------------------------------------------------------------------------
function [richness]= RxnRichnessGenerator(modelList,PathToModels,abundance,sampleName)
% RxnRichnessGenerator counts, for every sample, the distinct reactions
% found in the models of the bacteria detected in that sample.
%inputs:
%   modelList:    list (column cell array) of model names.
%   PathToModels: a structure includes the path where the models are available
%                 (field "path") and the name of model assigned in the .mat
%                 files (field "name")
%   abundance:    matrix of microbiome (MSP) abundance profile, one row per
%                 entry of modelList and one column per sample
%   sampleName:   list of sample names (row or column)
%outputs:
%   richness: table with the variables sampleName and rxn_richness, the gut
%             microbiome reaction composition (reaction richness) of all individuals
%
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

% keep the bacteria for which a model file is available
hasFile = false(size(modelList,1),1);
for h1 = 1:size(modelList,1)
    hasFile(h1) = exist([PathToModels.path filesep modelList{h1} '.mat'],'file') == 2;
end
modelList = modelList(hasFile,:);
abundance = abundance(hasFile,:);

% a bacterium counts as soon as its abundance is positive
binary = abundance;
binary(binary > 0) = 1;

% read every model exactly once and remember its reaction list; loading
% into a structure avoids eval and keeps the workspace clean
nModels = size(modelList,1);
modelRxns = cell(nModels,1);
for h1 = 1:nModels
    loaded = load([PathToModels.path filesep modelList{h1}],PathToModels.name);
    model = loaded.(PathToModels.name);
    modelRxns{h1} = model.rxns(:);
end
rxns = unique(vertcat({},modelRxns{:}));

% presence (1) / absence (0) of every reaction in every model
temporary = zeros(numel(rxns),nModels);
for w1 = 1:nModels
    temporary(ismember(rxns,modelRxns{w1}),w1) = 1;
end

% a reaction belongs to a sample if at least one detected bacterium has it
rxn_count = zeros(1,numel(sampleName));
for i = 1:numel(sampleName)
    inSample = bsxfun(@times,temporary,binary(:,i)');
    rxn_count(i) = sum(any(inSample == 1,2));
end

richness = table(sampleName(:),rxn_count(:));
richness.Properties.VariableNames = {'sampleName','rxn_richness'};

end
--------------------------------------------------------------------------------
/Functions/USDAcreatingDiet.m:
--------------------------------------------------------------------------------
function[micronutrients_diet_mmol, macronutrients_diet]= USDAcreatingDiet(food_id_item,food_grams_item)
% USDAcreatingDiet sums the macro- and micronutrients of a user defined diet.
% Input:
%   food_id_item:    the IDs of the food items (specified by USDA food ID)
%   food_grams_item: the weight (in grams) for each food item
%   NOTE: the food_grams_item and food_id_item need to be in the same order.
%   Also the food_grams_item needs to be specified in grams, so e.g 1.5 kg needs to be 1500 (g).
%
% output:
%   micronutrients_diet_mmol: total amount of micronutrients for the created diet in mmol/gDW
%   macronutrients_diet:      total amount of macronutrients for the created diet
%
% #Authors Bouchra Ezzamouri.

% get path to where the MIGRENE Toolbox is located
MIGDIR = fileparts(which('MIGRENE_pipeline'));
%load USDA dataset from MIGRENE Toolbox.
USDA = [MIGDIR filesep 'mat' filesep 'USDAdataset.mat'];
db = load(USDA,'food_item_USDA_id','query_food_item_macros_values_1gDW', ...
    'query_food_item_micros_mmol_gDW','mets_USDA_name','macros_USDA_name');
% the two name lists keep their original variable names because table()
% derives the names of the output variables from them
mets_USDA_name = db.mets_USDA_name;
macros_USDA_name = db.macros_USDA_name;

if numel(food_id_item) ~= numel(food_grams_item)
    error('USDAcreatingDiet:sizeMismatch', ...
        'food_id_item (%d entries) and food_grams_item (%d entries) must have the same number of entries', ...
        numel(food_id_item), numel(food_grams_item))
end

% Locate every requested food in the USDA dataset, in the order given by
% the user. Selecting the columns with ismember(dataset,request) returned
% them in dataset order, so the grams were applied to the wrong food
% whenever the request was not sorted like the dataset.
[isKnown, column] = ismember(food_id_item,db.food_item_USDA_id);
if ~all(isKnown(:))
    error('USDAcreatingDiet:unknownFood', ...
        '%d of the requested food IDs are not part of the USDA dataset', nnz(~isKnown))
end
column = reshape(column,1,[]);
grams = reshape(food_grams_item,1,[]);

% nutrients x foods, values per gram of food
food_macros = db.query_food_item_macros_values_1gDW(:,column);
food_micros_mmolgDW = db.query_food_item_micros_mmol_gDW(:,column);

% for every food that the user wants in the diet multiply by the amount in
% grams that the user wants for that specific food, then add up the foods
total_food_macros_diet = sum(bsxfun(@times,food_macros,grams),2);
total_food_micro_mmolgDW_diet = sum(bsxfun(@times,food_micros_mmolgDW,grams),2);

micronutrients_diet_mmol = table(mets_USDA_name,(total_food_micro_mmolgDW_diet));
macronutrients_diet = table(macros_USDA_name,(total_food_macros_diet));
end
--------------------------------------------------------------------------------
/Functions/checkCatalog.m:
--------------------------------------------------------------------------------
function [catalogFileChecked]= checkCatalog(inputFile,numWorkers)
% checkCatalog validates a KO-annotated gene catalog and converts it to long
% format, i.e. one row per gene and KO identifier.
%inputs:
%   inputFile:  cell array contains two columns, first column is gene name and
%               second column provides KO annotation (any text containing
%               identifiers of the form K followed by five digits)
%   numWorkers: optional, kept for backward compatibility. The conversion is
%               vectorised and no longer starts or needs a parallel pool.
%
%outputs:
%   catalogFileChecked: cell array contains two columns, first column is gene name and
%               second column provides exactly one KO identifier. Genes
%               with a single KO come first, followed by the expanded genes
%               with several KOs; genes without any KO are dropped.
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

if nargin < 2
    numWorkers = 1; %#ok<NASGU>
end

% check the availability of KO annotation for each gene. The KO identifiers
% themselves are counted: counting the letter 'K' treated any annotation
% containing a capital K (e.g. 'Kinase') as KO-annotated.
expression = 'K\d\d\d\d\d';
koHits = regexp(inputFile(:,2),expression,'match');
nHits = cellfun(@numel,koHits);
if ~any(nHits)
    error('the catalog is not annotated by KO or if KO-annotated, check the format of inputFile')
end

% genes can be annotated to more than one KO. Here, it splits the
% annotation column and rearrange the catalog to long format
% including repeated genes with one KO annotation in each row
order = [find(nHits == 1); find(nHits > 1)];
geneRow = arrayfun(@(r) repmat(r,nHits(r),1),order,'UniformOutput',false);
geneRow = vertcat(geneRow{:});
koList = [koHits{order}];

catalogFileChecked = [inputFile(geneRow,1), koList(:)];
-------------------------------------------------------------------------------- /Functions/contextSpecificModelGenertion.m: -------------------------------------------------------------------------------- 1 | function [contextSpecificModel] = contextSpecificModelGenertion(model,metagenomeset,threshold,bibliome) 2 | % generates species-specific model based on reaction score, threshold and mind the gap. 3 | %inputs: 4 | % model: reference metabolic model in COBRA or RAVEN format. 5 | % metagenomeset: a matrix includes 3 different scores for each reaction. 6 | % threshold: includes a threshold for each specified taxonomy level. 7 | %OPTIONAL INPUTS: 8 | % bibliome: any bibliome data on phenotypic features of the species. 
% as structure with the following fields:
% "bacteria" a cell array listing the name of the bacteria; "rxn" list the
% name of the reactions having bibliome; "value" a matrix of numbers: zero
% means no information, 1 means consumed, 2 means produced, -1 not-consumed
% and -2 means not-produced by the corresponding bacteria. "aerobeInfo" a
% cell array provides the info that the bacteria require oxygen for growth
% or not so specify with "aerobe" or "anaerobe" or "facultative".
% "species" the name to look up in "bacteria"; it is required when "value"
% has more than one column or "aerobeInfo" has more than one row.
%outputs:
% contextSpecificModel species-specific model
%notes:
% the rows of metagenomeset are read as aligned with model.rxns and only
% column 3 is used. threshold is kept in the signature for compatibility;
% the pruning below does not read it.
%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk
if nargin<4
    bibliome={};
end

% reactions marked as "produced" by the bibliome; their bounds are relaxed
% again on the final model (stays empty when no bibliome is integrated)
produced={};

% Integrating the bibliome data into the model
if ~isempty(bibliome) && isfield(bibliome, 'value') && isfield(bibliome, 'rxn')
    value=[];
    %check all necessaries are provided
    if size(bibliome.value,2)>1
        if ~isfield(bibliome, 'species')
            error('there are more than one column in bibliome.value and you didnt specify the name of species. please assign it by adding species field or do not dedicate bibliome as input of function')
        end
        % find the corresponding bibliome data for the species
        index=find(strcmp(bibliome.bacteria, bibliome.species));
        if ~isempty(index)
            value=bibliome.value(:,index);
        else
            % species unknown to the bibliome: continue without bibliome data
            bibliome={};
        end
    elseif size(bibliome.value,2)==1
        value=bibliome.value;
    end

    if ~isempty(bibliome)
        model1=model;
        % find the exchange reactions and constrain the model for consumption
        consumed=bibliome.rxn(value==1);
        model1=setRxnBounds(model1,consumed,-1,0);
        % find the exchange reactions and constrain the model for
        % production
        produced=bibliome.rxn(value==2);
        model1=setRxnBounds(model1,produced,0.1,1000);
        % block uptake of not-consumed (-1) and secretion of not-produced
        % (-2) metabolites
        notCon=bibliome.rxn(value==-1);
        model1.lb(ismember(model1.rxns,notCon))=0;
        notPro=bibliome.rxn(value==-2);
        model1.ub(ismember(model1.rxns,notPro))=0;
        % check the changes doesnt affect the functionality of the model
        g2=optimizeCbModel(model1);
        if g2.f>0
            disp('the bibliome info was added to the model')
            model=model1;
        else
            disp('the bibliome info could not be added to the model. some changes made the model nonfunctional')
        end
    end
end

%check the species requires oxygen for growth
% anything that is not one of the three known labels is handled as 'none'
oxygenMode='none';
if ~isempty(bibliome) && isfield(bibliome, 'aerobeInfo')
    obic=bibliome.aerobeInfo;
    if size(bibliome.aerobeInfo,1)>1
        if ~isfield(bibliome, 'species')
            error('there are more than one row in bibliome.aerobeInfo and you didnt specify the name of species. please assign it by adding species field or do not dedicate bibliome as input of function.')
        end
        index=find(strcmp(bibliome.bacteria, bibliome.species));
        obic=bibliome.aerobeInfo(index,1);
    end
    if iscell(obic) && ~isempty(obic)
        obic=obic{1};
    end
    if ischar(obic) && ismember(obic,{'aerobe','anaerobe','facultative'})
        oxygenMode=obic;
    end
end

%generate the specific model
switch oxygenMode
    case 'anaerobe'
        model=setRxnBounds(model,'Ex_O2',0,0);
        model=setRxnBounds(model,'rxn08173',0,1000);
        contextSpecificModel=buildPrunedModel(model,metagenomeset);
    case 'aerobe'
        model=setRxnBounds(model,'Ex_O2',-19.2,-1);
        model=setRxnBounds(model,'rxn08173',0.5,1000);
        metagenomeset=raiseScoreToOne(metagenomeset,model,'rxn08173');
        contextSpecificModel=buildPrunedModel(model,metagenomeset);
    case 'facultative'
        % build one model without oxygen ...
        model=setRxnBounds(model,'Ex_O2',0,0);
        model=setRxnBounds(model,'rxn08173',0,1000);
        model_anaerobic=buildPrunedModel(model,metagenomeset);
        % ... one with forced oxygen uptake ...
        model=setRxnBounds(model,'Ex_O2',-19.2,-1);
        model=setRxnBounds(model,'rxn08173',0.5,1000);
        metagenomeset=raiseScoreToOne(metagenomeset,model,'rxn08173');
        model_aerobe=buildPrunedModel(model,metagenomeset);
        % ... and keep every reaction present in either of them
        reactionInModel=union(model_anaerobic.rxns,model_aerobe.rxns);
        removeRxnsF=setdiff(model.rxns,reactionInModel);
        contextSpecificModel=removeRxns(model,removeRxnsF);
    otherwise
        % no oxygen information: oxygen uptake allowed but not required
        model=setRxnBounds(model,'Ex_O2',-10,0);
        model=setRxnBounds(model,'rxn08173',0,1000);
        contextSpecificModel=buildPrunedModel(model,metagenomeset);
end

% relax the bounds that were only needed while building the model
contextSpecificModel=setRxnBounds(contextSpecificModel,'rxn08173',0,1000);
contextSpecificModel=setRxnBounds(contextSpecificModel,produced,0,1000);

end

function model = setRxnBounds(model, rxnIds, lowerBound, upperBound)
% set lb and ub of the reactions named in rxnIds (char or cell array of
% char); names that are not in model.rxns are ignored.
hit=ismember(model.rxns,rxnIds);
model.lb(hit)=lowerBound;
model.ub(hit)=upperBound;
end

function metagenomeset = raiseScoreToOne(metagenomeset, model, rxnId)
% set the column-3 score of rxnId to 1 when it is currently below 1.
idx=find(strcmp(model.rxns,rxnId));
idx=idx(metagenomeset(idx,3) < 1);
metagenomeset(idx,3)=1;
end

function [rxnScores, inReference] = lookupScores(rxnIds, referenceRxns, referenceScores)
% score of every reaction in rxnIds, taken from the reference list;
% reactions missing from the reference get 0 and inReference == false.
[inReference,refIdx]=ismember(rxnIds,referenceRxns);
rxnScores=zeros(length(rxnIds),1);
rxnScores(inReference)=referenceScores(refIdx(inReference));
end

function prunedModel = buildPrunedModel(model, metagenomeset)
% build one context-specific model from the constrained reference model and
% the reaction scores in column 3 of metagenomeset.
scores=metagenomeset(:,3);

% collect the reactions with score from other levels of taxonomy and check
% the essentiality of the reactions: reactions with score 0 that
% KeepNecessaryRxns retains are rescued by giving them score 1.
unsupportedFree=removeRxns(model,model.rxns(scores==0));
necessaryModel=KeepNecessaryRxns(model,scores,0.99,0.1);
rescued=setdiff(necessaryModel.rxns,unsupportedFree.rxns);
rescuedScores=scores;
rescuedScores(ismember(model.rxns,rescued))=1;

% remove the reactions with score from other levels of taxonomy
draftModel=removeRxns(model,model.rxns(rescuedScores==0));

%remove deadEnd reactions that are not supported by metagenomics.
[~,~,deadEndRxns]=removeDeadEnds(draftModel);
draftModel=removeRxns(draftModel,deadEndRxns);

% pruning the reactions without GPR: candidates are the reactions that
% KeepNecessaryRxns would drop at a near-zero threshold
draftScores=lookupScores(draftModel.rxns,model.rxns,scores);
minimalModel=KeepNecessaryRxns(draftModel,draftScores,0.0001,0.1);
candidates=setdiff(draftModel.rxns,minimalModel.rxns);

% get all the reaction with score 1 excluding the transport and exchange
% reactions (names containing 'Ex' or 't_')
supportedRxns=model.rxns(scores >= 1);
isTrEx=~cellfun('isempty',strfind(model.rxns,'Ex')) | ...
    ~cellfun('isempty',strfind(model.rxns,'t_'));
supportedRxns=setdiff(supportedRxns,model.rxns(isTrEx));

% first pass: only the dead-end criterion is checked
prunedModel=pruneCandidates(draftModel,candidates,supportedRxns,false,[]);

% second pass: reactions with score strictly between -1 and 1, lowest score
% first; reactions missing from the reference model sort with key 1
[prunedScores,inReference]=lookupScores(prunedModel.rxns,model.rxns,scores);
weak=prunedScores<1 & prunedScores>-1;
sortKey=prunedScores;
sortKey(~inReference)=1;
weakRxns=prunedModel.rxns(weak);
[~,order]=sort(sortKey(weak));
candidates=weakRxns(order);

%prune the reactions if they are not lethal for the network and functionality of the model
baseline=optimizeCbModel(prunedModel);
prunedModel=pruneCandidates(prunedModel,candidates,supportedRxns,true,baseline.f*0.1);
end

function prunedModel = pruneCandidates(prunedModel, candidates, supportedRxns, checkGrowth, minObjective)
% try to remove the candidate reactions in chunks of 50. A full chunk is
% removed at once when that is acceptable, otherwise its reactions are
% tried one by one; the last chunk is always tried one by one.
chunkSize=50;
chunkStarts=1:chunkSize:length(candidates);
nChunks=length(chunkStarts);
for c=1:nChunks
    % dead ends that exist before this chunk is touched; removals inside
    % the chunk are all compared against this list
    [~,~,baselineDeadEnds]=removeDeadEnds(prunedModel);
    if c ~= nChunks
        chunk=candidates(chunkStarts(c):(chunkStarts(c)+chunkSize-1));
        trial=removeRxns(prunedModel,chunk);
        if removalIsAcceptable(trial,baselineDeadEnds,supportedRxns,checkGrowth,minObjective)
            prunedModel=trial;
            continue
        end
    else
        chunk=candidates(chunkStarts(c):end);
    end
    for k=1:length(chunk)
        trial=removeRxns(prunedModel,chunk{k});
        if removalIsAcceptable(trial,baselineDeadEnds,supportedRxns,checkGrowth,minObjective)
            prunedModel=trial;
        end
    end
end
end

function ok = removalIsAcceptable(trialModel, baselineDeadEnds, supportedRxns, checkGrowth, minObjective)
% a removal is acceptable when it turns fewer than 10 metagenome-supported
% reactions into new dead ends and, if checkGrowth is set, the objective of
% the trial model stays above minObjective.
[~,~,trialDeadEnds]=removeDeadEnds(trialModel);
newDeadEnds=setdiff(trialDeadEnds,baselineDeadEnds);
ok=length(intersect(newDeadEnds,supportedRxns))<10;
if ok && checkGrowth
    sol=optimizeCbModel(trialModel);
    % an unsolved trial (empty or NaN objective) counts as not acceptable
    ok=~isempty(sol.f) && ~isempty(minObjective) && sol.f > minObjective;
end
end

function reducedModelTemp = KeepNecessaryRxns(model, score, threshold, min)
% keep the reactions whose score is above threshold plus the reactions that
% carry flux when the weighted flux through low-score reactions is
% minimised while the objective is held at the fraction "min" of its optimum.
% model: metabolic model; score: one score per reaction of model (column);
% threshold: score below which a reaction is penalised;
% min: minimum fraction of the objective.
[tempModel,~,IndexRev2irrev,IndexIrrev2rev] = convertToIrreversible(model);
expressionRxnsIrrev = zeros(length(tempModel.rxns),1);
for i1=1:length(tempModel.rxns)
    expressionRxnsIrrev(i1,1) = score(IndexIrrev2rev(i1,1),1);
end
% score -1 is treated like score 0 here
expressionRxnsIrrev(expressionRxnsIrrev==-1)=0;
cc=optimizeCbModel(model);
tempModel.lb(find(tempModel.c ==1),1)=cc.f*min; % minimum fraction of objective
tempModel.c(:,1)=0;
% penalty weight: distance of the score below the threshold
penalised=expressionRxnsIrrev < threshold;
tempModel.c(penalised,1)=threshold-expressionRxnsIrrev(penalised,1);
gimmeSolution = optimizeCbModel(tempModel,'min');
reactionScoreTransition=zeros(length(expressionRxnsIrrev),1);
% the flux vector is only used when the minimisation was solved
if gimmeSolution.stat == 1
    reactionScoreTransition(find(gimmeSolution.x>0),1)=1;
end
reactionScoreTransition(find(expressionRxnsIrrev>threshold))=1;
%Translate reactionActivity to reversible model
reactionScoreRev = zeros(length(model.rxns),1);
for i=1:length(model.rxns)
    irrevIdx=IndexRev2irrev{i,1}';
    if any(reactionScoreTransition(irrevIdx) > 0)
        reactionScoreRev(i,1) = 1;
    end
end
rxn2remove = model.rxns(reactionScoreRev == 0);
reducedModelTemp = removeRxns(model,rxn2remove);
end

function reducedModelTemp = KeepNecessaryRxnsLikeGIMME(model, score, threshold, min)
% same as KeepNecessaryRxns, but reactions with score -1 or lower are not
% penalised in the minimisation.
[modelIrrev,~,rev2irrev,irrev2rev] = convertToIrreversible(model);
expressionRxnsIrrev = zeros(length(modelIrrev.rxns),1);
for i1=1:length(modelIrrev.rxns)
    expressionRxnsIrrev(i1,1) = score(irrev2rev(i1,1),1);
end
cc=optimizeCbModel(model);
modelIrrev.lb(find(modelIrrev.c ==1),1)=cc.f*min; % minimum fraction of objective
modelIrrev.c(:,1)=0;
% penalise only the scores in the open interval (-1, threshold)
penalised=expressionRxnsIrrev > -1 & expressionRxnsIrrev < threshold;
modelIrrev.c(penalised,1)=threshold-expressionRxnsIrrev(penalised,1);
gimmeSolution = optimizeCbModel(modelIrrev,'min');
reactionScoreTransition=zeros(length(expressionRxnsIrrev),1);
if gimmeSolution.stat == 1
    reactionScoreTransition(find(gimmeSolution.x>0),1)=1;
end
reactionScoreTransition(find(expressionRxnsIrrev>threshold))=1;

%Translate reactionActivity to reversible model
nRxns = length(model.rxns);
reactionActivity = zeros(nRxns,1);
for i=1:nRxns
    for j=1:size(rev2irrev{i,1},2)
        if (reactionScoreTransition(rev2irrev{i,1}(1,j)) > reactionActivity(i,1))
            reactionActivity(i,1) = reactionScoreTransition(rev2irrev{i,1}(1,j));
        end
    end
end
remove = model.rxns(reactionActivity == 0);
reducedModelTemp = removeRxns(model,remove);
end

--------------------------------------------------------------------------------
/Functions/contextSpecificModelTune.m:
--------------------------------------------------------------------------------
function [modelTuned,modelInfo]= contextSpecificModelTune(model,MSPInformation,reactionScore,threshold,modelseed)
% this function tune the species-specefic model and gather the model information.
%inputs:
% model: species-specefic model
% MSPInformation: a structure includes:
% taxoLevel, the taxonomy names. taxoInfo, taxonomy information for each
% species. taxoGroup: taxonomy group fot bacteria.
% rxns, the reaction
% name in reference model. bacteria, a list of MSP IDs. BacteriaNames,
% list of species names. species, names of the species.
% RxnStateAll, the reaction state (absent/present) for each bacteria
% reactionScore: a matrix includes 3 different scores for each reaction
% (rows are read as aligned with MSPInformation.rxns; column 3 is used).
% threshold: includes a threshold for each specified taxonomy level.
%OPTIONAL INPUTS:
% modelseed true or false (default false); when true, metabolite names,
% KEGG IDs and formulas are filled in from mat/MetInformation.mat
%
% output:
% modelTuned: species-specific model
% modelInfo: provides the gap filling percentage at different levels: i.e. taxonomy
% proximity based, further taxonomy level, not annotated (without
% gene-protein-reaction relation) and the total gap filling percentage

%#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk

if nargin < 5 || isempty(modelseed)
    modelseed=false;
end

%find metabolites of exchange reactions
exchangeMets=GetExchangeMetabolite(model);
%remove dead end exchange reactions: when the metabolites whose ID contains
%the exchanged metabolite take part in at most two reactions, those
%reactions are removed
for hh1=1:size(exchangeMets,1)
    % literal substring search, so characters in the ID are never read as
    % regular-expression syntax
    hits=strfind(model.mets,exchangeMets{hh1});
    Index=find(~cellfun('isempty',hits));
    len=find(any(model.S(Index,:),1))';
    if length(len) <= 2
        model=removeRxns(model,model.rxns(len));
    end
end

model.description='by MIGRENE toolbox';
if ~isempty(MSPInformation.BacteriaNames)
    index2=find(ismember(MSPInformation.bacteria,MSPInformation.species),1,'first');
    % the name is only set when the species is listed in MSPInformation.bacteria
    if ~isempty(index2)
        model.modelName=MSPInformation.BacteriaNames{index2,1};
    end
end
model.modelID=MSPInformation.species;
model.compNames={'Extracellular';'Cytosol';'ExtracellularForElectronTransportChain';'boundary'};
model.comps={'C_e';'C_c';'C_pe';'e'};

if modelseed
    MIGDIR = fileparts(which('MIGRENE_pipeline'));
    METPATH=[MIGDIR filesep 'mat' filesep 'MetInformation.mat'];
    % load into a struct so no variable appears implicitly in this workspace
    loaded=load(METPATH,'MetInformation');
    MetInformation=loaded.MetInformation;
    for w1 =1:size(MetInformation,1)
        metIdx=find(strcmp(model.mets, MetInformation{w1,1}));
        if ~isempty(metIdx)
            model.metNames(metIdx,1)=MetInformation(w1,3);
            model.metKEGG(metIdx,1)=MetInformation(w1,4);
            model.metFormulas(metIdx,1)=MetInformation(w1,5);
        end
    end
end
modelTuned=model;
% Assign an empty table to gather all the info of the model as below
modelInfo = array2table(zeros(1,8));
modelInfo.Properties.VariableNames = {'number_of_rxns',...
    'number_of_rxns_without_transport_rxns','number_of_rxns_with_genes','level_of_gapfilling','percentage_of_gapfillig'...
    'gapfillig_at_the_level','gapfillig_at_other_level',...
    'gapfillig_at_nonAnnotatedRxns'};
modelInfo.number_of_rxns=length(model.rxns);
% reactions whose name contains neither 'Ex' nor 't_'
notExchange=cellfun('isempty',strfind(model.rxns,'Ex'));
notTransport=cellfun('isempty',strfind(model.rxns,'t_'));
coreIdx=find(notExchange & notTransport);
nCore=length(coreIdx);
modelInfo.number_of_rxns_without_transport_rxns=nCore;
%get the level of gapfilling: first non-zero entry of threshold
UsedThreshold1=find(threshold,1,'first');
if isempty(UsedThreshold1)
    UsedThreshold1=0;
    UsedThreshold=0;
    level='not classified';
else
    UsedThreshold=threshold(UsedThreshold1);
    level=MSPInformation.taxoLevel{UsedThreshold1};
end

modelInfo.level_of_gapfilling=level;
coreRxns=model.rxns(coreIdx);
g=find(ismember(MSPInformation.rxns,coreRxns));
metagenomesetMSP=reactionScore(g,3);
% number of reactions with corresponding genes in the species
nWithGenes=length(find(metagenomesetMSP>=1));
modelInfo.number_of_rxns_with_genes=nWithGenes;
% gapfilling info
% percentage of gapfilling
modelInfo.percentage_of_gapfillig=(1-nWithGenes/nCore)*100;
% percentage of number of reactions added by gap filling at the lowest classified level
modelInfo.gapfillig_at_the_level=length(find(metagenomesetMSP>=UsedThreshold...
    & metagenomesetMSP <1))/nCore*100;
% percentage of number of reactions added by gap filling at the other level
modelInfo.gapfillig_at_other_level=length(find(metagenomesetMSP>=0 ...
    & metagenomesetMSP < UsedThreshold))/nCore*100;
% percentage of number of reactions added by gap filling without any info in the catalog
modelInfo.gapfillig_at_nonAnnotatedRxns=length(find(metagenomesetMSP==-1))...
    /nCore*100;
end

function exchangeMets=GetExchangeMetabolite(model)
% IDs of the metabolites taking part in the reactions whose name contains
% 'Ex_', with the suffix 'ee[e]' or '[e]' stripped.
indexEx=strfind(model.rxns,'Ex_');
IndexEx = find(not(cellfun('isempty',indexEx)));
S=model.S(:,IndexEx);
exchangeMets=model.mets(find(any(S,2)));
exchangeMets=strrep(exchangeMets,'ee[e]','');
exchangeMets=strrep(exchangeMets,'[e]','');
% NOTE(review): a third strrep for 'e[e]' used to follow here; it could
% never match because '[e]' is already removed above. If a trailing 'e'
% should be stripped too, it has to run before the '[e]' replacement.
end

--------------------------------------------------------------------------------
/Functions/convertCatalogAnnotation.m:
--------------------------------------------------------------------------------
function [catalogFileConverted]= convertCatalogAnnotation(inputFile,mapping,numWorkers)
% this function automatically download and convert KO to KEGG reaction IDs in catalog.
3 | %inputs: 4 | % inputFile: cell array contains two columns, first column is gene name and 5 | % second column provides KO annotation 6 | % OPTIONAL INPUTS: 7 | % mapping: either cell array contains KOs (column 1) and their corresponding kegg 8 | % reaction ID (column 2) or empty cell array 9 | 10 | %outputs: 11 | % catalogFileConverted: cell array contains two columns, first column is gene name and 12 | % second column provides kegg reaction annotation 13 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 14 | 15 | % check the number of workers for parallelization 16 | if numWorkers > 1 17 | poolobj = gcp('nocreate'); 18 | if isempty(poolobj) 19 | parpool(numWorkers); 20 | end 21 | else 22 | disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers') 23 | end 24 | % if the mapping file for converting KO to reaction ID is not provided, it 25 | % automatically download it from KEGG API. 26 | if isempty(mapping) 27 | % get path to where the MIGRENE Toolbox is located 28 | MIG = fileparts(which('MIGRENE_pipeline')); 29 | DATADIR=[MIG filesep 'data']; 30 | % save ko2rn file in the directory "data" of MIGRENE Toolbox 31 | urlwrite('http://rest.kegg.jp/link/reaction/ko',[DATADIR filesep 'ko2rn.txt']); 32 | Transition1 = readtable([DATADIR filesep 'ko2rn.txt'],'Format','%s%s','ReadVariableNames', false); 33 | ko2rn=table2cell(Transition1) ; 34 | else 35 | ko2rn=mapping; 36 | end 37 | 38 | %remove prefix ko: and rn: from ko2rn if present 39 | ko2rn(:,1)=strrep(ko2rn(:,1),'ko:',''); 40 | ko2rn(:,2)=strrep(ko2rn(:,2),'rn:',''); 41 | 42 | %group ko2rn by KO so that the rn ID for the same KO were summerized. 
43 | [~,~,ind]=unique(ko2rn(:,1)); 44 | uni=unique(ind); 45 | 46 | File={}; 47 | for w=1:numel(uni) 48 | Index=find(ind == uni(w)); 49 | transition2=unique(ko2rn(Index,2)); 50 | str =strjoin(transition2,','); 51 | File(w,1)=unique(ko2rn(Index,1)); 52 | File{w,2}=str; 53 | end 54 | inputFile1=inputFile; 55 | for w=1:size(inputFile,1) 56 | transition1=inputFile{w,2}; 57 | connection=File(find(strcmp(File(:,1),transition1)),2); 58 | if ~isempty(connection) 59 | inputFile1(w,2)=connection; 60 | end 61 | end 62 | 63 | expression = 'K\d\d\d\d\d'; 64 | matchStr = regexp(inputFile1(:,2),expression,'match'); 65 | Index = find(cellfun('isempty',matchStr)); 66 | catalogFileConverted=inputFile1(Index,:); 67 | end 68 | 69 | -------------------------------------------------------------------------------- /Functions/fakeModelGenerator.m: -------------------------------------------------------------------------------- 1 | function [fakemodel, fakemodel1]=fakeModelGenerator(mets_art,Ex_art) 2 | %inputs: 3 | % mets_art: list of exchange metabolites and suffix of the compartments 4 | % Ex_art: list of exchange reactions and suffix of the compartments 5 | %outputs: 6 | % fakemodel: a metabolic model 7 | % fakemodel1: a metabolic model 8 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 9 | fakemodel.mets={mets_art{:,2} mets_art{:,3} mets_art{:,4}}'; 10 | fakemodel.rxns={Ex_art{:,2} Ex_art{:,3} Ex_art{:,4} Ex_art{:,5}}'; 11 | fakemodel.S=zeros(length(fakemodel.mets),length(fakemodel.rxns)); 12 | fakemodel.lb=zeros(length(fakemodel.rxns),1); 13 | fakemodel.ub=zeros(length(fakemodel.rxns),1); 14 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,2))))=-1000; 15 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,2))))=0; 16 | 17 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,3))),1)=0; 18 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,3))),1)=1000; 19 | 20 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,4))),1)=0; 21 | 
fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,4))),1)=100000; 22 | 23 | fakemodel.lb(find(ismember(fakemodel.rxns,Ex_art(:,5))),1)=0; 24 | fakemodel.ub(find(ismember(fakemodel.rxns,Ex_art(:,5))),1)=100000; 25 | 26 | for w1=1:size(mets_art,1) 27 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,3})),find(strcmp(fakemodel.rxns,Ex_art{w1,2})))=-1; 28 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,3})),find(strcmp(fakemodel.rxns,Ex_art{w1,3})))=-1; 29 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,2})),find(strcmp(fakemodel.rxns,Ex_art{w1,3})))=1; 30 | 31 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,2})),find(strcmp(fakemodel.rxns,Ex_art{w1,4})))=-1; 32 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,4})),find(strcmp(fakemodel.rxns,Ex_art{w1,4})))=1; 33 | 34 | fakemodel.S(find(strcmp(fakemodel.mets,mets_art{w1,4})),find(strcmp(fakemodel.rxns,Ex_art{w1,5})))=-1; 35 | end 36 | 37 | fakemodel1.mets={mets_art{:,1} mets_art{:,2}}'; 38 | fakemodel1.rxns=Ex_art(:,1); 39 | fakemodel1.S=zeros(length(fakemodel1.mets),length(fakemodel1.rxns)); 40 | fakemodel1.lb=zeros(length(fakemodel1.rxns),1); 41 | fakemodel1.ub=zeros(length(fakemodel1.rxns),1); 42 | 43 | fakemodel1.lb(find(ismember(fakemodel1.rxns,Ex_art(:,1))),1)=-1000; 44 | fakemodel1.ub(find(ismember(fakemodel1.rxns,Ex_art(:,1))),1)=1000; 45 | 46 | for w1=1:size(mets_art,1) 47 | fakemodel1.S(find(strcmp(fakemodel1.mets,mets_art{w1,2})),find(strcmp(fakemodel1.rxns,Ex_art{w1,1})))=1; 48 | fakemodel1.S(find(strcmp(fakemodel1.mets,mets_art{w1,1})),find(strcmp(fakemodel1.rxns,Ex_art{w1,1})))=-1; 49 | end 50 | end 51 | 52 | -------------------------------------------------------------------------------- /Functions/microbiomeGEMgeneration.m: -------------------------------------------------------------------------------- 1 | function [microbiomeGEM]= microbiomeGEMgeneration(genericModel,cataloginfo,annotationFile,numWorkers) 2 | % this function automatically download and convert KO to KEGG reaction 
IDs in your catalog. 3 | %inputs: 4 | % genericModel: metabolic Model with COBRA or RAVEN format, 5 | % cataloginfo: cell array contains two columns, first column is gene name and 6 | % second column provides kegg reaction ID 7 | % OPTIONAL INPUTS: 8 | % annotationFile: either cell array contains reaction IDs of the model(column 1), 9 | % and column(s) for KO, EC and kegg RN annotations or empty cell array 10 | % numWorkers: integer indicating the number of cores to use for parallelization 11 | 12 | %outputs: 13 | % microbiomeGEM: cell array contains two columns, first column is gene name and 14 | % second column provides kegg reaction anntation 15 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 16 | 17 | if nargin<4 18 | numWorkers=1; 19 | end 20 | % get path to where the MIGRENE Toolbox is located 21 | MIG = fileparts(which('MIGRENE_pipeline')); 22 | DATADIR=[MIG filesep 'data']; 23 | % check the number of workers for parallelization 24 | if numWorkers > 1 25 | poolobj = gcp('nocreate'); 26 | if isempty(poolobj) 27 | parpool(numWorkers); 28 | end 29 | else 30 | disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers') 31 | end 32 | %% 33 | % if annotationFile is not provided, it automatically finds the annotations in the model. 
34 | if isempty(annotationFile) 35 | disp('collecting the annotations from the model') 36 | %find fields including KO, rn or EC IDs 37 | KOexpression = 'K\d\d\d\d\d'; KOs={}; 38 | RNexpression = 'R\d\d\d\d\d'; RNs={}; 39 | ECexpression = 'EC:\d.\d.'; ECs={}; 40 | fnames = fieldnames(genericModel); 41 | for w=1:numel(fnames) 42 | transition=strcat('genericModel.',fnames(w)); 43 | transition=eval(char(transition)); 44 | if iscellstr(transition) 45 | % find fields providing KO annotation for the reactions 46 | matchStr = regexp(transition,KOexpression,'match'); 47 | Index = find(not(cellfun('isempty',matchStr))); 48 | if length(Index)/length(genericModel.rxns)*100>1 49 | KOs= vertcat(KOs,fnames(w)); 50 | end 51 | % find fields providing kegg reaction IDs annotation for the reactions 52 | matchStr = regexp(transition,RNexpression,'match'); 53 | Index = find(not(cellfun('isempty',matchStr))); 54 | if length(Index)/length(genericModel.rxns)*100>1 55 | RNs= vertcat(RNs,fnames(w)); 56 | end 57 | % find fields providing EC annotation for the reactions 58 | matchStr = regexp(transition,ECexpression,'match'); 59 | Index = find(not(cellfun('isempty',matchStr))); 60 | if length(Index)/length(genericModel.rxns)*100>1 61 | ECs= vertcat(ECs,fnames(w)); 62 | end 63 | end 64 | end 65 | else 66 | %find fields including KO, rn or EC IDs in provided annotationFile 67 | disp('collecting the annotations from the annotationFile') 68 | disp('as annotationFile is provided, the function ignores the available annotations in the model.') 69 | disp('please make sure annotationFile follows the following format:') 70 | disp('cell array contains reaction IDs of the model(column 1),and column(s) for KO, EC and kegg RN annotations') 71 | KOexpression = 'K\d\d\d\d\d'; KOs=[]; 72 | RNexpression = 'R\d\d\d\d\d'; RNs=[]; 73 | ECexpression = 'EC:\d.\d.'; ECs=[]; 74 | numberOfannotation=size(annotationFile,2); 75 | for w=2:numberOfannotation 76 | transition=annotationFile(:,w); 77 | if 
iscellstr(transition) 78 | % find fields providing KO annotation for the reactions 79 | matchStr = regexp(transition,KOexpression,'match'); 80 | Index = find(not(cellfun('isempty',matchStr))); 81 | if length(Index)/length(genericModel.rxns)*100>1 82 | KOs= [KOs w]; 83 | end 84 | % find fields providing kegg reaction IDs annotation for the reactions 85 | matchStr = regexp(transition,RNexpression,'match'); 86 | Index = find(not(cellfun('isempty',matchStr))); 87 | if length(Index)/length(genericModel.rxns)*100>1 88 | RNs= [RNs w]; 89 | end 90 | % find fields providing EC annotation for the reactions 91 | matchStr = regexp(transition,ECexpression,'match'); 92 | Index = find(not(cellfun('isempty',matchStr))); 93 | if length(Index)/length(genericModel.rxns)*100>1 94 | ECs= [ECs w]; 95 | end 96 | end 97 | end 98 | end 99 | 100 | if length(RNs)>1 && isempty(annotationFile) 101 | error('there are more than one field in the metabolic model including kegg reaction IDs. It must be one cell array with kegg reaction ID') 102 | end 103 | 104 | if length(ECs)>1 && isempty(annotationFile) 105 | error('there are more than one field in the metabolic model including EC annotation. It must be one cell array with EC annotation') 106 | end 107 | 108 | if length(KOs)>1 && isempty(annotationFile) 109 | error('there are more than one field in the metabolic model including KO annotation. It must be one cell array with KO annotation') 110 | end 111 | 112 | if length(RNs)>1 && ~isempty(annotationFile) 113 | error('there are more than one column in annotationFile including kegg reaction IDs. It must be one cell array with kegg reaction ID') 114 | end 115 | 116 | if length(ECs)>1 && ~isempty(annotationFile) 117 | error('there are more than one column in annotationFile including EC annotation. It must be one cell array with EC annotation') 118 | end 119 | 120 | if length(KOs)>1 && ~isempty(annotationFile) 121 | error('there are more than one column in annotationFile including KO annotation. 
It must be one cell array with KO annotation') 122 | end 123 | 124 | if isempty(KOs) 125 | if isempty(RNs) 126 | if isempty(ECs) 127 | error('there are not any info in the metabolic model or annotationFile for KO,EC or kegg reaction ID annotation. It must be at least one cell array with one of the annotations') 128 | else 129 | disp('EC annotation was found for integration of genes in catalog into model') 130 | end 131 | else 132 | if isempty(ECs) 133 | disp('kegg reaction annotation was found for integration of genes in catalog into model') 134 | else 135 | disp('kegg reaction and EC annotation were found for integration of genes in catalog into model') 136 | end 137 | end 138 | else 139 | if isempty(RNs) 140 | if isempty(ECs) 141 | disp('KO annotation was found for integration of genes in catalog into model') 142 | else 143 | disp('EC and KO annotations were found for integration of genes in catalog into model') 144 | end 145 | else 146 | if isempty(ECs) 147 | disp('KO and kegg reaction annotations were found for integration of genes in catalog into model') 148 | else 149 | disp('KO, kegg reaction and EC annotation were found for integration of genes in catalog into model') 150 | end 151 | end 152 | end 153 | 154 | % make the model ready for integration 155 | %collect all the annotations in the model in one temporary field 156 | genericModel.temporary=cell([numel(genericModel.rxns) 1]); 157 | if ~isempty(RNs) && isempty(annotationFile) 158 | transition=strcat('genericModel.',RNs(1)); 159 | genericModel.temporary=eval(char(transition)); 160 | elseif ~isempty(RNs) && ~isempty(annotationFile) 161 | for q=1:numel(genericModel.rxns) 162 | if ismember(genericModel.rxns{q},annotationFile(:,1)) 163 | genericModel.temporary(q,1)=annotationFile(find(strcmp(annotationFile(:,1),genericModel.rxns{q})),RNs); 164 | end 165 | end 166 | end 167 | 168 | if ~isempty(ECs) 169 | urlwrite('http://rest.kegg.jp/link/reaction/enzyme',[DATADIR filesep 'ec2rn.txt']); 170 | Transition1 =
readtable([DATADIR filesep 'ec2rn.txt'],'Format','%s%s','ReadVariableNames', false); 171 | ec2rn=table2cell(Transition1); 172 | end 173 | if ~isempty(KOs) 174 | urlwrite('http://rest.kegg.jp/link/reaction/ko',[DATADIR filesep 'ko2rn.txt']); 175 | Transition1 = readtable([DATADIR filesep 'ko2rn.txt'],'Format','%s%s','ReadVariableNames', false); 176 | ko2rn=table2cell(Transition1) ; 177 | end 178 | check=who; 179 | 180 | if ismember('ko2rn',check) 181 | ko2rn(:,1)=strrep(ko2rn(:,1),'ko:',''); 182 | ko2rn(:,2)=strrep(ko2rn(:,2),'rn:',''); 183 | %find not annotated reactions in the temporary file and fill in regarding available KO 184 | index=strfind(genericModel.temporary,'R'); 185 | Index = find(cellfun('isempty',index)); 186 | if isempty(annotationFile) 187 | transition=strcat('genericModel.',KOs(1)); 188 | koInfo=eval(char(transition)); 189 | koInfo=strrep(koInfo,' ',','); 190 | koInfo=strrep(koInfo,';',','); 191 | for qq=1:length(Index) 192 | ec=char(koInfo(Index(qq))); 193 | if ~isempty(ec) 194 | tra1=strsplit(ec,',')'; 195 | expression = 'K\d\d\d\d\d'; 196 | matchStr = regexp(tra1,expression,'match'); 197 | tra1=tra1(find(not(cellfun('isempty',matchStr)))); 198 | anno={}; 199 | if ~isempty(tra1) 200 | for ee=1:length(tra1) 201 | if ismember(tra1{ee},ko2rn(:,1)) 202 | % get all the kegg reaction IDs related for the KO 203 | dd=ko2rn(ismember(ko2rn(:,1),tra1{ee}),2); 204 | anno=vertcat(anno,dd); 205 | end 206 | end 207 | genericModel.temporary{Index(qq),1}=strjoin(unique(anno),','); 208 | end 209 | end 210 | end 211 | else 212 | koInfo=annotationFile(:,KOs); 213 | koInfo=strrep(koInfo,' ',','); 214 | koInfo=strrep(koInfo,';',','); 215 | for qq=1:length(Index) 216 | tran1=koInfo(find(strcmp(annotationFile(:,1),genericModel.rxns(Index(qq))))); 217 | ec=char(tran1); 218 | if ~isempty(ec) 219 | tra1=strsplit(ec,',')'; 220 | expression = 'K\d\d\d\d\d'; 221 | matchStr = regexp(tra1,expression,'match'); 222 | tra1=tra1(find(not(cellfun('isempty',matchStr)))); 223 | if
~isempty(tra1) 224 | anno={}; 225 | for ee=1:length(tra1) 226 | if ismember(tra1{ee},ko2rn(:,1)) 227 | dd=ko2rn(ismember(ko2rn(:,1),tra1{ee}),2); 228 | anno=vertcat(anno,dd); 229 | end 230 | end 231 | genericModel.temporary{Index(qq),1}=strjoin(unique(anno),','); 232 | end 233 | end 234 | end 235 | end 236 | end 237 | 238 | 239 | if ismember('ec2rn',check) 240 | %remove prefix rn: from ec2rn if present 241 | ec2rn(:,2)=strrep(ec2rn(:,2),'rn:',''); 242 | %find not annotated reactions in the temporary file and fill in regarding available EC 243 | index=strfind(genericModel.temporary,'R'); 244 | Index = find(cellfun('isempty',index)); 245 | if isempty(annotationFile) 246 | transition=strcat('genericModel.',ECs(1)); 247 | ecInfo=eval(char(transition)); 248 | ecInfo=strrep(ecInfo,' ',','); 249 | ecInfo=strrep(ecInfo,';',','); 250 | ecInfo=strrep(ecInfo,'EC','ec'); 251 | for qq=1:length(Index) 252 | ec=char(ecInfo(Index(qq))); 253 | if ~isempty(ec) 254 | tra1=strsplit(ec,',')'; 255 | expression = 'ec:\d.\d.'; 256 | matchStr = regexp(tra1,expression,'match'); 257 | tra1=tra1(find(not(cellfun('isempty',matchStr)))); 258 | anno={}; 259 | if ~isempty(tra1) 260 | for ee=1:length(tra1) 261 | if ismember(tra1{ee},ec2rn(:,1)) 262 | % get all the kegg reaction IDs related for the ec 263 | dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2); 264 | anno=vertcat(anno,dd); 265 | end 266 | end 267 | genericModel.temporary{Index(qq),1}=strjoin(unique(anno),','); 268 | end 269 | end 270 | end 271 | else 272 | ecInfo=annotationFile(:,ECs); 273 | ecInfo=strrep(ecInfo,' ',','); 274 | ecInfo=strrep(ecInfo,';',','); 275 | ecInfo=strrep(ecInfo,'EC','ec'); 276 | for qq=1:length(Index) 277 | tran1=ecInfo(find(strcmp(annotationFile(:,1),genericModel.rxns(Index(qq))))); 278 | ec=char(tran1); 279 | if ~isempty(ec) 280 | tra1=strsplit(ec,',')'; 281 | expression = 'ec:\d.\d.'; 282 | matchStr = regexp(tra1,expression,'match'); 283 | tra1=tra1(find(not(cellfun('isempty',matchStr)))); 284 | if ~isempty(tra1) 285
| anno={}; 286 | for ee=1:length(tra1) 287 | if ismember(tra1{ee},ec2rn(:,1)) 288 | dd=ec2rn(ismember(ec2rn(:,1),tra1{ee}),2); 289 | anno=vertcat(anno,dd); 290 | end 291 | end 292 | genericModel.temporary{Index(qq),1}=strjoin(unique(anno),','); 293 | end 294 | end 295 | end 296 | end 297 | end 298 | 299 | %% summerize the catalog based on kegg reaction IDs 300 | disp('start getting the catalog info for integration') 301 | cataloginfoLong=ConvertTOLongFormat(cataloginfo,numWorkers); 302 | [~,~,ind]=unique(cataloginfoLong(:,2)); 303 | uni=unique(ind); 304 | catalogForInteg={}; 305 | for w=1:numel(uni) 306 | Index=find(ind == uni(w)); 307 | transition2=unique(cataloginfoLong(Index,1)); 308 | str =strjoin(transition2,','); 309 | catalogForInteg(w,1)=unique(cataloginfoLong(Index,2)); 310 | catalogForInteg{w,2}=str; 311 | end 312 | 313 | %% add GPRs in the model based on catalog data and generate the GEM 314 | disp('start adding GPR into the model') 315 | % this section fill all the following empty cell arrays 316 | genericModel.grRules=cell([numel(genericModel.rxns) 1]); 317 | genericModel.genes={}; 318 | genericModel.rules=cell([numel(genericModel.rxns) 1]); 319 | genericModel.geneNames={}; 320 | genericModel.rxnGeneMat={}; 321 | 322 | for w=1:numel(genericModel.temporary) 323 | rnInfo=genericModel.temporary{w,1}; 324 | if ~isempty(rnInfo) 325 | rnInfo=strrep(rnInfo,' ',','); 326 | rnInfo=strrep(rnInfo,';',','); 327 | tra1=strsplit(rnInfo,',')'; 328 | expression = 'R\d\d\d\d\d'; 329 | matchStr = regexp(tra1,expression,'match'); 330 | tra1=tra1(find(not(cellfun('isempty',matchStr)))); 331 | if ~isempty(tra1) 332 | annotation={}; 333 | for ee=1:length(tra1) 334 | if ismember(tra1{ee},catalogForInteg(:,1)) 335 | dd=catalogForInteg(ismember(catalogForInteg(:,1),tra1{ee}),2); 336 | annotation=vertcat(annotation,dd); 337 | end 338 | end 339 | genericModel.grRules(w,1)={strjoin(unique(annotation),',')}; 340 | end 341 | else 342 | genericModel.grRules(w,1)={''}; 343 | end 344 
| end 345 | 346 | temp1={}; 347 | for i=1:length(genericModel.grRules) 348 | if ~isempty(genericModel.grRules{i}) 349 | tra2=strsplit(genericModel.grRules{i},',')'; 350 | temp1=vertcat(temp1,tra2); 351 | end 352 | end 353 | genericModel.genes=unique(temp1); % genericModel.genes generated 354 | genericModel.geneNames=genericModel.genes; % genericModel.geneNames generated 355 | 356 | c=[]; 357 | for h=1:length(genericModel.grRules) 358 | if ~isempty(genericModel.grRules{h,1}) 359 | tra2=strsplit(genericModel.grRules{h,1},',')'; 360 | for k=1:length(tra2) 361 | index=find(strcmp(genericModel.genes,tra2{k})); 362 | c=[c;[h index 1]]; 363 | end 364 | end 365 | end 366 | checkTheSize=intersect(find(c(:,1)== length(genericModel.rxns)),find(c(:,2)== length(genericModel.genes))); 367 | if isempty(checkTheSize) 368 | c=[c;[length(genericModel.rxns) length(genericModel.genes) 0]]; 369 | end 370 | temp1= sparse(c(:,1),c(:,2),c(:,3)); 371 | genericModel.rxnGeneMat=temp1; % genericModel.rxnGeneMat generated 372 | 373 | 374 | for h=1:length(genericModel.grRules) 375 | if ~isempty(genericModel.grRules{h,1}) 376 | tra2=strsplit(genericModel.grRules{h,1},',')'; 377 | collect={}; 378 | for k=1:length(tra2) 379 | index=find(strcmp(genericModel.genes,tra2{k})) ; 380 | converted=['x(' num2str(index) ')']; 381 | collect=vertcat(collect,converted); 382 | end 383 | genericModel.rules(h,1)={strjoin(unique(collect),',')}; % generating genericModel.rules 384 | else 385 | genericModel.rules(h,1)={''}; 386 | end 387 | end 388 | 389 | for i=1:length(genericModel.rules) 390 | if ~isempty(regexp(genericModel.rules{i},',','match')) 391 | genericModel.rules{i}=['( ' genericModel.rules{i} ' )']; 392 | end 393 | end 394 | genericModel.rules=strrep(genericModel.rules,',',' | '); % genericModel.rules generated 395 | 396 | 397 | for i=1:length(genericModel.grRules) 398 | if ~isempty(regexp(genericModel.grRules{i},',','match')) 399 | genericModel.grRules{i}=['(' genericModel.grRules{i} ')']; 400 | end 401 
| end 402 | genericModel.grRules=strrep(genericModel.grRules,',',' or '); % genericModel.grRules generated 403 | 404 | genericModel.rxnAnnotation=genericModel.temporary; %consider temporary file as extended annotation file for the model 405 | genericModel = rmfield( genericModel,'temporary'); 406 | 407 | microbiomeGEM=genericModel; 408 | end 409 | %% 410 | function [cataloginfoLong]= ConvertTOLongFormat(cataloginfo,numWorkers) 411 | % find the rows with one and more than one kegg rn annotation 412 | index=strfind(cataloginfo(:,2),'R'); 413 | NumberOfKO = cellfun('length',index); 414 | IndexAboveOne= find(NumberOfKO > 1); 415 | IndexOne= find(NumberOfKO == 1); 416 | 417 | Index = find(not(cellfun('isempty',index))); 418 | if ~isempty(Index) 419 | % make a subset of gene catalog including rows with more than one KO annotation . 420 | if ~isempty(IndexAboveOne) 421 | output_1=cataloginfo(IndexAboveOne,:); 422 | end 423 | % make a subset of gene catalog including rows with one KO linked. 424 | if ~isempty(IndexOne) 425 | output_2=cataloginfo(IndexOne,:); 426 | end 427 | end 428 | 429 | % check the number of workers for parallelization 430 | if numWorkers > 1 431 | poolobj = gcp('nocreate'); 432 | if isempty(poolobj) 433 | parpool(numWorkers); 434 | end 435 | else 436 | disp('You didnot specify the number of workers, so parallel mode is disabled. please dedicate number of workers') 437 | end 438 | % convert the subset of gene catalog with several annotated genes to long format. 
439 | if ~isempty(IndexAboveOne) 440 | transition2={}; 441 | parfor w=1:size(output_1,1) 442 | transition1=output_1{w,2}; 443 | expression = 'R\d\d\d\d\d'; 444 | matchStr = regexp(transition1,expression,'match'); 445 | transition2{w,1}=matchStr; 446 | end 447 | elseif ~isempty(IndexOne) 448 | cataloginfoLong=output_2; 449 | end 450 | output_1updated=cell([0 2]); 451 | %tic 452 | if ~isempty(IndexAboveOne) 453 | parfor w=1:size(transition2,1) 454 | transition3={}; 455 | transition1=transition2{w}; 456 | transition3(:,2)=transition1'; 457 | transition3(:,1)=output_1(w,1); 458 | output_1updated=vertcat(output_1updated,transition3); 459 | end 460 | end 461 | %toc 462 | 463 | if ~isempty(IndexAboveOne) & ~isempty(IndexOne) 464 | % Concatenate the two arrays vertically to make a catalog file 465 | cataloginfoLong=vertcat(output_2,output_1updated); 466 | elseif ~isempty(IndexAboveOne) & isempty(IndexOne) 467 | cataloginfoLong=output_1updated; 468 | end 469 | end 470 | -------------------------------------------------------------------------------- /Functions/pRSEGenerator.m: -------------------------------------------------------------------------------- 1 | function [coverage,enrichment]= pRSEGenerator(modelList,PathToModels,abundance,sampleName,IDmap,GSE,significance) 2 | %inputs: 3 | % modelList: list of model names. 
4 | % PathToModels: a structure includes the path where the models are available 5 | % and the name of model assigned in the .mat files 6 | % abundance: matrix of microbiome (MSP) abundance profile 7 | % sampleName: list of sample names 8 | % IDmap: ID mapping between reaction names in the models and and 9 | % the IDs in the pathway file 10 | % GSE: the pathway profile incuding reaction sets 11 | %OPTIONAL INPUT 12 | % significance: p value 13 | %outputs: 14 | % coverage: a table shows the coverage of each pathway in samples 15 | % enrichment: a table shows the p value of the pathways in samples 16 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 17 | if nargin<7 18 | significance=1; 19 | end 20 | 21 | index=[]; 22 | for h1 = 1:size(modelList,1) 23 | if exist([PathToModels.path filesep modelList{h1} '.mat']) 24 | index=[index;h1]; 25 | end 26 | end 27 | modelList=modelList(index,:); 28 | abundance=abundance(index,:); 29 | binary=abundance; 30 | binary(find(binary>0))=1; 31 | rxnTemp={}; 32 | for h1 = 1:size(modelList,1) 33 | load([PathToModels.path filesep modelList{h1}],PathToModels.name) 34 | model=eval(PathToModels.name); 35 | rxnTemp=vertcat(rxnTemp,model.rxns); 36 | end 37 | rxns=unique(rxnTemp); 38 | temporary=zeros(numel(rxns),numel(modelList)); 39 | for w1=1:numel(modelList) 40 | load([PathToModels.path filesep modelList{w1}],PathToModels.name) 41 | model=eval(PathToModels.name); 42 | temporary(find(ismember(rxns,model.rxns)),w1)=1; 43 | end 44 | binary1=binary'; 45 | input=[]; 46 | for i= 1:numel(sampleName) 47 | abun=binary1(i,:); 48 | temporary2=temporary; 49 | for j=1:numel(abun) 50 | temporary2(:,j)=temporary2(:,j)*abun(:,j); 51 | end 52 | input(:,i)=any(temporary2 ==1,2); 53 | end 54 | % convert rxns ID to provided ID for enrichment analysis 55 | rxnsTemp=rxns; 56 | for ii=1:size(rxnsTemp,1) 57 | index=find(strcmp(IDmap(:,1), rxnsTemp{ii,1})); 58 | if ~isempty(index) 59 | rxnsTemp{ii,2}=IDmap{index,2}; 
60 | end 61 | end 62 | Index = find(not(cellfun('isempty',rxnsTemp(:,2)))); 63 | input=input(Index,:); 64 | rxns=rxnsTemp(Index,2); 65 | % calculate Total number of unique IDs in provided GSE. 66 | IDs={}; 67 | for i=1:size(GSE,1) 68 | temp1=strsplit(GSE{i,2},','); 69 | IDs=vertcat(IDs,temp1'); 70 | end 71 | IDs=unique(IDs); 72 | N=size(IDs,1); 73 | enrichment=zeros(size(GSE,1),size(input,2)); 74 | coverage=zeros(size(GSE,1),size(input,2)); 75 | for i=1:size(GSE,1) 76 | temp2=transpose(strsplit(GSE{i,2},',')); 77 | m=size(temp2,1); %Number of IDs associated to the term 78 | n = N - m; %Number of IDs not associated to the term 79 | for j=1:size(input,2) 80 | input1=input(:,j); 81 | tempOne=find(input1==1); 82 | g = length(tempOne); % Number of submitted reactions 83 | k = length(intersect(IDs,rxns(tempOne))); % number of submitted 84 | x = length(intersect(temp2,rxns(tempOne))); % reactions with at least one annotation in IDmap 85 | %number of IDs in the term present in the sample 86 | enrichment(i,j)=hygecdf(x-1,N,m,k,'upper'); 87 | coverage(i,j)=x/m; 88 | end 89 | end 90 | if significance ~=1 91 | coverage(find(enrichment>significance))=0; 92 | end 93 | % convert coverage matrix to table and add sample name and the terms to the table 94 | coverage = array2table(coverage); 95 | coverage.Properties.VariableNames = sampleName; 96 | coverage=[array2table(GSE(:,1)) coverage]; 97 | % convert enrichment matrix to table and add sample name and the terms to the table 98 | enrichment = array2table(enrichment); 99 | enrichment.Properties.VariableNames = sampleName; 100 | enrichment=[array2table(GSE(:,1)) enrichment]; 101 | 102 | end -------------------------------------------------------------------------------- /MIGRENE_pipeline.m: -------------------------------------------------------------------------------- 1 | 2 | %% -----------------------MIGRENE Pipelie-------------------------- 3 | % The toolbox for Microbial and personalized GEM, REactiobiome and 4 | % community 
NEtwork modelling 5 | 6 | % Three tutorials shows the steps that MIGRENE Toolbox automatically 7 | % generate and simulate MAGMA (MSP Associated Genome scale MetAbolic) 8 | % models and personalized metabolic microbiome data using Bacterial 9 | % gene catalog, metagenome species (MSP) and metagenomic data integration. 10 | 11 | % The three tutorials are provided in the toolbox: 12 | % note: if you have your GEMS and you need to create the personalized metabolic 13 | % microbiome data i.e. reaction richness, Microbiome, reaction abundance, 14 | % community models and pRSE (personalized reaction set enrichment), go to Box-c. 15 | 16 | % IntegrationCatalogToModel.m: Box-a| integration of bacterial gene catalog 17 | % into metabolic model to generate a microbiome reference genome scale metabolic 18 | % model (GEM). 19 | 20 | % MAGMAgeneration.m: Box-b| calculation of reactionScore, constraining the model 21 | % based on diet, species specific GEMs or MAGMA generation 22 | 23 | % PersonalizedMicrobiomeMetabolism.m: Box-c| generating personalized metabolic 24 | % microbiome 25 | 26 | %------- 27 | %Gholamreza Bidkhori, 28 | %email: gholamreza.bidkhori@kcl.ac.uk 29 | % gbidkhori@gmail.com, 30 | % Aug 2020 31 | %% 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIGRENE toolbox, 2 | ## Description 3 | MIGRENE toolbox is an integrated pipeline for Microbial and personalized GEM (Genome-scale metabolic model), REactiobiome, and community NEtwork modeling. It enables the generation of species and community-level models from any reference gene catalogs and metagenome species to be applied to personalized microbiome studies. Using the toolbox, GEMs could be generated based on the gut microbial gene catalogs and metagenomic species pan-genomes (MSPs). 
This toolbox also contains functions for performing community modelling using GEMs, determining reaction abundance and richness and reaction set enrichment (RSE), and reactobiome that describes an aggregate of the metabolic repertoires of an individual gut microbiome, or the biochemical state of the microbiome. 4 | 5 | ## Download and installation 6 | 1. Download this repository. You can clone the repository using: 7 | ``` 8 | git clone https://github.com/sysbiomelab/MIGRENE.git 9 | ``` 10 | Or you can download this repository as a compressed archive. 11 | 12 | 2. Change to the folder MIGRENE/ and run from Matlab 13 | ``` 14 | addpath(genpath("MIGRENE")) 15 | ``` 16 | Or you can use the link to learn how to set path in MATLAB to the directory. 17 | ## Tutorials 18 | Generation of microbiome GEM using a generic metabolic model and a microbiome catalog: This tutorial shows how to integrate a bacterial gene catalog 19 | into the metabolic model to generate a microbiome reference genome-scale metabolic model (GEM). 20 | 21 | Generation of Bacterial (species-specific) GEM : This tutorial shows how to calculate the reaction score and threshold for bacteria, to constrain the model and to generate species-specific bacterial GEMs. 22 | 23 | Generation of Personalized Microbiome Metabolism : It shows how to calculate reaction richness, reactobiome, reaction abundance, community models and iRSE (individualized reaction set enrichment) 24 | 25 | Reactobiome and reaction richness for Liver Cirrhosis gut microbiome samples 26 | 27 | 28 | ## Integration of a gene catalog into a metabolic model. 29 | ### Data usage 30 | * ``: is a txt file containing gene names and KO (KEGG orthology) such as [SubSet_hs_10_4_igc2_annot.txt](data/SubSet_hs_10_4_igc2_annot.txt) 31 | * ``: (optional) a txt file contains the mapping information for KO to KEGG reaction ID. 
32 | * ``: a mat file containing a metabolic model in either COBRA or RAVEN format such as [RefMetabolicModel.mat](mat/RefMetabolicModel.mat) 33 | ### functions 34 | * [checkCatalog](Functions/checkCatalog.m): check the `` to make sure it is ready for integration. 35 | * [convertCatalogAnnotation](Functions/convertCatalogAnnotation.m): Convert KO annotations to KEGG reaction IDs in the ``. If no mapping file `` is provided, the latest information is automatically downloaded from the KEGG API. The output is a ``. 36 | * [microbiomeGEMgeneration](Functions/microbiomeGEMgeneration.m): integrate the `` into the `` to generate a microbiome `` 37 | 38 | 39 | ## Generation of Bacterial (species‐specific) GEM 40 | ### Data usage 41 | * ``: the genome scale metabolic Model with COBRA or RAVEN format such as the `` produced above by [microbiomeGEMgeneration](Functions/microbiomeGEMgeneration.m) 42 | * ``: a structure built from a binary matrix containing gene-level data for bacterial species (example [MSPgeneProfile.txt](data/MSPgeneProfile.txt)). 43 | 44 | ``` 45 | T = readtable('MSPgeneProfile.txt'); 46 | bacterial_info = struct(); 47 | bacterial_info.genes=table2cell(T(:,1)); 48 | bacterial_info.msp=T.Properties.VariableNames; 49 | bacterial_info.msp=transpose(bacterial_info.msp(1,2:end)); 50 | bacterial_info.expression=table2array(T(1:end ,2:end)); 51 | ``` 52 | * ``: an Excel file containing taxonomy classification info i.e. Kingdom, Phylum, Class, Order, Family, Genus, Species (example [Taxonomy.xlsx](data/Taxonomy.xlsx)). 53 | * ``: (optional, example [here](mat/bibliome.mat)) any bibliome data about phenotypic features of the bacteria can be provided as a structure with four fields: "bacteria" is a cell array listing the names of the bacteria. "rxn" lists the name of the reactions having bibliome. "value" is a matrix of numbers: zero means no information, 1 means consumed, 2 means produced, -1 means not consumed and -2 means not produced by the corresponding bacteria.
"aerobeInfo" is a cell array stating whether the bacteria require oxygen for growth, specified as "aerobe", "anaerobe" or "facultative". 54 | 55 | ### functions 56 | * [DietConstrain](Functions/DietConstrain.m): (optional) this function constrains `` based on the provided diet `` (1 to 5). Five diets have been provided by the toolbox: 1: high Fibre Plant Based, 2: high Fibre omnivore, 3: high Protein Plant based, 4: high protein omnivore, 5:UK average. Set the number of the diet for constraining the model. 57 | 58 | * [MetagenomeToReactions](Functions/MetagenomeToReactions.m): This function needs `` and `` as inputs and must be run separately for each bacterial species in the `` using a loop, as below: 59 | ``` 60 | for h=1:length(bacterial_info.msp) 61 | metagenomeData=struct(); 62 | metagenomeData.gene=bacterial_info.genes; 63 | metagenomeData.value=bacterial_info.expression(:,h); 64 | [Reaction_State, bacterial_model] = MetagenomeToReactions(microbiomeGEM, metagenomeData); 65 | save(['save\to\directory\' bacterial_info.msp{h} '.mat'],'Reaction_State','bacterial_model'); 66 | end 67 | ``` 68 | `` and `` for each species are the outputs that must be saved in the output directory in the same `mat` file, named after the bacterium. 69 | `` is a model with the bacterial genes and gene rules and `` is a vector showing the state of the reaction (zero or one) for the bacterial species. 70 | * [GenerateMSPInformation](Functions/GenerateMSPInformation.m): this function generates ``, a structure that includes the following fields: taxoLevel, the taxonomy names. taxoInfo, taxonomy information for each species. taxoGroup: taxonomy group for bacteria. rxns, the reaction name in the reference model. bacteria, list of MSP IDs. BacteriaNames, list of species names. RxnStateAll, the reaction state (absent/present) for each bacteria.
the input is the address to the directory including saved `Reaction_State` and `bacterial_model` for each bacterium (MSP), `` and address to `taxonomy` file. 71 | * [MetaGenomicsReactionScore](Functions/MetaGenomicsReactionScore.m): This function utilizes `` to convert reaction states to reaction scores (``) and calculate a threshold (``) for each bacterial species. ``, `` must be added to the `mat` file including `` and `` 72 | * [contextSpecificModelGenertion](Functions/contextSpecificModelGenertion.m): uses ``,``, `` and `` to generate the context-specific species genome scale metabolic model (``) as the output. [contextSpecificModelTune](Functions/contextSpecificModelTune.m) function tunes `` and also provides the level and the details of gap filling. 73 | 74 | ## Reactobiome and reaction richness Generation 75 | 76 | ### Data usage 77 | 78 | * ``: an array to provide the names of bacterial GEM, such as row names of this [file] 79 | * ``: an array to provide the names of samples or subject, such as column names of this [file] 80 | * ``: the bacterial abundance matrix showing the abundance of bacteria in `` in each subject in `` note: for example go to [link1](https://github.com/sysbiomelab/LiverCirrhosis_MS) or [link2](https://github.com/sysbiomelab/MIGRENE/wiki/Generation-of-Personalized-Microbiome-Metabolism). 81 | * ``: Provides the path where the [bacterial GEMs](https://github.com/sysbiomelab/LiverCirrhosis_MS/blob/main/GEMmodels.zip) are saved in `PathToModels.path` field and the name of the model assigned in the .mat files in `PathToModels.name`. note: if you use the provided [bacterial GEMs](https://github.com/sysbiomelab/LiverCirrhosis_MS/blob/main/GEMmodels.zip), unzip it and then ```PathToModels.name='model'```. 82 | 83 | ### Functions 84 | * [RxnRichnessGenerator](Functions/RxnRichnessGenerator.m): uses ``,``,`` and `` and generates reaction richness for all the subjects regarding the provided GEMs.
85 | * [ReactobiomeGenerator](Functions/ReactobiomeGenerator.m): utilizes ``,``,`` and `` and generates reactobiome for all the subjects regarding the provided GEMs. 86 | 87 | ## Contact 88 | gholamreza.bidkhori@kcl.ac.uk, 89 | gbidkhori@gmail.com, 90 | # Citation 91 | in preparation, 2024 92 | 93 | -------------------------------------------------------------------------------- /data/BacterialAbundance.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/BacterialAbundance.xlsx -------------------------------------------------------------------------------- /data/Taxonomy.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/Taxonomy.xlsx -------------------------------------------------------------------------------- /data/pathways.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/data/pathways.xlsx -------------------------------------------------------------------------------- /mat/DietInput.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/DietInput.mat -------------------------------------------------------------------------------- /mat/Diets.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/Diets.mat -------------------------------------------------------------------------------- /mat/KBase.mat: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/KBase.mat -------------------------------------------------------------------------------- /mat/MetInformation.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/MetInformation.mat -------------------------------------------------------------------------------- /mat/RefMetabolicModel.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/RefMetabolicModel.mat -------------------------------------------------------------------------------- /mat/USDAdataset.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/USDAdataset.mat -------------------------------------------------------------------------------- /mat/bibliome.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/bibliome.mat -------------------------------------------------------------------------------- /mat/microbiomeGEM.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysbiomelab/MIGRENE/571e35f75aec38320b6f387a59681d75be89c2f7/mat/microbiomeGEM.mat -------------------------------------------------------------------------------- /saveDir/test.txt: -------------------------------------------------------------------------------- 1 | test file -------------------------------------------------------------------------------- /tutorials/IntegrationCatalogToModel.m: 
-------------------------------------------------------------------------------- 1 | %% Box-A| generation of microbiome GEM using a generic metabolic model and a microbime catalog 2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 3 | %% start 4 | % get path to where the MIGRENE Toolbox is located 5 | MIGDIR = fileparts(which('MIGRENE_pipeline')); 6 | % provide the path to microbiome catalog . 7 | CATDIR=[MIGDIR filesep 'data']; 8 | % provide the path to reference metabolic model. 9 | MATDIR=[MIGDIR filesep 'mat']; 10 | % define a directory to save microbiomeGEM, here it will be saved in mat 11 | % directory 12 | SAVEDIR=[MIGDIR filesep 'mat']; 13 | % number of cores specified for parallelization. it can be a positive 14 | % integer or a range specified as a 2-element vector of integers 15 | numWorkers=4 16 | 17 | %% integration 18 | % integration of bacterial gene catalog into metabolic model to generate a 19 | % generic genome scale metabolic. 20 | %First the cataloge data is read from the text file. 21 | % here, a subset of updated gut catalog is used to run the pipeline and 22 | % generate MAGMA. this small annotated cataloge includes genes 23 | %of 10 Bacteroides. their taxonomy information is also provided for MAGMA 24 | %generation 25 | catalog=[CATDIR filesep 'SubSet_hs_10_4_igc2_annot.txt']; 26 | T = readtable(catalog); 27 | catalogData=table2cell(T) ; 28 | %before using the catalog, the following function rearranges the catalog 29 | %for the genes with more that KO linked, checks the structure, and provides 30 | %the format compatible for downstream functions 31 | [catalogFileChecked]= checkCatalog(catalogData,numWorkers); 32 | 33 | %convert KO in the catalog to KEGG reaction IDs. if there is no ID mapping file, 34 | %assign an empty cell array. 
then, it will automatically download the last 35 | %updated information from KEGG API and saves in directory "data" where 36 | %the MIGRENE Toolbox is located.(make sure, you are connected to internet) 37 | mapping={}; 38 | inputFile=catalogFileChecked; 39 | [catalogConverted]= convertCatalogAnnotation(inputFile,mapping,numWorkers); 40 | 41 | % gene-protein-reaction (GPR) association is assigned by integrating the 42 | % the catalog genes into metabolic model and generate generic genome scale 43 | % metabolic model. it will be compatible with both COBRA and RAVEN 44 | % toolboxes so you can use any functions provided by both toolboxes. 45 | 46 | % first, the annotated metabolic model model is loaded. you can load your 47 | % generic model. 48 | load([MATDIR filesep 'RefMetabolicModel.mat']); 49 | genericModel=model; 50 | % if you have annotation file, please load or import it. otherwise, leave 51 | % the annotationFile empty. then, the function automatically find all type 52 | % of annotations in your model and download the corresponding info from 53 | % KEGG API, (make sure, you are connected to internet). here a generic 54 | % metabolic model is used. the reactions in the model are annotated by KO 55 | % and kegg RN ID. 56 | annotationFile={}; 57 | 58 | [microbiomeGEM]=microbiomeGEMgeneration(genericModel,catalogConverted,... 59 | annotationFile,numWorkers); 60 | 61 | %save microbiomeGEM to a MAT-file 62 | save([SAVEDIR filesep 'microbiomeGEM.mat'],'microbiomeGEM') 63 | % done, congrats. 
go to MAGMAgeneration.m in tutorials directory -------------------------------------------------------------------------------- /tutorials/MAGMAgeneration.m: -------------------------------------------------------------------------------- 1 | %% Box-B| calculation of reactionScore, constraining the model based on diet, MAGMA generation 2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 3 | %% start 4 | % get path to where the MIGRENE Toolbox is located 5 | MIGDIR = fileparts(which('MIGRENE_pipeline')); 6 | % provide the path to bacterial species (MSP) gene info and taxonomy 7 | % information. 8 | CATDIR=[MIGDIR filesep 'data']; 9 | taxo=[CATDIR filesep 'Taxonomy.xlsx']; 10 | speciesGeneInfo=[CATDIR filesep 'MSPgeneProfile.txt']; 11 | % provide the path to microbiomeGEM (generated in 12 | % IntegrationCatalogToModel.m) and bibliome data. 13 | MATDIR=[MIGDIR filesep 'mat']; 14 | MODEL=[MATDIR filesep 'microbiomeGEM.mat']; 15 | BIBLIOME=[MATDIR filesep 'bibliome.mat']; 16 | % define a directory to save microbiomeGEM 17 | SAVEDIR=[MIGDIR filesep 'saveDir']; 18 | % number of cores specified for parallelization. it can be a positive 19 | % integer or a range specified as a 2-element vector of integers 20 | numWorkers=4 21 | 22 | % for some functions such as FBA simulation, you need to install 23 | % cobra toolbox. 24 | %please visit https://opencobra.github.io/cobratoolbox/latest/installation.html 25 | % for installing the cobra toolbox. after installation, initiate COBRA 26 | initCobraToolbox() 27 | 28 | %% 29 | %load microbiomeGEM model as a reference GEM. 30 | load(MODEL) 31 | 32 | % you can constrain the model before generating your bacterial GEMs 33 | % for example here, the reference model is constraned based on the high fiber 34 | % animal based model. 35 | % we already generated 5 diets. the following function constrain the model. 
36 | 37 | dietNumber=2; % 1:high Fibre Plant Based, 2:high Fibre omnivore, 3:high Protein Plant based 38 | % 4:high protein omnivore, 5:UK average. 39 | [microbiomeGEM]=DietConstrain(microbiomeGEM,dietNumber); 40 | % acetate and lactate are added as popular carbon sources for bacteria 41 | microbiomeGEM.lb(find(strcmp(microbiomeGEM.rxns, 'Ex_Acetate')))= -2.597426442; 42 | microbiomeGEM.lb(find(strcmp(microbiomeGEM.rxns, 'Ex_L-Lactate')))= -0.074862638; 43 | 44 | % generating new diet based on USDA dataset 45 | newDiet=false 46 | % if you have a diet and want to apply for modeling, you can use the 47 | % USDAcreatingDiet function as below. your compounds also could be matched 48 | % with USDA IDs by using searchfood function. here we provided an example 49 | % for high protein plant based diet. the diet will be converted to mmol/gDW 50 | % for modeling 51 | if newDiet 52 | %Example: high protein plant based diet 53 | load([MATDIR filesep 'DietInput.mat']); 54 | %DietInput.mat includes: food_id_item: the iD of the food items that are 55 | %present in a high protein plant based diet, food_grams_item: the amount 56 | %(grams) of each food ID item 57 | [micronutrients_diet_mmol, macronutrients_diet]= USDAcreatingDiet(food_id_item,food_grams_item) 58 | end 59 | 60 | %filling a structure for species gene info. 61 | MSPinfo = struct(); 62 | % First, the species(MSP) gene info is read from the text file. it provides 63 | % the genes of 10 Bacteroides in a matrix. 64 | T = readtable(speciesGeneInfo); 65 | MSPinfo.genes=table2cell(T(:,1)); 66 | MSPinfo.msp=T.Properties.VariableNames; 67 | MSPinfo.msp=transpose(MSPinfo.msp(1,2:end)); 68 | MSPinfo.expression=table2array(T(1:end ,2:end)); 69 | 70 | % second, generate reaction state (absent/present reaction) for bacteria 71 | % and prune the genes based on bacterial (MSP) profile for each species. 
72 | for h=1:length(MSPinfo.msp) 73 | if ~exist([SAVEDIR filesep MSPinfo.msp{h} '.mat']) 74 | metagenomeData=struct(); 75 | metagenomeData.gene=MSPinfo.genes; 76 | metagenomeData.value=MSPinfo.expression(:,h); 77 | [RxnState, modelforMSP] = MetagenomeToReactions(microbiomeGEM, metagenomeData); 78 | % save RxnState and modelforMSP in a mat file entitled the 79 | % corresponding bacterium (MSP) 80 | save([SAVEDIR filesep MSPinfo.msp{h} '.mat'],'RxnState','modelforMSP'); 81 | else 82 | disp(['the reaction state and modelforMSP for ' MSPinfo.msp{h} ' are already generated. see your directory']) 83 | end 84 | end 85 | 86 | % collect MSP (bacterial) Information 87 | % get path to where RxnState and modelforMSP for each bacterium (MSP) were 88 | % saved 89 | RXNDIR=SAVEDIR; 90 | % the following function generates MSPInformation, a structure includes the 91 | % following fields: 92 | % taxoLevel, the taxonomy names. taxoInfo, taxonomy information for each 93 | % species. taxoGroup: taxonomy group for bacteria. rxns, the reaction name 94 | % in reference model. bacteria, list of MSP IDs. BacteriaNames, list of 95 | % species names. 
RxnStateAll, the reaction state (absent/present) for each 96 | % bacteria 97 | 98 | [MSPInformation]= GenerateMSPInformation(taxo,RXNDIR,microbiomeGEM) 99 | 100 | % convet reaction state to reaction score and calculate the threshold for 101 | % gapfilling 102 | for h=1:length(MSPInformation.bacteria) 103 | % add the msp name into MSPInformation 104 | MSPInformation.species=MSPInformation.bacteria{h}; 105 | [reactionScore, threshold] = MetaGenomicsReactionScore(MSPInformation); 106 | % adds new variables to the corresponding MAT-file 107 | save([RXNDIR filesep MSPInformation.bacteria{h} '.mat'],'reactionScore','threshold','-append'); 108 | end 109 | %% collecting the bibliome data and the constraining the model 110 | 111 | % if you have any bibliome data about phenotypic features of the bacteria 112 | % that you are making GEM, provide it as a structure with four fields: 113 | % "bacteria" a cell array listing the name of the bacteria; "rxn" list the 114 | % name of the reactions having bibliome; "value" a matrix of numbers: zero 115 | % means no information, 1 means consumed, 2 means produced, -1 not-consumed 116 | % and -2 means not-produced by the corresponding bacteria. "aerobeInfo" a 117 | % cell array provides the info that the bacteria require oxygen for growth 118 | % or not so specefiy with "aerobe" or "anaerobe" or "facultative". 119 | % if you do not provide the information, then the models are just generated 120 | % and tuned based on the reactionScore and threshold. 
121 | load(BIBLIOME) 122 | 123 | %collect all exchange and transport reactions 124 | indexEx=strfind(microbiomeGEM.rxns,'Ex'); 125 | IndexEx = find(not(cellfun('isempty',indexEx))); 126 | indexTr=strfind(microbiomeGEM.rxns,'t_'); 127 | IndexTr = find(not(cellfun('isempty',indexTr))); 128 | indexOfTrEx=union(IndexEx,IndexTr); 129 | modelseed='true' %if the reference model is based on KBase or modelSEED 130 | for h=1:length(MSPInformation.bacteria) 131 | load([RXNDIR filesep MSPInformation.bacteria{h} '.mat']) 132 | %to keep all the exchange reactions and transport reactions in the models 133 | %and prune it later, before generating the species-specific GEMs (MAGMA) 134 | %the score of the reaction changed as 1 135 | reactionScore(indexOfTrEx,:)=1; 136 | % add a field, named species, to the biblome for finding the 137 | % corresponding info in the structure including all the bacteria 138 | if exist('bibliome') 139 | bibliome.species=MSPInformation.bacteria{h}; 140 | [contextSpecificModel] = contextSpecificModelGenertion(modelforMSP,reactionScore,threshold,bibliome); 141 | else 142 | [contextSpecificModel] = contextSpecificModelGenertion(modelforMSP,reactionScore,threshold); 143 | end 144 | MSPInformation.species=MSPInformation.bacteria{h}; 145 | [contextSpecificModel, modelInfo] = contextSpecificModelTune(contextSpecificModel,MSPInformation,reactionScore,threshold,modelseed); 146 | %adds new variables to the corresponding MAT-file 147 | save([RXNDIR filesep MSPInformation.bacteria{h} '.mat'],'contextSpecificModel','modelInfo','-append'); 148 | end 149 | 150 | % collect modelInfo Tables and save as excel file. 
regarding the level 151 | % and the pecentage of gapfilling, you can decide which bacterial models 152 | % were generated based on enough gene info and discard the poor models 153 | modelInfoFinal=table() 154 | for h=1:length(MSPInformation.bacteria) 155 | load([RXNDIR filesep MSPInformation.bacteria{h} '.mat'],'modelInfo') 156 | modelInfo=[table(MSPInformation.bacteria(h)) modelInfo]; 157 | modelInfoFinal=[modelInfoFinal;modelInfo]; 158 | end 159 | 160 | % Write Data to Excel Spreadsheets 161 | filename=[RXNDIR filesep 'modelInfoFinal.xlsx'] 162 | writetable(modelInfoFinal,filename,'Sheet',1,'Range','A1') 163 | 164 | % done, congrats. go to PersonalizedMicrobiomeMetabolism.m in tutorials 165 | % directory for integration of bacterial profile into the models to 166 | % investigate the metabolism of microbiome at personalized level. -------------------------------------------------------------------------------- /tutorials/PersonalizedMicrobiomeMetabolism.m: -------------------------------------------------------------------------------- 1 | %% Box-C| generating personalized metabolic microbiome 2 | %#Author: Gholamreza Bidkori, KCL, UK, email: gbidkhori@gmail.com, gholamreza.bidkhori@kcl.ac.uk 3 | %% start 4 | % get path to where the MIGRENE Toolbox is located 5 | MIGDIR = fileparts(which('MIGRENE_pipeline')); 6 | % provide the path to bacterial species (MSP) gene info and bacterial 7 | % abundance obtained from metagenomics analysis 8 | CATDIR=[MIGDIR filesep 'data']; 9 | ABUNDANCE=[CATDIR filesep 'BacterialAbundance.xlsx']; 10 | PATHWAY=[CATDIR filesep 'pathways.xlsx']; 11 | % provide the path to microbiomeGEM (generated in 12 | % IntegrationCatalogToModel.m) and bibliome data. 13 | MATDIR=[MIGDIR filesep 'mat']; 14 | MODEL=[MATDIR filesep 'microbiomeGEM.mat']; 15 | BIBLIOME=[MATDIR filesep 'bibliome.mat']; 16 | % define a directory to save microbiomeGEM 17 | SAVEDIR=[MIGDIR filesep 'saveDir']; 18 | % number of cores specified for parallelization. 
it can be a positive 19 | % integer or a range specified as a 2-element vector of integers 20 | numWorkers=4; 21 | % for some functions such as FBA simulation, you need to install 22 | % cobra toolbox 23 | initCobraToolbox() 24 | 25 | %% 26 | %load microbiome (MSP) abundance profile. it could be metagenomics or 16s based 27 | [abundance,infoFile,~]=xlsread(ABUNDANCE); 28 | %name of models 29 | modelList = infoFile(2:end,1); 30 | %name of samples 31 | sampleName=infoFile(1,2:end); 32 | %check the samples, 33 | %remove the MSP name if the abundance of the bacterium is zero in all samples; modelList is filtered first so that the row mask is computed on the unfiltered abundance matrix and names stay aligned with rows 34 | modelList=modelList(sum(abundance,2)~=0,:); 35 | abundance=abundance(sum(abundance,2)~=0,:); 36 | %remove the samples if there is no bacterial abundance (sampleName is filtered before abundance for the same reason) 37 | sampleName=sampleName(sum(abundance,1)~=0); 38 | abundance=abundance(:,sum(abundance,1)~=0); 39 | % get the number of bacteria (bacterial richness) in each sample 40 | temp1=abundance; 41 | temp1(find(temp1>0))=1; 42 | BactrialRichness=table(sampleName',sum(temp1,1)'); 43 | BactrialRichness.Properties.VariableNames = {'sampleName','BactrialRichness'}; 44 | 45 | % give the path where the models are available and the name of model assigned in the .mat files 46 | PathToModels.path=SAVEDIR; 47 | PathToModels.name='contextSpecificModel'; 48 | % generate gut microbiome reaction composition (reaction richness) of all individuals 49 | richness= RxnRichnessGenerator(modelList,PathToModels,abundance,sampleName); 50 | 51 | % generate reaction abundance for all individuals; the function generates both reaction abundance 52 | % and relative reaction abundance 53 | [reactionRelativeAbun, rxnAbunPerSample]= ReactionAbundanceGenerator(modelList,PathToModels,abundance,sampleName); 54 | % generate reactobiome for all individuals 55 | countPerFiveBacteria= ReactobiomeGenerator(modelList,PathToModels,abundance,sampleName); 56 | 57 | % for the enrichment analysis, you need to prepare two files: 58 | %1) a file includes pathway terms and the IDs
that could be KO,EC,kegg 59 | %reaction ID and etc. here we use the KEGG pathway terms with Kegg reaction 60 | %ID 61 | %provide the pathway profiles 62 | [~,terms,~]=xlsread(PATHWAY); 63 | terms=terms(2:end,:); 64 | %2) a file for ID mapping between reaction names in the models and 65 | % the IDs in the pathway file. here, we use the info in the reference model. 66 | load(MODEL) 67 | IDmap=[microbiomeGEM.rxns microbiomeGEM.rxnRN]; 68 | Index = find(not(cellfun('isempty',IDmap(:,2)))); 69 | IDmap=IDmap(Index,:); 70 | % if you assign the p-value, then coverage of non-significant terms is set 71 | % as zero. if you don't define the p-value, it returns all. 72 | p_value=0.05; 73 | [coverageRSE,pRSE]= pRSEGenerator(modelList,PathToModels,abundance,sampleName,IDmap,terms,p_value); 74 | % coverage is a table that shows the coverage of each pathway in samples 75 | % pRSE is a table that shows the p value of the pathways in samples 76 | %% 77 | %community modeling 78 | % define the number of top abundant bacteria for community modeling 79 | %here we generate communities for the top 5 bacteria (the value of "top" below) 80 | top=5; 81 | thre=[]; 82 | for i=1:size(abundance,2) 83 | t1=sort(abundance(:,i),1,'descend'); 84 | thre(i,1)=t1(top,1); 85 | abundance(find(abundance(:,i) < thre(i,1)),i)=0; 86 | end 87 | boxplot(thre) 88 | median(thre) 89 | 90 | %specify the metabolite ID and exchange reaction for biomass (optional) 91 | biomass.EXrxn='Ex_Biomass'; 92 | biomass.mets='cpd11416ee[lu]'; 93 | % make a directory to save generated community models 94 | if ~exist([SAVEDIR filesep 'community'],'dir') 95 | mkdir([SAVEDIR filesep 'community']); 96 | end 97 | PathToSave=[SAVEDIR filesep 'community']; 98 | % in report, "one" next to sample name shows that community model has been 99 | % generated for the individuals in PathToSave directory 100 | [report]= MakeCommunity(modelList,PathToModels,abundance,sampleName,PathToSave,biomass); 101 | 
--------------------------------------------------------------------------------