├── README.md
├── Cal_imaging.m
├── PWAS.sh
├── MR_mrpresso.R
├── MR.R
└── TimeToEvent.R


/README.md:
--------------------------------------------------------------------------------
# UKB_PWAS
Code utilized in the paper "A phenome-wide association and Mendelian randomization study for Alzheimer's disease: a prospective cohort study of 502,493 participants from UK Biobank"

--------------------------------------------------------------------------------
/Cal_imaging.m:
--------------------------------------------------------------------------------
% Partial correlation of every phenotype column in pheno_dataall with each
% brain-imaging measure, controlling for Cov_all plus site_cov.
%
% Workspace inputs (none of them are created in this script):
%   pheno_dataall, pheno_id, Cov_all, site_id, site_cov, newimageid,
%   imageid_aseg, UKB_aparc_Area, UKB_aparc_Thickness, UKB_aparc_Volume,
%   UKB_aseg_Volume, and the helper function intersect_multi.
%
% Output:
%   Resultcsd_image - 2-by-nPheno cell array. Row 1 holds the aparc results,
%   row 2 the aseg results; each cell is [measure name, coefficient, p-value].

nPheno = size(pheno_dataall,2);
Resultcsd_image = cell(2,nPheno);

for kk = 1:nPheno

    phenodataori = table2array(pheno_dataall(:,kk));

    % Columns stored as cell arrays are converted to numbers first.
    if iscell(phenodataori)
        phenodataori = str2double(cellstr(phenodataori));
    end

    % Drop subjects with a missing phenotype value.
    hasValue   = ~isnan(phenodataori);
    phenodata  = phenodataori(hasValue);
    phenoid    = pheno_id(hasValue);
    Cov_model1 = Cov_all(hasValue,:);

    % Subjects present in every data source.
    final_id = intersect_multi({phenoid;newimageid;site_id;imageid_aseg});

    % intersect returns its index in sorted-ID order, so every *_final
    % matrix below ends up with rows in the same subject order.
    [~,index] = intersect(phenoid,final_id);
    cov_final   = Cov_model1(index,:);
    pheno_final = phenodata(index,:);

    [~,index] = intersect(site_id,final_id);
    site_cov_final = site_cov(index,:);

    cov_final2 = [cov_final, site_cov_final];

    [~,index] = intersect(newimageid,final_id);
    UKB_area_final      = UKB_aparc_Area(index,1:end-1);
    UKB_thickness_final = UKB_aparc_Thickness(index,1:end-1);
    UKB_volume_final    = UKB_aparc_Volume(index,1:end-1);
    UKB_aparc = [UKB_area_final UKB_thickness_final UKB_volume_final];
    aparcname = UKB_aparc.Properties.VariableNames;
    UKB_aparc_final = double(cell2mat(table2cell(UKB_aparc)));

    [~,index] = intersect(imageid_aseg,final_id);
    UKB_aseg_final = double(cell2mat(table2cell(UKB_aseg_Volume(index,2:46))));
    asegname = UKB_aseg_Volume.Properties.VariableNames;

    % Result vectors are sized from the data instead of the previous
    % hard-coded 204 / 45, so they always match the name lists they are
    % concatenated with below.
    nAparc = size(UKB_aparc,2);
    C_value_aparc = nan(nAparc,1);
    P_value_aparc = nan(nAparc,1);

    parfor i = 1:nAparc
        i   % echo the index as a progress trace
        Covariate = cov_final2;

        [C_value_aparc(i,:), P_value_aparc(i,:)] = partialcorr(UKB_aparc_final(:,i),pheno_final,Covariate,'rows','complete');
    end
    Resultcsd_image{1,kk} = [aparcname' num2cell(C_value_aparc) num2cell(P_value_aparc)];

    nAseg = size(UKB_aseg_final,2);
    C_value_aseg = nan(nAseg,1);
    P_value_aseg = nan(nAseg,1);

    parfor i = 1:nAseg
        i   % echo the index as a progress trace
        Covariate = cov_final2;

        [C_value_aseg(i,:), P_value_aseg(i,:)] = partialcorr(UKB_aseg_final(:,i),pheno_final,Covariate,'rows','complete');
    end
    Resultcsd_image{2,kk} = [asegname(2:46)' num2cell(C_value_aseg) num2cell(P_value_aseg)];

end

--------------------------------------------------------------------------------
/PWAS.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Run the PHESANT phenome scan once per PRS column of PRS_8pT.csv.
# The eight invocations differ only in --traitofinterest, so they are
# driven by one loop; the order matches the original script.
# NOTE(review): every run writes to the same --resDir; confirm that
# phenomeScan.r names its output per trait so runs do not overwrite
# each other.

PHESANT_DIR="/home1/PWAS/PHESANT-master"

for trait in pT_1 pT_0.1 pT_0.01 pT_0.001 pT_0.5 pT_0.05 pT_0.005 pT_0.0005; do
    Rscript "${PHESANT_DIR}/WAS/phenomeScan.r" \
        --phenofile="/home1/PWAS/ukbdata.csv" \
        --traitofinterestfile="/home1/PWAS/PRS_8pT.csv" \
        --variablelistfile="${PHESANT_DIR}/variable-info/outcome-info.tsv" \
        --datacodingfile="${PHESANT_DIR}/variable-info/data-coding-ordinal-info.txt" \
        --traitofinterest="${trait}" \
        --resDir="/home1/PWAS/result/" \
        --sensitivity \
        --userId="userId"
done

--------------------------------------------------------------------------------
/MR_mrpresso.R:
--------------------------------------------------------------------------------
###MR presso
# Runs MR-PRESSO for every exposure listed in wget_varname_link.csv against
# the outcome "ieu-b-2", reusing the clumped instrument files written by MR.R
# under /AD_phewas/MR/exposure_resources/.

library(TwoSampleMR)
library(ieugwasr)
library(dplyr)
library(simex)
library(MRPRESSO)

setwd("/AD_phewas/MR_mrpresso")
#0.create folders (dir.create only warns if a folder already exists)
dir.create("exposure_resources")
dir.create("save_harmonise_dat")
dir.create("save_report()/")
dir.create("save_result_combine_individ/")



#----------------------------------------------------------------#
#-----------------Before analysis--------------------------------#
#----------------------------------------------------------------#

#read exposure variable description file
variablefile <- read.csv('wget_varname_link.csv')
# IDs of all exposures that have an IEU OpenGWAS identifier
IEU_ID <- variablefile %>%
  filter(IEU_WEBSITE_ID != '') %>%
  subset(select = c(IEU_WEBSITE_ID))
IEU_ID <- as.character(IEU_ID$IEU_WEBSITE_ID)


#----------------------------------------------------------------#
#-----------------Start analysis---------------------------------#
#----------------------------------------------------------------#
# One row per exposure: the IVW estimate plus the MR-PRESSO results.
result_combine_f <- data.frame()

for (i in IEU_ID) {
  #1. read exposure
  # MR.R writes <id>.clumped.csv only for exposures that had instruments,
  # so a missing file means "nothing to analyse" and must not abort the run.
  clumped_file <- paste0("/AD_phewas/MR/exposure_resources/", i, ".clumped.csv")
  if (!file.exists(clumped_file)) {
    next
  }
  trait1.exposure_data_clumped <- read.csv(clumped_file)

  #2. read outcome
  outcome_dat <- extract_outcome_data(snps = trait1.exposure_data_clumped$SNP,
                                      outcomes = "ieu-b-2")
  if (is.null(outcome_dat)) {
    next
  }

  #3. harmonise exposure and outcome
  trait1.trait2.dat <- harmonise_data(trait1.exposure_data_clumped, outcome_dat)

  # Fill in sample sizes that the GWAS summary data did not provide.
  if (is.na(trait1.trait2.dat$samplesize.exposure[1])) {
    trait1.trait2.dat$samplesize.exposure <-
      variablefile$exposure.splsize[variablefile$IEU_WEBSITE_ID == i]
  }
  if (is.na(trait1.trait2.dat$samplesize.outcome[1])) {
    # 63926 = 21982 + 41944, the case and control counts used in MR.R
    trait1.trait2.dat$samplesize.outcome <- 63926
  }

  #4. MR analysis and generate OR
  trait1.trait2.results <- mr(trait1.trait2.dat)
  trait1.trait2.results.withOR <- generate_odds_ratios(trait1.trait2.results)
  result_combine <- filter(trait1.trait2.results.withOR,
                           method == 'Inverse variance weighted')

  # MR-PRESSO is only run (and a row only saved) with more than two SNPs.
  if (nrow(trait1.trait2.dat) <= 2) {
    next
  }

  presso <- run_mr_presso(trait1.trait2.dat, NbDistribution = 8000)[[1]]
  presso_main <- presso$`Main MR results`
  presso_tests <- presso$`MR-PRESSO results`

  # Element 1 of the main results is stored as "Raw", element 2 as "Corre".
  result_combine$MR_PRESSO.Results.Global.pval <- presso_tests$`Global Test`$Pvalue
  result_combine$Main_MR_results.Effect_Raw <- presso_main$`Causal Estimate`[1]
  result_combine$Main_MR_results.Sd_Raw <- presso_main$Sd[1]
  result_combine$Main_MR_results.pval_Raw <- presso_main$`P-value`[1]
  result_combine$Main_MR_results.Effect_Corre <- presso_main$`Causal Estimate`[2]
  result_combine$Main_MR_results.Sd_Corre <- presso_main$Sd[2]
  result_combine$Main_MR_results.pval_Corre <- presso_main$`P-value`[2]
  result_combine$MR_PRESSO.Results.Distortion.pval <- presso_tests$`Distortion Test`$Pvalue
  # Assigning NULL above adds no column; make the column explicit in that case.
  if (is.null(result_combine$MR_PRESSO.Results.Distortion.pval)) {
    result_combine$MR_PRESSO.Results.Distortion.pval <- NA
  }
  # Stored as text so bind_rows() sees one column type across exposures
  # (presumably because MR-PRESSO can report a p-value as a string -- confirm).
  result_combine$MR_PRESSO.Results.Global.pval <-
    as.character(result_combine$MR_PRESSO.Results.Global.pval)
  result_combine$MR_PRESSO.Results.Distortion.pval <-
    as.character(result_combine$MR_PRESSO.Results.Distortion.pval)

  # Re-written after every exposure so partial results survive a crash.
  result_combine_f <- bind_rows(result_combine_f, result_combine)
  write.csv(result_combine_f,
            '/AD_phewas/MR_mrpresso/save_result_combine_individ/result_combine_run.csv',
            row.names = FALSE)
}


--------------------------------------------------------------------------------
/MR.R:
--------------------------------------------------------------------------------
#MR analysis
#mrpresso was performed separately in another script for its low processing speed

library(TwoSampleMR)
library(ieugwasr)
library(dplyr)
library(simex)

setwd("/AD_phewas/MR")
#0.0 create folders
dir.create("exposure_resources")
dir.create("save_harmonise_dat")
dir.create("save_report()")
dir.create("save_result_combine_individ")


#0.1 setting for local clump
# Install the plink wrapper only when it is missing, not on every run.
if (!requireNamespace("genetics.binaRies", quietly = TRUE)) {
  devtools::install_github("explodecomputer/genetics.binaRies")
}
plink_bin <- genetics.binaRies::get_plink_binary()


#----------------------------------------------------------------#
#----------------- Before analysis-------------------------------#
#----------------------------------------------------------------#


#read exposure variable description file
variablefile <- read.csv('wget_varname_link.csv')
IEU_ID <- variablefile %>%
  filter(IEU_WEBSITE_ID != '') %>%
  subset(select = c(IEU_WEBSITE_ID))
IEU_ID <- as.character(IEU_ID$IEU_WEBSITE_ID)

# Append one "could not be analysed" record and rewrite the log file.
record_norun <- function(df_norun, fieldid, reason.norun) {
  df_norun <- rbind.data.frame(
    df_norun,
    data.frame(fieldid = fieldid, reason.norun = reason.norun)
  )
  write.csv(df_norun,
            '/AD_phewas/MR/save_result_combine_individ/nosigsnp_ls.csv',
            row.names = FALSE)
  df_norun
}

# Columns that identify one exposure-outcome pair in the mr() output.
join_keys <- c("id.exposure", "id.outcome", "outcome", "exposure")
# Methods joined, in this order, onto the primary (IVW or Wald ratio) row.
secondary_methods <- c('MR Egger', 'Weighted median', 'Weighted mode', 'Simple mode')

#----------------------------------------------------------------#
#-----------------Start analysis---------------------------------#
#----------------------------------------------------------------#
result_combine_f <- data.frame()
df_norun <- data.frame()
for (i in IEU_ID) {
  #1.read exposure
  exposure_dat <- extract_instruments(i, clump = FALSE)
  if (is.null(exposure_dat)) {
    df_norun <- record_norun(df_norun, i, 'nosigsnp_in_exposure')
    next
  }
  write.csv(exposure_dat,
            paste0("/AD_phewas/MR/exposure_resources/", i, ".csv"),
            row.names = FALSE)

  #local_clump: columns are renamed for ld_clump() and renamed back afterwards
  clump_input <- rename(exposure_dat,
                        rsid = SNP,
                        pval = pval.exposure,
                        trait_id = id.exposure)
  clump_output <- ld_clump(clump_input,
                           bfile = "/AD_phewas/MR/1000G_reference_pannal/EUR/EUR",
                           plink_bin = plink_bin)
  trait1.exposure_data_clumped <- rename(clump_output,
                                         SNP = rsid,
                                         pval.exposure = pval,
                                         id.exposure = trait_id)
  write.csv(trait1.exposure_data_clumped,
            paste0("/AD_phewas/MR/exposure_resources/", i, ".clumped.csv"),
            row.names = FALSE)

  #2.read outcome
  outcome_dat <- extract_outcome_data(snps = trait1.exposure_data_clumped$SNP,
                                      outcomes = "ieu-b-2")
  if (is.null(outcome_dat)) {
    df_norun <- record_norun(df_norun, i, 'nosigsnp_in_outcome')
    next
  }

  #3.harmonise exposure and outcome
  trait1.trait2.dat <- harmonise_data(trait1.exposure_data_clumped, outcome_dat)

  # Fill in sample sizes that the GWAS summary data did not provide.
  if (is.na(trait1.trait2.dat$samplesize.exposure[1])) {
    trait1.trait2.dat$samplesize.exposure <-
      variablefile$exposure.splsize[variablefile$IEU_WEBSITE_ID == i]
  }
  if (is.na(trait1.trait2.dat$samplesize.outcome[1])) {
    trait1.trait2.dat$samplesize.outcome <-
      variablefile$exposure.outsize[variablefile$IEU_WEBSITE_ID == i]
  }
  write.csv(trait1.trait2.dat,
            paste0("/AD_phewas/MR/save_harmonise_dat/", i, "-ieu-b-2.harmonise.csv"),
            row.names = FALSE)

  #4.MR analysis and generate OR
  trait1.trait2.results <- mr(trait1.trait2.dat)
  trait1.trait2.results.withOR <- generate_odds_ratios(trait1.trait2.results)

  n_snp <- nrow(trait1.trait2.dat)

  #5.turn the results into one row
  # With a single SNP the primary estimate is the Wald ratio; otherwise IVW.
  # The secondary methods are joined in both cases so the output columns
  # are identical (all NA when a method was not run).
  primary_method <- if (n_snp > 1) 'Inverse variance weighted' else 'Wald ratio'
  result_combine <- filter(trait1.trait2.results.withOR, method == primary_method)
  for (m in secondary_methods) {
    result_combine <- left_join(result_combine,
                                filter(trait1.trait2.results.withOR, method == m),
                                by = join_keys)
  }

  # Sensitivity statistics default to NA and are filled in only when there
  # are enough SNPs: >1 for the Egger intercept and IVW heterogeneity,
  # >2 for SIMEX, I-squared and Egger heterogeneity.
  egger_intercept <- NA_real_
  egger_intercept_pval <- NA_real_
  isq <- NA_real_
  simex.beta <- NA_real_
  simex.se <- NA_real_
  simex.p <- NA_real_
  egger_q <- NA_real_
  egger_q_df <- NA_real_
  egger_q_pval <- NA_real_
  ivw_q <- NA_real_
  ivw_q_df <- NA_real_
  ivw_q_pval <- NA_real_

  if (n_snp > 1) {
    #6.MR-EGGER INTERCEPT
    pleiotropy <- mr_pleiotropy_test(trait1.trait2.dat)
    egger_intercept <- pleiotropy$egger_intercept
    egger_intercept_pval <- pleiotropy$pval

    #8.HETERO
    heterogeneity <- mr_heterogeneity(trait1.trait2.dat)
    is_ivw <- heterogeneity$method == 'Inverse variance weighted'
    ivw_q <- heterogeneity$Q[is_ivw]
    ivw_q_df <- heterogeneity$Q_df[is_ivw]
    ivw_q_pval <- heterogeneity$Q_pval[is_ivw]

    if (n_snp > 2) {
      is_egger <- heterogeneity$method == 'MR Egger'
      egger_q <- heterogeneity$Q[is_egger]
      egger_q_df <- heterogeneity$Q_df[is_egger]
      egger_q_pval <- heterogeneity$Q_pval[is_egger]

      #7.MR-EGGER SIMEX
      # Kept at top level (not in a function) so simex() can find these
      # vectors when it refits the model.
      # NOTE(review): the SNP effects are not re-oriented so that BetaXG is
      # positive before the Egger fit -- confirm this is intended.
      BetaYG <- trait1.trait2.dat$beta.outcome
      BetaXG <- trait1.trait2.dat$beta.exposure
      seBetaYG <- trait1.trait2.dat$se.outcome
      seBetaXG <- trait1.trait2.dat$se.exposure
      Fit2 <- lm(BetaYG ~ BetaXG, weights = 1 / seBetaYG^2, x = TRUE, y = TRUE)
      mod.sim <- simex(Fit2, B = 1000, measurement.error = seBetaXG,
                       SIMEXvariable = "BetaXG", fitting.method = "quad",
                       asymptotic = FALSE)
      # Elements 2, 4 and 8 of the jackknife coefficient table are read as
      # the estimate, standard error and p-value of the BetaXG slope.
      simex_jackknife <- summary(mod.sim)[[1]]$jackknife
      simex.beta <- simex_jackknife[2]
      simex.se <- simex_jackknife[4]
      simex.p <- simex_jackknife[8]

      # I-squared of the SNP-exposure associations. The previous call passed
      # the single MR-Egger estimate and its SE, i.e. one observation, for
      # which the statistic is 0/0.
      # NOTE(review): this is the unweighted form; the weighted form would be
      # Isq(BetaXG / seBetaYG, seBetaXG / seBetaYG).
      isq <- Isq(BetaXG, seBetaXG)
    }
  }

  result_combine$egger_intercept <- egger_intercept
  result_combine$egger_intercept.pval <- egger_intercept_pval
  result_combine$isq <- isq
  result_combine$simex.beta <- simex.beta
  result_combine$simex.se <- simex.se
  result_combine$simex.p <- simex.p
  result_combine$MR_Egger.Q <- egger_q
  result_combine$MR_Egger.Q_df <- egger_q_df
  result_combine$MR_Egger.Q_pval <- egger_q_pval
  result_combine$Inverse_variance_weighted.Q <- ivw_q
  result_combine$Inverse_variance_weighted.Q_df <- ivw_q_df
  result_combine$Inverse_variance_weighted.Q_pval <- ivw_q_pval

  #generate report and its path
  # Absolute path: the multi-SNP branch used to omit the leading "/", which
  # made the path relative to the working directory and broke dir.create().
  newpath <- paste0('/AD_phewas/MR/save_report()/', i, '__ieu-b-2')
  dir.create(newpath)
  if (n_snp > 1) {
    mr_report(trait1.trait2.dat, output_path = newpath)
  }

  #9.other statistics
  # NOTE(review): lor is taken from beta.exposure but is used below to derive
  # r for the OUTCOME; beta.outcome looks like the intended input -- confirm.
  lor <- trait1.trait2.dat$beta.exposure
  # NOTE(review): plink.freq is not created anywhere in this script; it must
  # already exist in the workspace with columns SNP and MAF.
  # match() keeps the frequencies in the same SNP order as lor; filtering
  # plink.freq returned them in plink.freq's own row order.
  af <- plink.freq$MAF[match(trait1.trait2.dat$SNP, plink.freq$SNP)]
  numbercase <- 21982
  numbercontrol <- 41944
  ADprevalence <- 0.04

  trait1.trait2.mr_steiger <- mr_steiger2(
    r_exp = get_r_from_pn(trait1.trait2.dat$pval.exposure,
                          trait1.trait2.dat$samplesize.exposure),
    r_out = get_r_from_lor(lor, af, numbercase, numbercontrol, ADprevalence),
    n_exp = trait1.trait2.dat$samplesize.exposure,
    n_out = trait1.trait2.dat$samplesize.outcome
  )
  result_combine$steigertest_P <- trait1.trait2.mr_steiger$steiger_test
  result_combine$causal_dir <- trait1.trait2.mr_steiger$correct_causal_direction
  result_combine$steigertest_P.adj <- trait1.trait2.mr_steiger$steiger_test_adj
  exp.R2 <- trait1.trait2.mr_steiger$r2_exp
  result_combine$exp.R2 <- exp.R2
  # F = (N - k - 1) / k * R2 / (1 - R2), with k = number of SNPs
  result_combine$F.stat <- (trait1.trait2.dat$samplesize.exposure[1] - n_snp - 1) /
    n_snp * exp.R2 / (1 - exp.R2)

  #10.power calculate
  # NOTE(review): the effect size entering the formula is the odds ratio
  # itself, not log(OR) -- confirm which scale the formula expects.
  ratio <- 41944 / 21982
  n <- 21982 + 41944
  OR <- trait1.trait2.results.withOR$or[
    trait1.trait2.results.withOR$method == 'Inverse variance weighted' |
      trait1.trait2.results.withOR$method == 'Wald ratio'
  ]
  sig <- 0.05
  rsq <- exp.R2
  result_combine$power <- pnorm(
    sqrt(n * rsq * (ratio / (1 + ratio)) * (1 / (1 + ratio))) * OR - qnorm(1 - sig / 2)
  )

  write.csv(result_combine,
            paste0('/AD_phewas/MR/save_result_combine_individ/', i, '__ieu-b-2', '.csv'),
            row.names = FALSE)

  #11.single_SNP analysis
  # Written with explicit paths instead of setwd(newpath), so the working
  # directory no longer changes from one exposure to the next.
  trait1.trait2.single_snp_analysis <- mr_singlesnp(trait1.trait2.dat)
  trait1.trait2.single_snp_analysis.withOR <-
    generate_odds_ratios(trait1.trait2.single_snp_analysis)
  write.csv(trait1.trait2.single_snp_analysis.withOR,
            file.path(newpath, paste0('SSA.', i, '__ieu-b-2', '.csv')),
            row.names = FALSE)

  #12.leave-one-out analysis
  trait1.trait2.dat.mr_leaveoneout <- mr_leaveoneout(trait1.trait2.dat)
  trait1.trait2.dat.mr_leaveoneout.withOR <-
    generate_odds_ratios(trait1.trait2.dat.mr_leaveoneout)
  write.csv(trait1.trait2.dat.mr_leaveoneout.withOR,
            file.path(newpath, paste0('LOO.', i, '__ieu-b-2', '.csv')),
            row.names = FALSE)

  #combine results (re-written after every exposure)
  result_combine_f <- bind_rows(result_combine_f, result_combine)
  write.csv(result_combine_f,
            '/AD_phewas/MR/save_result_combine_individ/result_combine_run.csv',
            row.names = FALSE)
}


--------------------------------------------------------------------------------
/TimeToEvent.R:
--------------------------------------------------------------------------------
#Time-to-event analysis
library(data.table)
library(dplyr)

library(survival)
library(ggfortify)
library(survminer)
library(forcats)

#####0.Prepare survival data #####
base_surv <- read.csv('/AD_phewas/baseline_table.csv')

######0.1 read covar table######
phesant_covar <- fread('/AD_phewas/phesant_covar.csv')
base_surv <- left_join(base_surv,phesant_covar,by='eid')
base_surv$edu_cat <- factor(base_surv$edu_cat)
str(base_surv$edu_cat)

######0.2 sample filter######
analyzetable <- base_surv
attach(analyzetable)
analyzetable$Age_at_FUend 
<-Age_at_baseline+AD_day/365
detach(analyzetable)

table(analyzetable$Age_at_baseline)
# Keep rows with a follow-up time, drop AD_year == 2037, and keep
# AD_status == 1 unconditionally but AD_status == 2 only when the age at the
# end of follow-up is above 50. The inner parentheses spell out how R already
# parsed the original condition (& binds tighter than |).
# NOTE(review): presumably AD_status 1 = censored and 2 = event, the 1/2
# coding accepted by Surv() -- confirm against the data dictionary.
analyzetable <- filter(analyzetable,
                       !is.na(AD_day),
                       AD_year != 2037,
                       (AD_status == 1 | (AD_status == 2 & Age_at_FUend > 50)))
median(analyzetable$AD_day)
quantile(analyzetable$AD_day)
table(analyzetable$Age_at_baseline)
hist(analyzetable$AD_day, breaks = 100)
analyzetable <- analyzetable[, -which(colnames(analyzetable) == 'Age_at_FUend')]
analyzetable_backup <- analyzetable #backup

######0.3 read variable table ######
# Path corrected from '/AD phewas/...': every other path in this project
# uses '/AD_phewas/'.
temp <- read.csv('/AD_phewas/baseline_table_colnames.csv')

#============================================================================#
#============================================================================#

#####COX analysis#####

# Fit Surv(AD_day, AD_status) ~ exposure + Age_at_baseline + sex + APOE4_new
# on the rows of `tbl` that are complete for those variables and return a
# one-row data frame of results for the exposure (first model term).
#   tbl     data frame with the columns named above plus `varname`
#   varname name of the exposure column
#   binary  if TRUE, samplesize is reported as "N(n with value 1/n with value 0)"
# sex is left out of the model when only one sex remains (singlesex = 1),
# in which case test.sex is NA.
fit_basic_cox <- function(tbl, varname, binary = FALSE) {
  needed <- c('Age_at_baseline', 'sex', 'APOE4_new', 'AD_status', 'AD_day', varname)
  complete <- tbl[complete.cases(tbl[, needed]), , drop = FALSE]

  single_sex <- !any(complete$sex == 'male') || !any(complete$sex == 'female')
  model_terms <- c(varname, 'Age_at_baseline', if (!single_sex) 'sex', 'APOE4_new')
  FML <- as.formula(paste0('Surv(AD_day,AD_status)~',
                           paste(model_terms, collapse = "+")))
  pheno_cox_AD <- coxph(FML, data = complete)
  cox_summary <- summary(pheno_cox_AD)
  ph_table <- cox.zph(pheno_cox_AD)$table
  # Rows of the proportional-hazards table follow the model terms, so the
  # APOE4 and global rows move down by one when sex is in the model.
  row_shift <- if (single_sex) 0L else 1L

  if (binary) {
    samplesize <- paste0(nrow(complete), '(',
                         sum(complete[[varname]] == 1), '/',
                         sum(complete[[varname]] == 0), ')')
  } else {
    samplesize <- nrow(complete)
  }

  data.frame(varid = varname,
             samplesize = samplesize,
             # conf.int column 1 is exp(coef), i.e. the hazard ratio
             coeff = cox_summary$conf.int[1, 1],
             lower95 = cox_summary$conf.int[1, 3],
             upper95 = cox_summary$conf.int[1, 4],
             pvalue = cox_summary$coefficients[1, 5],
             singlesex = if (single_sex) 1 else 0,
             test.var = ph_table[1, 3],
             test.age = ph_table[2, 3],
             test.sex = if (single_sex) NA else ph_table[3, 3],
             test.apoe4 = ph_table[3 + row_shift, 3],
             test.global = ph_table[4 + row_shift, 3])
}

######1 COX analysis#####
# 1095 days = 3 x 365: only follow-up of at least three years is analysed.
analyzetable <- filter(analyzetable_backup, AD_day >= 1095)
quantile(analyzetable$AD_day)

######1.1 continuous variables#####
var_ofinterest <- temp %>%
  filter(application_type == 'phewas.selected_exposure',
         calcuType == 'linear' | calcuType == 'ordered') %>%
  select(fieldid)
var_ofinterest <- var_ofinterest$fieldid

coeff.1 <- do.call(
  rbind.data.frame,
  lapply(as.character(var_ofinterest),
         function(v) fit_basic_cox(analyzetable, v))
)
coeff.1

######1.2 binary variables#####
var_ofinterest <- temp %>%
  filter(application_type == 'phewas.selected_exposure',
         calcuType == 'binary') %>%
  select(fieldid)
var_ofinterest <- var_ofinterest$fieldid

coeff.2 <- do.call(
  rbind.data.frame,
  lapply(as.character(var_ofinterest),
         function(v) fit_basic_cox(analyzetable, v, binary = TRUE))
)
coeff.2


######1.3 multilevel variables#####
# Every pair of levels is compared: for each j the factor is rotated so that
# level j+1 becomes the reference, and the contrasts against the levels that
# come after it are recorded.
var_ofinterest <- temp %>%
  filter(application_type == 'phewas.selected_exposure',
         calcuType == 'multilevel') %>%
  select(fieldid)
var_ofinterest <- var_ofinterest$fieldid
var_ofinterest_index <- which(names(analyzetable) %in% var_ofinterest)

coeff.z <- data.frame()
for (z in var_ofinterest_index) {

  analyzetable_noNA <- filter(analyzetable,
                              !is.na(Age_at_baseline), !is.na(sex), !is.na(APOE4_new),
                              !is.na(AD_status), !is.na(AD_day), !is.na(analyzetable[, z]))
  values0 <- names(table(analyzetable_noNA[, z]))
  n_levels <- length(values0)
  first_code <- as.numeric(values0[1])

  # j = 0 .. n_levels - 2; the sequence is empty for fewer than two levels
  # (0:(n_levels - 2) would count backwards in that case).
  for (j in seq_len(max(n_levels - 1L, 0L)) - 1L) {
    analyzetable_noNA[, z] <- factor(analyzetable_noNA[, z], levels = values0)
    analyzetable_noNA[, z] <- fct_shift(analyzetable_noNA[, z], j)
    pheno_cox_AD <- coxph(Surv(AD_day, AD_status) ~ analyzetable_noNA[, z] +
                            Age_at_baseline + sex + APOE4_new,
                          data = analyzetable_noNA)
    cox_summary <- summary(pheno_cox_AD)
    print(cox_summary)
    test.ph <- cox.zph(pheno_cox_AD)

    # NOTE(review): the labels and counts below assume the level codes are
    # consecutive integers starting at values0[1] -- confirm for each field.
    reference_code <- j + first_code
    for (l in seq_len(n_levels - 1 - j)) {
      compared_code <- reference_code + l
      coeff.new <- data.frame(
        varid = paste0(colnames(analyzetable_noNA)[z], ':',
                       compared_code, 'vs', reference_code),
        # "<n in reference level>/<n in compared level>"
        samplesize = paste0(sum(analyzetable_noNA[, z] == reference_code), '/',
                            sum(analyzetable_noNA[, z] == compared_code)),
        coeff = cox_summary$conf.int[l, 1],
        lower95 = cox_summary$conf.int[l, 3],
        upper95 = cox_summary$conf.int[l, 4],
        pvalue = cox_summary$coefficients[l, 5],
        singlesex = NA,
        test.var = test.ph$table[1, 3],
        test.age = test.ph$table[2, 3],
        test.sex = test.ph$table[3, 3],
        test.apoe4 = test.ph$table[4, 3],
        test.global = test.ph$table[5, 3]
      )
      coeff.z <- rbind.data.frame(coeff.z, coeff.new)
    }
  }
}
coeff.z
coeff.3 <- coeff.z
coeff.3


######1.4 combine results#####
coeff.f <- rbind.data.frame(coeff.1, coeff.2, coeff.3)
coeff.f.basicCOX.limitto3yearFU <- coeff.f

write.csv(coeff.f.basicCOX.limitto3yearFU,
          '/AD_phewas/coeff.f.basicCOX.limitto3yearFU.csv',
          row.names = FALSE)


#####2.COX analysis covar adjusted#####

analyzetable <- 
filter(analyzetable_backup,AD_day>=1095) 278 | analyzetable$Smoking <- factor(analyzetable$Smoking) 279 | analyzetable$Alcohal <- factor(analyzetable$Alcohal) 280 | 281 | ######2.1.continuous variables##### 282 | var_ofinterest <- temp %>% filter(application_type=='phewas.selected_exposure',calcuType=='linear'|calcuType=='ordered') %>% select(fieldid) 283 | var_ofinterest <- var_ofinterest$fieldid 284 | 285 | coeff.old <- data.frame() 286 | for (i in var_ofinterest) { 287 | analyzetable_noNA <- filter(analyzetable, !is.na(Age_at_baseline),!is.na(sex),!is.na(APOE4_new), 288 | !is.na(AD_status),!is.na(AD_day),!is.na(analyzetable[,which(names(analyzetable)==i)]), 289 | !is.na(edu_cat),!is.na(Townsand), 290 | !is.na(Smoking),!is.na(Alcohal),!is.na(BMI)) 291 | if (nrow(filter(analyzetable_noNA,sex=='male'))==0|nrow(filter(analyzetable_noNA,sex=='female'))==0) { 292 | FML <- as.formula(paste0('Surv(AD_day,AD_status)~',paste(i,'Age_at_baseline','APOE4_new','edu_cat','Townsand','Smoking','Alcohal','BMI',sep = "+"))) 293 | pheno_cox_AD <- coxph(FML,data=analyzetable_noNA) 294 | 295 | varid <- i 296 | coeff <- summary(pheno_cox_AD)$conf.int[1,1] 297 | lower95 <- summary(pheno_cox_AD)$conf.int[1,3] 298 | upper95 <- summary(pheno_cox_AD)$conf.int[1,4] 299 | pvalue <- summary(pheno_cox_AD)$coefficients[1,5] 300 | singlesex <- 1 301 | 302 | samplesize <- nrow(analyzetable_noNA) 303 | coeff.new <- data.frame(varid=varid, 304 | samplesize=samplesize, 305 | coeff=coeff, 306 | lower95=lower95, 307 | upper95=upper95, 308 | pvalue=pvalue, 309 | singlesex=singlesex 310 | ) 311 | coeff.old <- rbind.data.frame(coeff.old,coeff.new) 312 | 313 | } 314 | else{ 315 | FML <- as.formula(paste0('Surv(AD_day,AD_status)~',paste(i,'Age_at_baseline','sex','APOE4_new','edu_cat','Townsand','Smoking','Alcohal','BMI',sep = "+"))) 316 | pheno_cox_AD <- coxph(FML,data=analyzetable_noNA) 317 | 318 | varid <- i 319 | coeff <- summary(pheno_cox_AD)$conf.int[1,1] 320 | lower95 <- 
summary(pheno_cox_AD)$conf.int[1,3] 321 | upper95 <- summary(pheno_cox_AD)$conf.int[1,4] 322 | pvalue <- summary(pheno_cox_AD)$coefficients[1,5] 323 | singlesex <- 0 324 | samplesize <- nrow(analyzetable_noNA) 325 | coeff.new <- data.frame(varid=varid, 326 | samplesize=samplesize, 327 | coeff=coeff, 328 | lower95=lower95, 329 | upper95=upper95, 330 | pvalue=pvalue, 331 | singlesex=singlesex 332 | ) 333 | 334 | coeff.old <- rbind.data.frame(coeff.old,coeff.new) 335 | 336 | } 337 | } 338 | coeff.1 <- coeff.old 339 | coeff.1 340 | 341 | ######2.2.binary variables##### 342 | var_ofinterest <- temp %>% filter(application_type=='phewas.selected_exposure',calcuType=='binary') %>% select(fieldid) 343 | var_ofinterest <- var_ofinterest$fieldid 344 | 345 | coeff.old <- data.frame() 346 | for (i in var_ofinterest) { 347 | analyzetable_noNA <- filter(analyzetable, !is.na(Age_at_baseline),!is.na(sex),!is.na(APOE4_new), 348 | !is.na(AD_status),!is.na(AD_day),!is.na(analyzetable[,which(names(analyzetable)==i)]), 349 | !is.na(edu_cat),is.na(Townsand), 350 | !is.na(Smoking),!is.na(Alcohal),!is.na(BMI)) 351 | if (nrow(filter(analyzetable_noNA,sex=='male'))==0|nrow(filter(analyzetable_noNA,sex=='female'))==0) { 352 | FML <- as.formula(paste0('Surv(AD_day,AD_status)~',paste(i,'Age_at_baseline','APOE4_new','edu_cat','Townsand','Smoking','Alcohal','BMI',sep = "+"))) 353 | pheno_cox_AD <- coxph(FML,data=analyzetable_noNA) 354 | 355 | varid <- i 356 | coeff <- summary(pheno_cox_AD)$conf.int[1,1] 357 | lower95 <- summary(pheno_cox_AD)$conf.int[1,3] 358 | upper95 <- summary(pheno_cox_AD)$conf.int[1,4] 359 | pvalue <- summary(pheno_cox_AD)$coefficients[1,5] 360 | singlesex <- 1 361 | samplesize <- nrow(analyzetable_noNA) 362 | 363 | case_r <- filter(analyzetable_noNA,analyzetable_noNA[,which(names(analyzetable_noNA)==i)]==1) 364 | control_r <- filter(analyzetable_noNA,analyzetable_noNA[,which(names(analyzetable_noNA)==i)]==0) 365 | coeff.new <- data.frame(varid=varid, 366 | 
samplesize=paste0(nrow(analyzetable_noNA),'(', 367 | nrow(case_r),'/', 368 | nrow(control_r),')'), 369 | coeff=coeff, 370 | lower95=lower95, 371 | upper95=upper95, 372 | pvalue=pvalue, 373 | singlesex=singlesex 374 | ) 375 | coeff.old <- rbind.data.frame(coeff.old,coeff.new) 376 | 377 | } 378 | else{ 379 | FML <- as.formula(paste0('Surv(AD_day,AD_status)~',paste(i,'Age_at_baseline','sex','APOE4_new','edu_cat','Townsand','Smoking','Alcohal','BMI',sep = "+"))) 380 | pheno_cox_AD <- coxph(FML,data=analyzetable_noNA) 381 | varid <- i 382 | 383 | coeff <- summary(pheno_cox_AD)$conf.int[1,1] 384 | lower95 <- summary(pheno_cox_AD)$conf.int[1,3] 385 | upper95 <- summary(pheno_cox_AD)$conf.int[1,4] 386 | pvalue <- summary(pheno_cox_AD)$coefficients[1,5] 387 | singlesex <- 0 388 | samplesize <- nrow(analyzetable_noNA) 389 | 390 | case_r <- filter(analyzetable_noNA,analyzetable_noNA[,which(names(analyzetable_noNA)==i)]==1) 391 | control_r <- filter(analyzetable_noNA,analyzetable_noNA[,which(names(analyzetable_noNA)==i)]==0) 392 | coeff.new <- data.frame(varid=varid, 393 | samplesize=paste0(nrow(analyzetable_noNA),'(', 394 | nrow(case_r),'/', 395 | nrow(control_r),')'), 396 | coeff=coeff, 397 | lower95=lower95, 398 | upper95=upper95, 399 | pvalue=pvalue, 400 | singlesex=singlesex 401 | 402 | ) 403 | 404 | coeff.old <- rbind.data.frame(coeff.old,coeff.new) 405 | 406 | } 407 | } 408 | 409 | coeff.2 <- coeff.old 410 | coeff.2 411 | 412 | 413 | ######2.3.multilevel variables##### 414 | var_ofinterest <- temp %>% filter(application_type=='phewas.selected_exposure',calcuType=='multilevel') %>% select(fieldid) 415 | var_ofinterest <- var_ofinterest$fieldid 416 | var_ofinterest_index <- which(names(analyzetable) %in% var_ofinterest) 417 | 418 | coeff.z <- data.frame() 419 | for ( z in var_ofinterest_index) { 420 | 421 | analyzetable_noNA <- filter(analyzetable, !is.na(Age_at_baseline),!is.na(sex),!is.na(APOE4_new), 422 | !is.na(AD_status),!is.na(AD_day),!is.na(analyzetable[,z]), 423 | 
!is.na(edu_cat),!is.na(Townsand), 424 | !is.na(Smoking),!is.na(Alcohal),!is.na(BMI)) 425 | values0 <- names(table(analyzetable_noNA[,z])) 426 | coeff.combine <- data.frame() 427 | for (j in 0:(length(values0)-2)) { 428 | analyzetable_noNA[,z] <- factor(analyzetable_noNA[,z],levels = values0) 429 | analyzetable_noNA[,z] <- fct_shift(analyzetable_noNA[,z],j) 430 | pheno_cox_AD <- coxph(Surv(AD_day,AD_status)~ analyzetable_noNA[,z] + Age_at_baseline + sex+APOE4_new+edu_cat+Townsand+Smoking+Alcohal+BMI,data=analyzetable_noNA) 431 | 432 | coeff.old <- data.frame() 433 | for ( l in 1:(length(values0)-1-j)) { 434 | 435 | varid <- paste0(colnames(analyzetable_noNA)[z],':',j+as.numeric(values0[1])+l,'vs',j+as.numeric(values0[1])) 436 | coeff <- summary(pheno_cox_AD)$conf.int[l,1] 437 | lower95 <- summary(pheno_cox_AD)$conf.int[l,3] 438 | upper95 <- summary(pheno_cox_AD)$conf.int[l,4] 439 | pvalue <- summary(pheno_cox_AD)$coefficients[l,5] 440 | singlesex <- NA 441 | 442 | 443 | samplesize <- paste0(nrow(filter(analyzetable_noNA,analyzetable_noNA[,z]==j+as.numeric(values0[1])+l)),'/',nrow(filter(analyzetable_noNA,analyzetable_noNA[,z]==j+as.numeric(values0[1])))) 444 | coeff.new <- data.frame(varid=varid, 445 | samplesize=samplesize, 446 | coeff=coeff, 447 | lower95=lower95, 448 | upper95=upper95, 449 | pvalue=pvalue, 450 | singlesex=singlesex 451 | ) 452 | 453 | coeff.old <- rbind.data.frame(coeff.old,coeff.new) 454 | } 455 | coeff.combine <- rbind.data.frame(coeff.combine,coeff.old) 456 | } 457 | coeff.z <- rbind.data.frame(coeff.z,coeff.combine) 458 | } 459 | coeff.z 460 | coeff.3 <- coeff.z 461 | coeff.3 462 | 463 | ######2.4.combine results##### 464 | coeff.f <- rbind.data.frame(coeff.1,coeff.2,coeff.3) 465 | coeff.f.crct <- coeff.f 466 | 467 | write.csv(coeff.f.educrct,'/AD_phewas/coeff.f.crct.csv',row.names = FALSE) 468 | 469 | #============================================================================# 470 | 
#============================================================================# 471 | 472 | 473 | 474 | 475 | --------------------------------------------------------------------------------