├── 1c_imporFG.R
├── 3_extract_all_outcomes.R
├── 1aa_SDtimmers.R
├── 1ee_importpressure.R
├── 1b_importeGFR.R
├── 1a_importlotta.R
├── 1bb_importeGFRSD.R
├── 7a_writetext.R
├── 6_create_supplementary_tables.R
├── 2_import_instrument.R
├── 4_harmonise_and_mrresult.R
└── 5_create_plots_figures.R


/1c_imporFG.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | library(data.table)
 3 | library(GagnonMR)
 4 | library(tidyverse)
 5 | 
 6 | setwd("/mnt/sda/gagelo01/Projects/small_MR_exploration/fg_BMI")
 7 | # Fasting-glucose summary statistics; columns a1/a2 are relabelled other_allele/effect_allele.
 8 | fg <- fread("Data/Raw/FG_combined_1000G_density_formatted_21-03-29.txt.gz")
 9 | setnames(fg, c("a1", "a2"), c("other_allele", "effect_allele"))
10 | # Lookup table passed to the formatter as `traduction`: EUR values of exactly 0 or 1 become 0.001 / 0.999 and `maf` is dropped.
11 | traduction <- fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
12 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
13 | traduction[, maf := NULL]
14 | 
15 | # No eaf/chr/pos columns are supplied (NULL); presumably the formatter fills them from `traduction` -- TODO confirm in GagnonMR.
16 | GagnonMR::formattovcf_createindex(all_out = fg,
17 |                                   snp_col = "rsid",
18 |                                   outcome_name = "Fasting_Glucose",
19 |                                   beta_col = "beta",
20 |                                   se_col = "se",
21 |                                   pval_col = "p-value",
22 |                                   eaf_col = NULL,
23 |                                   effect_allele_col = "effect_allele",
24 |                                   other_allele_col =  "other_allele",
25 |                                   ncase_col = NULL,
26 |                                   ncontrol_col = NULL,
27 |                                   samplesize_col = "n",
28 |                                   chr_col = NULL,
29 |                                   pos_col = NULL,
30 |                                   units = "mmol/L", # was a fasting-insulin label ("... FI measured in pmol/L"); NOTE(review): confirm the FG unit against the source GWAS
31 |                                   traduction = traduction,
32 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
33 |                                   df_index = fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt"),
34 |                                   group_name = "public",
35 |                                   year = 2021,
36 |                                   author = "Lagou V",
37 |                                   consortium = "MAGIC",
38 |                                   sex = "Males and Females",
39 |                                   population = "European",
40 |                                   initial_build = "HG19/GRCh37",
41 |                                   category = "Trait",
42 |                                   pmid = 33402679,
43 |                                   note = NA,
44 |                                   should_create_id = TRUE,
45 |                                   ID = NA )
46 | 
47 | 
48 | message("This script finished without error")
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/3_extract_all_outcomes.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | library(data.table)
 3 | library(tidyverse)
 4 | library(GagnonMR)
 5 | library(furrr)
 6 | 
 7 | setwd("/mnt/sda/gagelo01/Projects/Dysbiose_project")
 8 | gwasvcf::set_bcftools()
 9 | gwasvcf::set_plink()
10 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs" # LD reference path (the `bfile` of the proxy lookup below)
11 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
12 | ao <- fread("/mnt/sda/gagelo01/Vcffile/available_outcomes_2021-10-13.txt")
13 | ao_small <- ao[id %in% list.files("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/")] # keep only ids that have a directory under MRBase_vcf/
14 | ao_small[pmid == 32203549, unit := "SD"] # overwrite the unit of this study's rows
15 | # Clumped instruments; their SNP column is what gets looked up in every outcome VCF.
16 | inst_clump <- fread( "Data/Modified/inst_clump.txt")
17 | 
18 | # Outcome VCF paths are built as <dir>/<id>/<id>.vcf.gz for ids under MRBase_vcf/ and under Server_vcf/.
19 | ID_mrbase_out <-  c("ieu-b-109", "ieu-b-110", "ieu-b-111", "ukb-b-19953","ukb-b-12141")
20 | outcomes_mrbase <- paste0("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/", ID_mrbase_out, "/", ID_mrbase_out, ".vcf.gz")
21 | ID_server_out <- c("trait-2-2", "trait-6-1", "trait-7-2", "trait-2-4", "trait-12-2", 
22 |                    "dis-2-1", "dis-3-1", "dis-4-1", "dis-5-1", "dis-6-1", "dis-7-1", "dis-8-1",
23 |                    "trait-13-1", "trait-13-2")
24 | outcomes_server <- paste0("/mnt/sda/gagelo01/Vcffile/Server_vcf/", ID_server_out, "/", ID_server_out, ".vcf.gz")
25 | 
26 | # Parallel backend for future_map(): 9 background R sessions, globals size limit raised to 5e9.
27 | options(future.globals.maxSize= 5e9)
28 | plan(multisession, workers = 9, gc = TRUE) # TODO: try the multicore backend instead
29 | 
30 | 
31 | # test <- gwasvcf::query_gwas(vcf = c(outcomes_mrbase, outcomes_server)[1], rsid = unique(inst_clump$SNP),
32 | #                             proxies = "yes", bfile = ldref)
33 | # test %>% 
34 | #   gwasglue::gwasvcf_to_TwoSampleMR(., "outcome") %>%
35 | #   data.table::as.data.table(.)
36 | 
37 | outcome_all <- future_map(as.list(c(outcomes_mrbase, outcomes_server)), function(x, rsiid = unique(inst_clump$SNP), bfile = ldref) {
38 |   # Called once per outcome VCF path `x`; the bcftools/plink paths are set again inside the mapped function.
39 |   gwasvcf::set_bcftools()
40 |   gwasvcf::set_plink()
41 |   # Look up every instrument SNP (proxies = "yes", tag_r2 = 0.8) against the `ldref` panel defined at the top of the script.
42 |   gwasvcf::query_gwas(vcf = x, rsid = rsiid, proxies = "yes", bfile = bfile, tag_r2 = 0.8) %>%
43 |     gwasglue::gwasvcf_to_TwoSampleMR(., "outcome") %>%
44 |     data.table::as.data.table(.)
45 |   # One data.table per VCF is returned; they are stacked below with fill = TRUE.
46 | }, .options = furrr_options(seed = TRUE)) %>% rbindlist(.,fill = TRUE)
47 | 
48 | add_n <- c("ieu-b-109", "ieu-b-110", "ieu-b-111")
49 | for (mrbase_id in add_n) { # these three outcomes take their sample size from the ao_small table
50 |   outcome_all[outcome %in% mrbase_id, samplesize.outcome := ao_small[id %in% mrbase_id, ][, .(sample_size)]]
51 | }
52 | # Lower-case the two UKB ids, then attach a unit to every outcome (server index keyed by trait, ao_small keyed by id).
53 | outcome_all[, outcome := ifelse(grepl("UKB-b-19953|UKB-b-12141", outcome), tolower(outcome), outcome)]
54 | mrbase_units <- ao_small[id %in% ID_mrbase_out, .(trait = id, unit)]
55 | server_units <- df_index[id %in% ID_server_out, .(trait, unit)]
56 | unit_lookup <- rbind(server_units, mrbase_units)
57 | setnames(unit_lookup, "unit", "units.outcome")
58 | outcome_with_units <- merge(outcome_all, unit_lookup, by.x = "outcome", by.y = "trait")
59 | outcome_with_units[outcome %in% "Stanzick_eGFR", units.outcome := "SD"]
60 | fwrite(outcome_with_units, "Data/Modified/outcome_all")
61 | 
62 | print("this script finished without errors")


--------------------------------------------------------------------------------
/1aa_SDtimmers.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | library(data.table)
 3 | library(GagnonMR)
 4 | library(tidyverse)
 5 | 
 6 | 
 7 | gwasvcf::set_bcftools()
 8 | gwasvcf::set_plink()
 9 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs" # not referenced again in this script
10 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
11 | df_index[id == "trait-7-1", ] # printed for inspection only; the result is not stored
12 | 
13 | ############ only change this section
14 | ancient_id<- "trait-7-1"
15 | new_id<-"trait-7-2"
16 | ###################
17 | # `inst` (from get_inst() on the original VCF) supplies the SEs, allele frequencies and sample sizes given to coloc's sdY.est().
18 | inst <- get_inst(vcffile = paste0("/mnt/sda/gagelo01/Vcffile/Server_vcf/", ancient_id, "/", ancient_id, ".vcf.gz"))
19 | sd <- coloc:::sdY.est(vbeta = (inst$se.exposure)^2,
20 |                       maf = inst[, ifelse(eaf.exposure > 0.5,1-eaf.exposure,eaf.exposure)],
21 |                       n = inst$samplesize.exposure)
22 | 
23 | sd
24 | # Index row for the rescaled copy: the original row with a new id, a note and unit "SD".
25 | newrow <- df_index[id == ancient_id, ]
26 | newrow[, id := new_id]
27 | newrow[, note := "standardised with coloc"]
28 | newrow[, unit := "SD"]
29 | df_index <- rbind(df_index, newrow)
30 | # Re-read the whole original VCF and divide beta and SE by the estimated SD.
31 | vcf <- VariantAnnotation::readVcf(paste0("/mnt/sda/gagelo01/Vcffile/Server_vcf/", ancient_id, "/", ancient_id, ".vcf.gz"))
32 | tsmr <- vcf %>% gwasglue::gwasvcf_to_TwoSampleMR(.) %>% as.data.table(.)
33 | tsmr[, beta.exposure := beta.exposure / sd]
34 | tsmr[, se.exposure := se.exposure / sd]
35 | # Write the rescaled data under `new_id` (should_create_id = FALSE), reusing the metadata of `newrow`.
36 | GagnonMR::formattovcf_createindex(all_out = tsmr,
37 |                                   snp_col = "SNP",
38 |                                   outcome_name = newrow[,trait],
39 |                                   beta_col = "beta.exposure",
40 |                                   se_col = "se.exposure",
41 |                                   pval_col = "pval.exposure",
42 |                                   eaf_col = "eaf.exposure",
43 |                                   effect_allele_col = "effect_allele.exposure",
44 |                                   other_allele_col =  "other_allele.exposure",
45 |                                   ncase_col = NULL,
46 |                                   ncontrol_col = NULL,
47 |                                   samplesize_col = "samplesize.exposure",
48 |                                   chr_col = "chr.exposure",
49 |                                   pos_col = "pos.exposure",
50 |                                   units = newrow$unit,
51 |                                   traduction = NULL,
52 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
53 |                                   df_index = df_index,
54 |                                   group_name = newrow$group_name,
55 |                                   year = newrow$year,
56 |                                   author = newrow$author,
57 |                                   consortium = newrow$consortium,
58 |                                   sex = newrow$sex,
59 |                                   population = newrow$population,
60 |                                   initial_build = newrow$initial_build,
61 |                                   category = newrow$category,
62 |                                   pmid = newrow$pmid,
63 |                                   note = newrow$note,
64 |                                   should_create_id = FALSE,
65 |                                   ID = new_id)
66 | fwrite(df_index,"/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt") # persist the index including the new row
67 | message("this script finished without errors")
68 | 


--------------------------------------------------------------------------------
/1ee_importpressure.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | library(data.table)
 3 | library(GagnonMR)
 4 | library(tidyverse)
 5 | 
 6 | gwasvcf::set_bcftools()
 7 | gwasvcf::set_plink()
 8 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs" # not referenced again in this script
 9 | # Select the ids of one publication (pmid 30224653) among the GWAS that have a directory under MRBase_vcf/.
10 | ao <- fread("/mnt/sda/gagelo01/Vcffile/available_outcomes_2021-10-13.txt")
11 | ao_small <- ao[id %in% list.files("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/")]
12 | vec_id <-ao_small[pmid == 30224653, id]
13 | 
14 | ## Reference table handed to the formatter as `traduction`; EUR values of exactly 0 or 1 become 0.001 / 0.999.
15 | traduction = fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
16 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
17 | traduction[, maf := NULL]
18 | 
19 | 
20 | for (i in seq_along(vec_id)) { # seq_along() gives zero iterations when no study matched; 1:length() would run with i = 1 and i = 0
21 |   # Each pass rescales one GWAS from `vec_id` to SD units and stores it as trait-13-<i>.
22 |   df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
23 |   vcffile <- paste0("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/", vec_id[i], "/", vec_id[i], ".vcf.gz")
24 |   inst <- gwasvcf::query_gwas(vcffile, chrompos = "1:20000-1200000") %>%
25 |     gwasglue::gwasvcf_to_TwoSampleMR(.) %>%
26 |     as.data.table(.)
27 |   sd <- coloc:::sdY.est(vbeta = (inst$se.exposure)^2,
28 |                         maf = inst[, ifelse(eaf.exposure > 0.5,1-eaf.exposure,eaf.exposure)],
29 |                         n = inst$samplesize.exposure)
30 |   # `sd` is estimated from the chr1:20000-1200000 slice only; the whole VCF is then divided by it.
31 |   ##
32 |   tsmr <- VariantAnnotation::readVcf(vcffile) %>%
33 |     gwasglue::gwasvcf_to_TwoSampleMR(.) %>%
34 |     as.data.table(.)
35 | 
36 |   tsmr[, beta.exposure := beta.exposure / sd]
37 |   tsmr[, se.exposure := se.exposure / sd]
38 |   tsmr[, chr.exposure := as.integer(chr.exposure)]
39 |   # Build the index row from the ao_small entry, restricted to the columns of the server index.
40 |   newrow <- ao_small[id == vec_id[i]]
41 |   newrow[, id := paste0("trait-13-", i)]
42 |   newrow[, trait := gsub(" ", "_", trait)]
43 |   newrow[, unit := "SD"]
44 |   setnames(newrow, "build", "initial_build")
45 |   newrow[, category := "Trait"]
46 |   newrow[, note := paste0("standardised with coloc initially in mmHg with SD ", sd)]
47 |   colinclude <- colnames(df_index)
48 |   newrow <- newrow[, ..colinclude]
49 |   # The index was read at the top of this iteration and is unchanged since, so it is extended directly.
50 |   df_index <- rbind(df_index, newrow)
51 | 
52 | 
53 |   GagnonMR::formattovcf_createindex(all_out = tsmr,
54 |                                     snp_col = "SNP",
55 |                                     outcome_name = newrow[, trait],
56 |                                     beta_col = "beta.exposure",
57 |                                     se_col = "se.exposure",
58 |                                     pval_col = "pval.exposure",
59 |                                     eaf_col = "eaf.exposure",
60 |                                     effect_allele_col = "effect_allele.exposure",
61 |                                     other_allele_col =  "other_allele.exposure",
62 |                                     ncase_col = NULL,
63 |                                     ncontrol_col = NULL,
64 |                                     samplesize_col = "samplesize.exposure",
65 |                                     chr_col = "chr.exposure",
66 |                                     pos_col = "pos.exposure",
67 |                                     units = "SD",
68 |                                     traduction = traduction,
69 |                                     out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
70 |                                     df_index = df_index,
71 |                                     group_name = "public",
72 |                                     year = newrow$year,
73 |                                     author = newrow$author,
74 |                                     consortium = newrow$consortium,
75 |                                     sex = newrow$sex,
76 |                                     population = newrow$population,
77 |                                     initial_build = newrow$initial_build,
78 |                                     category = newrow$category,
79 |                                     pmid = newrow$pmid,
80 |                                     note = newrow$note,
81 |                                     should_create_id = FALSE,
82 |                                     ID = newrow$id )
83 | 
84 |   # Persist the extended index so the next iteration reads it back.
85 |   fwrite(df_index, "/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
86 | }
87 | 
88 | message("This script finished without error")
89 | 


--------------------------------------------------------------------------------
/1b_importeGFR.R:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env Rscript
 2 | #import instrument
 3 | library(data.table)
 4 | library(GagnonMR)
 5 | library(tidyverse)
 6 | library(tictoc)
 7 | library(furrr)
 8 | 
 9 | gwasvcf::set_bcftools()
10 | gwasvcf::set_plink()
11 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
12 | ao <-  fread("/mnt/sda/gagelo01/Vcffile/available_outcomes_2021-10-13.txt")
13 | ao[grepl("glomerular", tolower(trait))][population == "European"] # exploratory listing; the result is not stored
14 | setwd("/mnt/sda/gagelo01/Projects/Dysbiose_project")
15 | 
16 | # Two raw eGFR summary-statistics files, read relative to the project directory set above.
17 | wuttke <- fread("Data/Raw/20171017_MW_eGFR_overall_EA_nstud42.dbgap.txt.gz")
18 | stanzick <- fread("Data/Raw/metal_eGFR_meta_ea1.TBL.map.annot.gc.gz")
19 | # Reference table handed to the formatter as `traduction`; EUR values of exactly 0 or 1 become 0.001 / 0.999.
20 | traduction <- fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
21 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
22 | traduction[, maf := NULL]
23 | 
24 | # First dataset (Wuttke): formatted with should_create_id = TRUE and ID = NA.
25 | GagnonMR::formattovcf_createindex(all_out = wuttke,
26 |                                   snp_col = "RSID",
27 |                                   outcome_name = "wuttke_eGFR",
28 |                                   beta_col = "Effect",
29 |                                   se_col = "StdErr",
30 |                                   pval_col = "P-value",
31 |                                   eaf_col = "Freq1",
32 |                                   effect_allele_col = "Allele1",
33 |                                   other_allele_col =  "Allele2",
34 |                                   ncase_col = NULL,
35 |                                   ncontrol_col = NULL,
36 |                                   samplesize_col = "n_total_sum",
37 |                                   chr_col = "Chr",
38 |                                   pos_col = "Pos_b37",
39 |                                   units = "log(eGFR)",
40 |                                   traduction = traduction,
41 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
42 |                                   df_index = df_index,
43 |                                   group_name = "public",
44 |                                   year = 2019,
45 |                                   author = "Wuttke Matthias",
46 |                                   consortium = "CKDGEN",
47 |                                   sex = "Males and Females",
48 |                                   population = "European",
49 |                                   initial_build = "HG19/GRCh37",
50 |                                   category = "Trait",
51 |                                   pmid = 31152163,
52 |                                   note = "",
53 |                                   should_create_id = TRUE,
54 |                                   ID = NA)
55 | # Re-read the index from disk before the second import; presumably the call above updated the file -- TODO confirm in GagnonMR.
56 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
57 | 
58 | # Second dataset (Stanzick): same procedure; note the different p-value and position column names.
59 | GagnonMR::formattovcf_createindex(all_out = stanzick,
60 |                                   snp_col = "RSID",
61 |                                   outcome_name = "Stanzick_eGFR",
62 |                                   beta_col = "Effect",
63 |                                   se_col = "StdErr",
64 |                                   pval_col = "P.value",
65 |                                   eaf_col = "Freq1",
66 |                                   effect_allele_col = "Allele1",
67 |                                   other_allele_col =  "Allele2",
68 |                                   ncase_col = NULL,
69 |                                   ncontrol_col = NULL,
70 |                                   samplesize_col = "n",
71 |                                   chr_col = "chr",
72 |                                   pos_col = "pos",
73 |                                   units = "log(eGFR)",
74 |                                   traduction = traduction,
75 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
76 |                                   df_index = df_index,
77 |                                   group_name = "public",
78 |                                   year = 2021,
79 |                                   author = "Stanzick Kira",
80 |                                   consortium = "CKDGEN UKBbiobank",
81 |                                   sex = "Males and Females",
82 |                                   population = "European",
83 |                                   initial_build = "HG19/GRCh37",
84 |                                   category = "Trait",
85 |                                   pmid = 34272381,
86 |                                   note = "",
87 |                                   should_create_id = TRUE,
88 |                                   ID = NA)
89 | 
90 | message("This script finished without errors")


--------------------------------------------------------------------------------
/1a_importlotta.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | #import instrument
  3 | library(data.table)
  4 | library(GagnonMR)
  5 | library(tidyverse)
  6 | library(tictoc)
  7 | library(furrr)
  8 | 
  9 | gwasvcf::set_bcftools()
 10 | gwasvcf::set_plink()
 11 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 12 | setwd("/mnt/sda/gagelo01/Projects/Dysbiose_project")
 13 | # Label table for the metabolite files; its columns are renamed to namefile / abbreviation / fullname / class.
 14 | met_lab <-fread("/home/couchr02/Mendel_Commun/Christian/GWAS/proteome/Lotta/metabolites_label.txt")
 15 | colnames(met_lab) <- c("namefile", "abbreviation", "fullname", "class")
 16 | 
 17 | # Reference table handed to the formatter as `traduction`; EUR values of exactly 0 or 1 become 0.001 / 0.999.
 18 | traduction <- fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
 19 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
 20 | traduction[, maf := NULL]
 21 | 
 22 | # One index row per metabolite (met-2-1, met-2-2, ...); the full metabolite name is stored in `note`.
 23 | newrow <- data.table(id = paste0("met-2-", seq_len(met_lab[, .N])),
 24 |                      trait = met_lab$abbreviation,
 25 |                      group_name = "Public", year = 2021,
 26 |                      author = "Lotta A, Luca",
 27 |                      consortium = "Fenland, EPIC-Norfolk (Metabolon), INTERVAL (Metabolon), INTERVAL (Nightingale), Kettunen et al. 2016 (Nightingale), Draisma et al. 2015 (Biocates p150), Shin et al.2014 (Twins-UK, Metabolon), Shin et al. 2014 (KORA, Metbolon)",
 28 |                      sex = "Males and Females", population = "European", unit = "SD",
 29 |                      nsnp = "~10000000", sample_size = ">10000", initial_build = "HG19/GRCh37", category = "Metabolites",
 30 |                      pmid = 33414548, sd = 1,
 31 |                      note = met_lab$fullname,
 32 |                      ncase = NA, ncontrol = NA)
 33 | # NOTE(review): group_name is "Public" and population "European" here, but "public" and "Mix" in format_wrapper() below -- confirm which is intended.
 34 | # Append only ids that are not in the index yet, so re-running the script to resume does not duplicate the met-2-* rows.
 35 | df_index <- rbind(df_index, newrow[!(id %in% df_index$id)])
 36 | 
 37 | lotta_ID <- df_index[pmid == "33414548", ]$id
 38 | format_wrapper <- function(lotta_id, traduction, df_index, met_lab) {
 39 |   # Format one metabolite GWAS: resolve its results file through met_lab, read it, and write it under `lotta_id`.
 40 |   this_trait <- df_index[id == lotta_id & pmid == "33414548", trait]
 41 |   file_stem <- met_lab[abbreviation == this_trait, namefile]
 42 |   sumstats <- fread(paste0("/home/couchr02/Mendel_Commun/Christian/GWAS/proteome/Lotta/Results/", file_stem, ".txt.gz"))
 43 |   GagnonMR::formattovcf_createindex(all_out = sumstats,
 44 |                                     snp_col = "rsid",
 45 |                                     outcome_name = df_index[id == lotta_id, trait],
 46 |                                     beta_col = "Beta",
 47 |                                     se_col = "SE",
 48 |                                     pval_col = "Pvalue_MA",
 49 |                                     eaf_col = "Freq1_MA",
 50 |                                     effect_allele_col = "Allele1",
 51 |                                     other_allele_col = "Allele2",
 52 |                                     ncase_col = NULL,
 53 |                                     ncontrol_col = NULL,
 54 |                                     samplesize_col = "Weight_MA",
 55 |                                     chr_col = "chr",
 56 |                                     pos_col = "pos",
 57 |                                     units = "SD",
 58 |                                     traduction = traduction,
 59 |                                     out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
 60 |                                     df_index = df_index,
 61 |                                     group_name = "public",
 62 |                                     year = 2021,
 63 |                                     author = "Lotta A, Luca",
 64 |                                     consortium = "Fenland, EPIC-Norfolk (Metabolon), INTERVAL (Metabolon), INTERVAL (Nightingale), Kettunen et al. 2016 (Nightingale), Draisma et al. 2015 (Biocates p150), Shin et al.2014 (Twins-UK, Metabolon), Shin et al. 2014 (KORA, Metbolon)",
 65 |                                     sex = "Males and Females",
 66 |                                     population = "Mix",
 67 |                                     initial_build = "HG19/GRCh37",
 68 |                                     category = "Metabolites",
 69 |                                     pmid = 33414548,
 70 |                                     note = df_index[id == lotta_id, note],
 71 |                                     should_create_id = FALSE, # the id is supplied by the caller
 72 |                                     ID = lotta_id)
 73 | }
 74 | 
 75 | 
 76 | options(future.globals.maxSize= 1e10)
 77 | plan(multisession, workers = 8)
 78 | # Copies handed to the mapped function below.
 79 | df_index_copy <- df_index
 80 | traduction_copy <- traduction
 81 | met_lab_copy <- met_lab
 82 | tic()
 83 | # Resume support: ids that already have a directory under Server_vcf are skipped.
 84 | k <- list.files("/mnt/sda/gagelo01/Vcffile/Server_vcf")
 85 | l <- k[grepl("met-2-", k)]
 86 | # Match ids directly instead of assuming exactly 173 metabolites stored in position order; a repeated id is processed once.
 87 | index <- which(!(lotta_ID %in% l) & !duplicated(lotta_ID))
 88 | 
 89 | future_map(lotta_ID[index], function(x) {format_wrapper(lotta_id = x, traduction =  traduction_copy, df_index =  df_index_copy, met_lab = met_lab_copy)},
 90 |            .options = furrr_options(seed = TRUE))
 91 | 
 92 | fwrite(df_index, "/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 93 | 
 94 | message("this script finished without errors")
 95 | 
 96 | toc()
 97 | 
 98 |                      
 99 |                      
100 |                      


--------------------------------------------------------------------------------
/1bb_importeGFRSD.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | library(data.table)
  3 | library(GagnonMR)
  4 | library(tidyverse)
  5 | 
  6 | 
  7 | gwasvcf::set_bcftools()
  8 | gwasvcf::set_plink()
  9 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs"
 10 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 11 | df_index[trait == "Stanzick_eGFR", ]
 12 | 
 13 | # Stanzick eGFR: estimate the trait SD from the trait-12-1 VCF, rescale the raw file by it, store the result as trait-12-2.
 14 | # `inst` (from get_inst()) supplies the SEs, allele frequencies and sample sizes given to coloc's sdY.est().
 15 | inst <- get_inst(vcffile = "/mnt/sda/gagelo01/Vcffile/Server_vcf/trait-12-1/trait-12-1.vcf.gz")
 16 | sd <- coloc:::sdY.est(vbeta = (inst$se.exposure)^2,
 17 |                       maf = inst[, ifelse(eaf.exposure > 0.5,1-eaf.exposure,eaf.exposure)],
 18 |                       n = inst$samplesize.exposure)
 19 | 
 20 | sd
 21 | # Copy the source row by id (the same id as the VCF above) so a re-run does not also pick up an existing trait-12-2 row.
 22 | newrow <- df_index[id == "trait-12-1", ]
 23 | newrow[, id := "trait-12-2"]
 24 | newrow[, `:=`(note = "standardised with coloc", unit = "SD")] # the rescaled effects are in SD units
 25 | df_index <- rbind(df_index, newrow)
 26 | # NOTE(review): relative path and no setwd() in this script -- it must be run from the project directory.
 27 | stanzick <- fread("Data/Raw/metal_eGFR_meta_ea1.TBL.map.annot.gc.gz")
 28 | 
 29 | stanzick[, Effect := Effect / sd]
 30 | stanzick[, StdErr := StdErr / sd]
 31 | # Reference table handed to the formatter as `traduction`; it is reused by the Wuttke import further down.
 32 | traduction <- fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
 33 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
 34 | traduction[, maf := NULL]
 35 | # Metadata now matches the Stanzick import in 1b_importeGFR.R (consortium and pmid previously carried Wuttke's values).
 36 | GagnonMR::formattovcf_createindex(all_out = stanzick,
 37 |                                   snp_col = "RSID",
 38 |                                   outcome_name = "Stanzick_eGFR",
 39 |                                   beta_col = "Effect",
 40 |                                   se_col = "StdErr",
 41 |                                   pval_col = "P.value",
 42 |                                   eaf_col = "Freq1",
 43 |                                   effect_allele_col = "Allele1",
 44 |                                   other_allele_col =  "Allele2",
 45 |                                   ncase_col = NULL,
 46 |                                   ncontrol_col = NULL,
 47 |                                   samplesize_col = "n",
 48 |                                   chr_col = "chr",
 49 |                                   pos_col = "pos",
 50 |                                   units = "SD",
 51 |                                   traduction = traduction,
 52 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
 53 |                                   df_index = df_index,
 54 |                                   group_name = "public",
 55 |                                   year = 2021,
 56 |                                   author = "Stanzick Kira",
 57 |                                   consortium = "CKDGEN UKBbiobank",
 58 |                                   sex = "Males and Females",
 59 |                                   population = "European",
 60 |                                   initial_build = "HG19/GRCh37",
 61 |                                   category = "Trait",
 62 |                                   pmid = 34272381,
 63 |                                   note = "Standardised with coloc",
 64 |                                   should_create_id = FALSE,
 65 |                                   ID = "trait-12-2")
 66 | fwrite(df_index,"/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 67 | 
 68 | ######
 69 | 
 70 | inst <- get_inst(vcffile = "/mnt/sda/gagelo01/Vcffile/Server_vcf/trait-11-1/trait-11-1.vcf.gz")
 71 | sd <- coloc:::sdY.est(vbeta = (inst$se.exposure)^2,
 72 |                       maf = inst[, ifelse(eaf.exposure > 0.5,1-eaf.exposure,eaf.exposure)],
 73 |                       n = inst$samplesize.exposure)
 74 | 
 75 | sd
 76 | # Copy the source row by id (the same id as the VCF above) so a re-run does not also pick up an existing trait-11-2 row.
 77 | newrow <- df_index[id == "trait-11-1", ]
 78 | newrow[, id := "trait-11-2"]
 79 | newrow[, `:=`(note = "standardised with coloc", unit = "SD")] # the rescaled effects are in SD units
 80 | df_index <- rbind(df_index, newrow)
 81 | # NOTE(review): relative path and no setwd() in this script -- it must be run from the project directory.
 82 | wuttke <- fread("Data/Raw/20171017_MW_eGFR_overall_EA_nstud42.dbgap.txt.gz")
 83 | wuttke[, Effect := Effect / sd]
 84 | wuttke[, StdErr := StdErr / sd]
 85 | 
 86 | # Metadata now matches the Wuttke import in 1b_importeGFR.R (year, consortium and pmid previously carried Stanzick's values).
 87 | GagnonMR::formattovcf_createindex(all_out = wuttke,
 88 |                                   snp_col = "RSID",
 89 |                                   outcome_name = "wuttke_eGFR",
 90 |                                   beta_col = "Effect",
 91 |                                   se_col = "StdErr",
 92 |                                   pval_col = "P-value",
 93 |                                   eaf_col = "Freq1",
 94 |                                   effect_allele_col = "Allele1",
 95 |                                   other_allele_col =  "Allele2",
 96 |                                   ncase_col = NULL,
 97 |                                   ncontrol_col = NULL,
 98 |                                   samplesize_col = "n_total_sum",
 99 |                                   chr_col = "Chr",
100 |                                   pos_col = "Pos_b37",
101 |                                   units = "SD",
102 |                                   traduction = traduction,
103 |                                   out_wd = "/mnt/sda/gagelo01/Vcffile/Server_vcf",
104 |                                   df_index = df_index,
105 |                                   group_name = "public",
106 |                                   year = 2019,
107 |                                   author = "Wuttke Matthias",
108 |                                   consortium = "CKDGEN",
109 |                                   sex = "Males and Females",
110 |                                   population = "European",
111 |                                   initial_build = "HG19/GRCh37",
112 |                                   category = "Trait",
113 |                                   pmid = 31152163,
114 |                                   note = "Standardise with coloc",
115 |                                   should_create_id = FALSE,
116 |                                   ID = "trait-11-2")
117 | 
118 | fwrite(df_index,"/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
119 | 
120 | message("This script finished without errors")


--------------------------------------------------------------------------------
/7a_writetext.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | library(data.table)
  3 | library(tidyverse)
  4 | library(TwoSampleMR)
  5 | library(MendelianRandomization)
  6 | library(GagnonMR)
  7 | 
  8 | 
  9 | 
 10 | ########################## descriptive statistics   ###################################
 11 | inst_clump <- fread( "Data/Modified/inst_clump.txt") # NOTE(review): relative paths and no setwd() in the visible part of this script
 12 | outcome_all <- fread( "Data/Modified/outcome_all")
 13 | harm_all <- fread( "Data/Modified/harm_all.txt")
 14 | harm_all <- harm_all[exposure != "willer_LDL-cholesterol:ieu-a-300",] # drop this exposure from the harmonised data
 15 | primary_df_full <- fread( "/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Primary/primary_df")
 16 | primary_df_full <- merge(primary_df_full, distinct(harm_all[,.(exposure, study)]), by = "exposure", all.x = TRUE) # attach `study` to each exposure
 17 | primary_df_full[, lci := b - se*1.96] # lower bound: b - 1.96 * se
 18 | primary_df_full[, uci := b + se*1.96] # upper bound: b + 1.96 * se
 19 | primary_df <- primary_df_full[exposure != "willer_LDL-cholesterol:ieu-a-300" ]
 20 | primary_df[, exposure_outcome := paste0(exposure, "_", outcome)]
 21 | 
 22 | # Row count per exposure-outcome pair (printed only), then the stored sensitivity results.
 23 | harm_all[, .N, by = "exposure_outcome"][order(N)]
 24 | list_sensitivity <- readRDS( "Data/Modified/Sensitivity/list_sensitivity")
 25 | veclog <- readRDS( "Data/Modified/Sensitivity/veclog")
 26 | MVMR_dat <- fread("/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Sensitivity/MVMR")
 27 | 
 28 | #####
 29 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 30 | ao <- fread("/mnt/sda/gagelo01/Vcffile/available_outcomes_2021-10-13.txt")
 31 | ID_mrbase_out <-  c("ieu-b-109", "ieu-b-110", "ieu-b-111", "ukb-b-19953","ukb-b-12141")
 32 | ID_server_out <- c("trait-2-2", "trait-6-1", "trait-7-2", "trait-2-4", "trait-12-2", 
 33 |                    "dis-2-1", "dis-3-1", "dis-4-1", "dis-5-1", "dis-6-1", "dis-7-1", "dis-8-1",
 34 |                    "trait-13-1", "trait-13-2", "trait-12-2")
 35 | 
 36 | 
 37 | k <- df_index[id %in% ID_server_out,]
 38 | m <- ao[id %in% ID_mrbase_out, ]
 39 | 
 40 | dataset <-rbind(k,m,fill = TRUE )
 41 | dataset <- dataset[,.(trait,group_name, year, author,  consortium, sex, population, unit, nsnp, sample_size, ncase, ncontrol, pmid, note)]
 42 | ######
 43 | return_format_data <- function(data) { # one string per row: "<exp(b)> 95% CI=<exp(lci)>-<exp(uci)>, p=<pval>"
 44 |   data[, paste0(round(exp(b), 2), " 95% CI=", round(exp(lci), 2), "-", round(exp(uci), 2), ", p=", formatC(pval, format = "e", digits = 1))]
 45 | }
 46 | return_format_data_noexp <- function(data) { # same layout as return_format_data, without exponentiating b/lci/uci
 47 |   data[, paste0(round(b, 2), " 95% CI=", round(lci, 2), "-", round(uci, 2), ", p=", formatC(pval, format = "e", digits = 1))]
 48 | }
 49 | #included exposure
 50 | harm_all$exposure %>% unique %>% length
 51 | harm_all$outcome %>% unique %>% length
 52 | harm_all$exposure_outcome %>% unique %>% length
 53 | 
 54 | 
 55 | #Abstract
 56 | harm_all[study %in% c("sanna", "kettunen", "framingham", "lotta"), length(unique(exposure))]
 57 | harm_all[study %in% c("kurilshikov", "ruhlemann"),length(unique(exposure))]
 58 | sum(veclog)
 59 | harm_all[study != "willer", ]$exposure_outcome %>% unique %>% length
 60 | dt_sen <-rbindlist(list_sensitivity[veclog])
 61 | dt_sen[method == "Inverse variance weighted", ][abs(b)<0.1, .N]
 62 | 
 63 | ##INtro
 64 | 
 65 | #Results
 66 | #para 1
 67 | harm_all[, length(unique(exposure))]
 68 | dataset[population != "European", ] 
 69 | harm_all[study %in% c("sanna", "kettunen", "framingham", "lotta"), length(unique(exposure))]
 70 | harm_all[study %in% c("kurilshikov", "ruhlemann"),length(unique(exposure))]
 71 | harm_all[,length(unique(exposure))]
 72 | 
 73 | #para 2
 74 | harm_all[study %in% c("sanna", "kettunen", "framingham", "lotta"), length(unique(exposure_outcome))]
 75 | primary_df[study %in% c("sanna", "kettunen", "framingham", "lotta"), mean(abs(b))]
 76 | primary_df[study %in% c("sanna", "kettunen", "framingham", "lotta") & pval < 0.05, .N]
 77 | primary_df[study %in% c("sanna", "kettunen", "framingham", "lotta") & fdr < 0.05, .N]
 78 | primary_df_full[exposure == "willer_LDL-cholesterol:ieu-a-300" & 
 79 |                   outcome %in% c("van_der_Harst_CAD", "Deelen_longevity")] %>% return_format_data(.)
 80 | 
 81 | #para 3
 82 | harm_all[study %in% c("kurilshikov", "ruhlemann"), length(unique(exposure))]
 83 | primary_df[study %in% c("kurilshikov", "ruhlemann"), length(unique(exposure_outcome))]
 84 | primary_df[study %in% c("kurilshikov", "ruhlemann"), mean(abs(b))]
 85 | primary_df[study %in% c("kurilshikov", "ruhlemann") & pval < 0.05, .N]
 86 | primary_df[study %in% c("kurilshikov", "ruhlemann") & fdr < 0.05, .N]
 87 | 
 88 | #Para 4
 89 | primary_df[pval < 0.05,.N]
 90 | length(list_sensitivity)
 91 | sum(veclog)
 92 | harm_all[exposure_outcome  %in% names(list_sensitivity)[veclog], all(steiger_dir)]
 93 | harm_all[exposure_outcome  %in% names(list_sensitivity)[veclog], any(is_in_pleiotropic_region)]
 94 | 
 95 | dtsen <- rbindlist(list_sensitivity[veclog])
 96 | dtsen[method == "Inverse variance weighted", ][abs(b)<0.1, .N]
 97 | 
 98 | dtsen[method == "Inverse variance weighted" & abs(b)>0.2,] %>%
 99 |   return_format_data(.)
100 | 
101 | 
102 | #para7
103 | { # ratio of each MVMR estimate to the "no counfounder" estimate of the same exposure-outcome pair
104 |   MVMR_dat[, b := as.numeric(b)]
105 |   MVMR_dat[, exposure_outcome := paste0(exposure, "_", outcome)]
106 |   MVMR_dat_split <- split(MVMR_dat, MVMR_dat$exposure_outcome)
107 |   list_dat <- vector(mode = "list", length(MVMR_dat_split))
108 |   for (i in seq_along(MVMR_dat_split)) { # seq_along() also behaves when the split is empty
109 |     dat <- MVMR_dat_split[[i]]
110 |     reference <- dat[MVMR == "no counfounder", b]
111 |     dat[, comparison := b / reference] # vectorised; apply() would coerce the mixed-type .SD to character first
112 |     list_dat[[i]] <- dat
113 |   }
114 |   datfull <- rbindlist(list_dat)}
115 | 
116 | datfull[comparison < 0.6 & !(outcome == "ukb-b-19953" & MVMR == "with BMI"),][,.(exposure,outcome,b,comparison, MVMR)]
117 | datfull[comparison < 0.6 & !(outcome == "ukb-b-19953" & MVMR == "with BMI"),][exposure == "lotta_Serotonin" & outcome == "van_der_Harst_CAD"] %>% 
118 |   return_format_data(.)
119 | datfull[comparison < 0.6 & !(outcome == "ukb-b-19953" & MVMR == "with BMI"),][exposure == "kurilshikov_order.Lactobacillales.id.1800"] %>% 
120 |   return_format_data(.)
121 | 
122 | 
123 | ######Discussion
124 | primary_df[fdr<0.05,]
125 | dtsen[method == "Inverse variance weighted" & abs(b)>0.2,]
126 | dtsen[method == "Inverse variance weighted" & abs(b)>0.1,]
127 | 
128 | #comparison with other studies
129 | primary_df[exposure %in% "framingham_trimethylamine_N_oxide" & outcome %in% "van_der_Harst_CAD",] %>% 
130 |   return_format_data(.)
131 | 
132 | #Method
133 | primary_df[,length(unique(exposure_outcome))]
134 | suppose <- length(unique(harm_all$outcome)) * harm_all[study != "willer", length(unique(exposure))]
135 | 
136 | suppose - primary_df[,length(unique(exposure_outcome))]  
137 | 


--------------------------------------------------------------------------------
/6_create_supplementary_tables.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | library(data.table)
  3 | library(readxl)
  4 | library(TwoSampleMR)
  5 | library(tidyverse)
  6 | library(writexl)
  7 | library("xlsx")
  8 | library(GagnonMR)
  9 | 
 10 | setwd("/home/gagelo01/workspace/Projects/Dysbiose_project")
 11 | harm_all <- fread( "Data/Modified/harm_all.txt")
 12 | dataset_info<- read_excel("Data/Modified/informationondataused.xlsx")
 13 | expinf <- readxl::read_excel(path = "Data/Modified/exposures_cohort_info.xlsx") 
 14 | setDT(expinf)
 15 | ########Supplementary table titles and description
 16 | dt_title <- data.table(title = paste0("Supplementary Table ", 1:7),
 17 |                        caption = c("Description of the datasets used.",
 18 |                                    "Description of the metabolites selection rational.",
 19 |                                    "Summary of the instrument selection criteria for each data source. LD R2, pvalue treshold and Fstatistics.",
 20 |                                    "Harmonised dataset for each exposure outcome.",
 21 |                                    "Primary MR results and associated statistics.",
 22 |                                     "Robust MR results and other sensitivity analyses results.",
 23 |                                    "Multivariable MR results and associated statistics."))
 24 | ########Supplementary table with information
 25 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 26 | ao<-fread("/mnt/sda/gagelo01/Vcffile/available_outcomes_2021-10-13.txt")
 27 | ao_small<-ao[id %in% list.files("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/")]
 28 | ID_server_out <- c("trait-2-2", "trait-6-1", "trait-7-2", "trait-2-4", "trait-12-2", 
 29 |                    "dis-2-1", "dis-3-1", "dis-4-1", "dis-5-1", "dis-6-1", "dis-7-1", "dis-8-1",
 30 |                    "trait-13-1", "trait-13-2")
 31 | ID_mrbase_out <-  c("ieu-b-109", "ieu-b-110", "ieu-b-111", "ukb-b-19953","ukb-b-12141")
 32 | 
 33 | outcome_table <- rbind(ao[id %in% ID_mrbase_out, ], df_index[id %in% ID_server_out], fill = TRUE)
 34 | outcome_table[author == "Richardson, Tom", unit := "SD"] # row filter goes in `i`, assignment in `j`
 35 | dis <- c("ukb-b-12141", "dis-2-1", "dis-3-1", "dis-4-1", "dis-5-1", "dis-6-1", "dis-7-1", "dis-8-1")
 36 | longlife <- c("trait-6-1", "trait-7-2")
 37 | # Category of each outcome: ids in `dis` are "Disease", ids in `longlife` are
 38 | # "Mortality", every other id is a "Metabolic risk factor".
 39 | outcome_table[, category := ifelse(id %in% dis, "Disease", ifelse(id %in% longlife, "Mortality", "Metabolic risk factor"))]
 40 | outcome_table[, variable := "outcome"]
 41 | outcome_table <- outcome_table[, .(variable, category, trait, group_name, year, author, consortium,
 42 |                                   sex, population, unit, nsnp,
 43 |                                   sample_size, pmid, note, ncase, ncontrol)]
 44 | outcome_table <- outcome_table[order(category, trait)]
 45 | 
 46 | setDT(expinf)
 47 | expinf[trait == "Microbial relative abundance", trait := paste0(trait, " (", tolower(author), ")")]
 48 | expinf[, c("sex", "unit") := .("Males and Females", "SD")]
 49 | expinf[, category := ifelse(author %in% c("Kurilshikov", "Ruhlemann"), "Taxa abundance", "metabolites")]
 50 | lotta_tobind <- df_index[pmid == "33414548",][1,]
 51 | lotta_tobind[, trait := c("serotonin, leucine, isoleucine, valine, kynurenine")]
 52 | lotta_tobind[,category := "metabolites"]
 53 | lotta_tobind[, note := NULL]
 54 | lotta_tobind <- lotta_tobind[,.(trait, group_name, year, author, consortium, 
 55 |                 sex, population, unit, nsnp, category,
 56 |                 sample_size, pmid, ncase, ncontrol)]
 57 | exp <- rbind(lotta_tobind, expinf, fill = TRUE)
 58 | exp[,variable := "exposure"]
 59 | 
 60 | suptab1 <- rbind(outcome_table, exp, fill = TRUE)
 61 | suptab1[, c("group_name", "id", "initial_build", "sd", "nsnp") := NULL]
 62 | dattrait <- data.frame(trait = suptab1[,trait],
 63 |            url = c("https://datashare.is.ed.ac.uk/handle/10283/3203",
 64 |                    "https://ctg.cncr.nl/software/summary_statistics",
 65 |                    "http://diagram-consortium.org/downloads.html",
 66 |                    "https://www.megastroke.org/download.html",
 67 |                    "https://www.ebi.ac.uk/gwas/publications/34841290",
 68 |                    "https://gwas.mrcieu.ac.uk/files/ukb-b-12141/ukb-b-12141.vcf.gz",
 69 |                    "http://ckdgen.imbi.uni-freiburg.de/",
 70 |                    "https://data.mendeley.com/datasets/gbbsrpx6bs/1",
 71 |                    "https://gwas.mrcieu.ac.uk/files/ukb-b-19953/ukb-b-19953.vcf.gz",
 72 |                    "https://magicinvestigators.org/downloads/",
 73 |                    "https://magicinvestigators.org/downloads/",
 74 |                    "https://www.ebi.ac.uk/gwas/publications/32203549",
 75 |                    "https://www.ebi.ac.uk/gwas/publications/32203549",
 76 |                    "https://ckdgen.imbi.uni-freiburg.de/",
 77 |                    "https://www.ebi.ac.uk/gwas/publications/30224653",
 78 |                    "https://www.ebi.ac.uk/gwas/publications/30224653",
 79 |                    "https://www.ebi.ac.uk/gwas/publications/32203549",
 80 |                    "https://www.longevitygenomics.org/downloads",
 81 |                    "https://datashare.ed.ac.uk/handle/10283/3209",
 82 |                    "https://omicscience.org/apps/crossplatform/",
 83 |                    "https://static-content.springer.com/esm/art%3A10.1038%2Fs41588-019-0350-x/MediaObjects/41588_2019_350_MOESM1_ESM.pdf",
 84 |                    "https://www.cell.com/action/showFullTableHTML?isHtml=true&tableId=tbl2&pii=S1550-4131%2813%2900257-X",
 85 |                    "http://www.computationalmedicine.fi/data#NMR_GWAS",
 86 |                    "https://www.nature.com/articles/s41588-020-00747-1",
 87 |                    "https://mibiogen.gcc.rug.nl/"))
 88 |                    
 89 | suptab1 <- cbind(suptab1, url = dattrait$url)
 90 | suptab1[, note := gsub("standardised? with coloc", "standardised with coloc:::sdY.est function", note, ignore.case = TRUE)] # also matches the "Standardise with coloc" spelling written by the import scripts
 91 | 
 92 | suptab1 <- merge(suptab1, dataset_info, by= "trait")
 93 | #Supplementary Table 2
 94 | suptab2<- readxl::read_excel(path = "Data/Modified/tables_metabolites.xlsx") %>% 
 95 |   as.data.table
 96 | 
 97 | #Supplementary Table 3
 98 | suptab3<- readxl::read_excel(path = "Data/Modified/instrumentselection.xlsx") %>% 
 99 |   as.data.table
100 | setnames(suptab3, "min F-statistics", "F.statistics")
101 | suptab3[, F.statistics := 10]
102 | #Supplementary Table 4
103 | harm_all <- fread( "Data/Modified/harm_all_clean.txt")
104 | harm_all[,c("id.exposure", "id.outcome", "outcome", "exposure") := NULL]
105 | setnames(harm_all, c("exposure_clean", "outcome_clean", "Category"), c("exposure", "outcome", "exposure_category"))
106 | suptab4 <- harm_all
107 | #Supplementary Table 5
108 | primary_df <- fread( "Data/Modified/primary_df_clean.txt")
109 | harm_all <- fread( "Data/Modified/harm_all_clean.txt")
110 | harm_all <- distinct(harm_all[, .(exposure, outcome, fstat.exposure)])
111 | harm_all[ , fstat.exposure := mean(fstat.exposure), by = c("exposure", "outcome")]
112 | harm_all
113 | primary_df <- merge(
114 |   primary_df,
115 |   distinct(harm_all),
116 |   by = c("exposure", "outcome"))
117 | primary_df[,c("id.exposure", "id.outcome", "outcome", "exposure") := NULL]
118 | setnames(primary_df, c("exposure_clean", "outcome_clean", "Category"), c("exposure", "outcome", "exposure_category"))
119 | primary_df <- primary_df[, .(exposure_category, outcome_category, exposure, outcome, method, nsnp,
120 | b,se,pval,fdr, power, fstat.exposure) ]          
121 | primary_df <- primary_df[order(exposure_category, outcome_category, exposure, outcome),]
122 | suptab5 <- primary_df
123 | suptab5[,power:=NULL]
124 | #Supplementary table 6
125 | list_sensitivity <- readRDS( "Data/Modified/Sensitivity/list_sensitivity")
126 | veclog <- readRDS( "Data/Modified/Sensitivity/veclog")
127 | suptab6 <- rbindlist(list_sensitivity)
128 | 
129 | #Supplementary Table 7
130 | MVMR_dat <- fread("Data/Modified/MVMR_clean.txt")
131 | MVMR_dat[, c("outcome", "exposure") := NULL] # the *_clean columns take over these names just below
132 | setnames(MVMR_dat, c("exposure_clean", "outcome_clean", "Category", "F_stastistics"),
133 |          c("exposure", "outcome", "exposure_category", "conditional_F_stastistics"))
134 | MVMR_dat <- MVMR_dat[, .(exposure_category, outcome_category, exposure, outcome, method, nsnp,
135 |                          b, se, lci, uci, pval, MVMR, cochranQ, cochranQpval, conditional_F_stastistics)] # nsnp selected once, so the sheet has no duplicated column
136 | MVMR_dat <- MVMR_dat[order(exposure_category, outcome_category, exposure, outcome), ]
137 | suptab7 <- MVMR_dat
138 | 
139 | 
140 | 
141 | 
142 | 
143 | writexl::write_xlsx(x = list("Tables captions and titles" = dt_title,
144 |                              "Supplementary Table 1" = suptab1,
145 |                              "Supplementary Table 2" = suptab2, 
146 |                              "Supplementary Table 3" = suptab3, 
147 |                              "Supplementary Table 4" = suptab4, 
148 |                              "Supplementary Table 5" = suptab5, 
149 |                              "Supplementary Table 6" = suptab6,
150 |                              "Supplementary Table 7" = suptab7),
151 |                     path = "Results/supplementary_tables.xlsx")
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/2_import_instrument.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | library(data.table)
  3 | library(tidyverse)
  4 | library(TwoSampleMR)
  5 | library(MendelianRandomization)
  6 | library(GagnonMR)
  7 | library(readxl)
  8 | 
  9 | setwd("/mnt/sda/gagelo01/Projects/Dysbiose_project")
 10 | gwasvcf::set_bcftools()
 11 | gwasvcf::set_plink()
 12 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs"
 13 | df_index <- fread("/mnt/sda/gagelo01/Vcffile/server_gwas_id.txt")
 14 | #
 15 | inst_sanna <- fread("Data/Raw/inst_sanna")
 16 | inst_sanna[, units.exposure := "SD"]
 17 | 
 18 | rsid_traduction <-  fread("/mnt/sda/couchr02/rsids/rsid_pos_86millions.txt")
 19 | rsid_traduction[,chr_pos := paste0(chr, "_", pos)]
 20 | all_out <- rsid_traduction[rsid %in% inst_sanna$SNP]
 21 | mirge <- merge(inst_sanna, all_out[,c(1,3,2) ], by.x = "SNP", by.y = "rsid", all.x = TRUE)
 22 | setnames(mirge, c("pos", "chr"), c("pos.exposure", "chr.exposure"))
 23 | inst_sanna <- mirge
 24 | 
 25 | ###"framingham"
 26 | traduction = fread("/mnt/sda/couchr02/1000G_Phase3/1000G_Phase3_b37_rsid_maf.txt")
 27 | traduction[, EUR := EUR %>% ifelse(.==0,0.001,. ) %>% ifelse(.==1, 0.999, .)]
 28 | traduction[, maf := NULL]
 29 | framingham_pheno <- c("indole_3_propionate", "trimethylamine_N_oxide") #carnitine is also
 30 | format_framingham <- function(framingham_pheno) {
 31 |   # Reads sheet `framingham_pheno` of Data/Raw/mmc2.xlsx, aligns alleles to `traduction` (a0/a1) and returns TwoSampleMR exposure data.
 32 |   instrument <- read_excel("Data/Raw/mmc2.xlsx", sheet = framingham_pheno)
 33 |   setDT(instrument)
 34 |   instrument <- instrument[Chr != "X", ]
 35 |   instrument[, Chr := as.integer(Chr)]
 36 |   all_out <- merge(instrument, traduction, by.x = c("rsID"), by.y = c("rsid"), all = FALSE)
 37 |   setnames(all_out, c("min_all", "maj_all"), c("effect_allele", "other_allele"))
 38 |   all_out[, c("Chr", "PhysPos") := NULL] #I think it is GRCH36 so better to remove it
 39 |   # keep only SNPs whose allele pair is exactly the a0/a1 pair of `traduction`
 40 |   all_out <- all_out[(effect_allele == a0 | effect_allele == a1) & (other_allele == a0 | other_allele == a1) & a0 != a1 & effect_allele != other_allele, ] #because low number removed, coded on the forward strand
 41 |   all_out[effect_allele == a0, beta := beta * -1] # flip the sign so every beta refers to a1
 42 |   all_out[effect_allele == a0, effect_allele := a1]
 43 |   all_out[other_allele == a1, other_allele := a0]
 44 |   all_out[, eaf := ifelse(EUR < 0.5, MAF, 1 - MAF)]
 45 |   instrument <- all_out
 46 |   instrument[, Phenotype := framingham_pheno]
 47 |   instrument[, units := "SD"]
 48 |   instrument[, samplesize := 2076]
 49 |   inst <- TwoSampleMR::format_data(
 50 |     instrument,
 51 |     type = "exposure",
 52 |     phenotype_col = "Phenotype",
 53 |     snp_col = "rsID",
 54 |     beta_col = "beta",
 55 |     se_col = "se",
 56 |     effect_allele_col = "effect_allele", #I verified and this is the effect allele
 57 |     other_allele_col = "other_allele", #I verified and this is the reference allele
 58 |     eaf_col = "eaf",
 59 |     pval_col = "pval",
 60 |     units_col = "units",
 61 |     samplesize_col = "samplesize",
 62 |     chr_col = "chr",
 63 |     pos_col = "position"
 64 |   )
 65 |   setDT(inst)
 66 |   return(inst)
 67 | } #do not estimate eaf, cause old GWAS
 68 | 
 69 | list_framingham <- pmap(data.frame(framingham_pheno = framingham_pheno), format_framingham)
 70 | inst_framingham <- rbindlist(list_framingham, fill = TRUE)
 71 | inst_framingham <- inst_framingham[pval.exposure < 1*10^-5, ]
 72 | 
 73 | ##kettunen
 74 | kettunen_pheno <- c("Ace")
 75 | format_kettunen <- function(kettunen_pheno) {
 76 |   instrument <- fread(paste0("Data/Raw/Summary_statistics_MAGNETIC_kettunen_", kettunen_pheno, ".txt.gz"))
 77 |   setnames(instrument, "p-value", "p_value")
 78 |   instrument <- instrument[p_value < 1*10^-5, ]
 79 |   instrument[,Phenotype := kettunen_pheno]
 80 |   instrument[,units := "SD"]
 81 |   
 82 |   inst_kett<- format_data(
 83 |     instrument,
 84 |     type = "exposure",
 85 |     phenotype_col = "Phenotype",
 86 |     snp_col = "ID",
 87 |     beta_col = "beta",
 88 |     se_col = "se",
 89 |     eaf_col = "eaf",
 90 |     effect_allele_col = "EA",
 91 |     other_allele_col = "NEA",
 92 |     pval_col = "p_value",
 93 |     units_col = "units",
 94 |     ncase_col = "ncase",
 95 |     ncontrol_col = "ncontrol",
 96 |     samplesize_col = "n_samples",
 97 |     chr_col = "chromosome",
 98 |     pos_col = "position")
 99 |   
100 |   setDT(inst_kett)
101 |   return(inst_kett)
102 | }
103 | 
104 | list_kettunen <- pmap(data.frame(kettunen_pheno = kettunen_pheno), format_kettunen)
105 | inst_kettunen <- rbindlist(list_kettunen, fill = TRUE)
106 | 
107 | 
108 | ###Lotta
109 | vec_id <- df_index[pmid == "33414548" ,][ trait %in% c("Ile", "Leu", "Val") | note %in% c("Serotonin", "Kynurenine"), ]$id
110 | 
111 | vec_vcffile <- paste0("/mnt/sda/gagelo01/Vcffile/Server_vcf/", vec_id, "/", vec_id, ".vcf.gz")
112 | 
113 | inst_lotta <- map(as.list(vec_vcffile), function(x) get_inst(vcffile = x,pval = 1e-6, clump = FALSE, r2 = 0.01, kb = 10000)) %>% 
114 |   rbindlist(., fill = TRUE)
115 | inst_lotta[,units.exposure := "SD"]
116 | #######ruhlemann
117 | ruhlemann <-read_excel("Data/Raw/41588_2020_747_MOESM3_ESM.xlsx", sheet = 4, range = "A3:CN10003")
118 | setDT(ruhlemann)
119 | ruhlemann <- ruhlemann[META.P < 1*10^-6, ]
120 | 
121 | ####change exposure of ruhlemann
122 | key_exposure <-read_excel("Data/Raw/41588_2020_747_MOESM3_ESM.xlsx", sheet = 1, range = "A4:C283")
123 | setDT(key_exposure)
124 | setnames(key_exposure, "Feature name", "feature")
125 | 
126 | key_exposure[, Taxonomy := mgsub::mgsub(Taxonomy,pattern = c("K_", "P_", "C_", "O_", "F_", "G_"),
127 |                                         c("kingdom-", "phylum-", "class-",  "order-",   "family-",  "genus-"))]
128 | key_exposure[, feature := sub("_", "-", feature)]
129 | ruhlemann[, feature := sub("NB_", "", feature)]
130 | ruhlemann[, feature := sub("_", "-", feature)]
131 | test <- merge(ruhlemann ,key_exposure[,c("feature", "Taxonomy")], by = "feature", all.x = TRUE)
132 | test[, Taxonomy %>% unique %>% length, by = "feature"]$V1 %>% table
133 | test[, feature %>% unique %>% length, by = "Taxonomy"]$V1 %>% table
134 | test$Taxonomy %>% unique(.) %>% length(.) #Here I make a decision. I want my exposure to be "feature", but i will include Taxonomy
135 | test[ , Taxonomy := tolower(Taxonomy)]
136 | test[ , feature_taxonomy := paste0(feature, "(", Taxonomy, ")")]
137 | ruhlemann <- test
138 | ##remove expousre ruhlemann already evaluated in kuri
139 | ku_bac <- read_excel("Data/Raw/41588_2020_763_MOESM3_ESM.xlsx", sheet = 3, skip =2)
140 | setDT(ku_bac)
141 | ku <- ku_bac[Quant==TRUE, Taxon.name]
142 | ku <- unique(ku)
143 | ku<-ku%>% gsub("\\.id..*", "", .)
144 | ku<-tolower(ku)
145 | ku <- ku %>% gsub("\\.", "-", .) %>% gsub("--", "-", .)
146 | ruhlemann <- ruhlemann[!(Taxonomy %in% ku), ]
147 | #add rsid and eaf
148 | mirge <- merge(ruhlemann, traduction, by.x = c("chrom", "pos"), by.y = c("chr", "position"), all = FALSE) #genome assembly hg19 (GRCh37).
149 | all_out <- mirge
150 | all_out[,units := "SD"]
151 | 
152 | setnames(all_out, c("A1", "A2"), c("other_allele", "effect_allele"))
153 | all_out <- all_out[(effect_allele == a0 | effect_allele == a1) & (other_allele == a0 | other_allele == a1) & a0 != a1  & effect_allele != other_allele, ] #because low number removed, coded on the forward strand
154 | all_out[effect_allele == a0, META.BETA := META.BETA*-1]
155 | all_out[effect_allele == a0, effect_allele := a1]
156 | all_out[other_allele == a1, other_allele := a0] 
157 | all_out[, eaf := ifelse(EUR < 0.5, META.MAF, 1-META.MAF)]
158 | 
159 | 
160 | ##format
161 | ruhlemann_inst  <- TwoSampleMR::format_data(
162 |   all_out,
163 |   type = "exposure",
164 |   phenotype_col = "feature_taxonomy",
165 |   snp_col = "rsid",
166 |   beta_col = "META.BETA",
167 |   se_col = "META.se",
168 |   eaf_col = "eaf",
169 |   effect_allele_col = "effect_allele", #See Supplementary Tables 2
170 |   other_allele_col = "other_allele", #See Supplementary Tables 2
171 |   pval_col = "META.P",
172 |   units_col = "units",
173 |   samplesize_col = "META.N",
174 |   id_col = "id",
175 |   chr_col = "chrom",
176 |   pos_col = "pos") #
177 | 
178 | setDT(ruhlemann_inst)
179 | 
180 | inst_ruhlemann <- ruhlemann_inst
181 | 
182 | #kurilshikov
183 | kurilshikov <- fread("Data/Raw/MBG.allHits.p1e4.txt")
184 | kurilshikov <- kurilshikov[P.weightedSumZ < 1e-6, ] # same threshold as the final pval.exposure filter below
185 | 
186 | inst_kurilshikov <- TwoSampleMR::format_data(
187 |   kurilshikov,
188 |   type = "exposure",
189 |   phenotype_col = "bac",
190 |   snp_col = "rsID",
191 |   beta_col = "beta",
192 |   se_col = "SE",
193 |   # eaf_col is left at its default; eaf.exposure is overwritten with the EUR frequency of `traduction` below
194 |   effect_allele_col = "eff.allele",
195 |   other_allele_col = "ref.allele",
196 |   pval_col = "P.weightedSumZ",
197 |   samplesize_col = "N",
198 |   chr_col = "chr",
199 |   pos_col = "bp")
200 | 
201 | setDT(inst_kurilshikov)
202 | inst_kurilshikov[, units.exposure := "SD"]
203 | all_out <- merge(inst_kurilshikov, traduction, by.x = c("chr.exposure", "pos.exposure"), by.y = c("chr", "position"), all = FALSE)
204 | setDT(all_out)
205 | all_out <- all_out[(effect_allele.exposure == a0 | effect_allele.exposure == a1) & (other_allele.exposure == a0 | other_allele.exposure == a1)
206 |                    & a0 != a1 & effect_allele.exposure != other_allele.exposure, ] # allele pair must equal the a0/a1 pair
207 | all_out <- all_out[chr.exposure %in% 1:22, ]
208 | all_out[, chr.exposure := as.integer(chr.exposure)]
209 | all_out[effect_allele.exposure == a0, beta.exposure := beta.exposure * -1] # flip the sign so every beta refers to a1
210 | all_out[effect_allele.exposure == a0, effect_allele.exposure := a1]
211 | all_out[other_allele.exposure == a1, other_allele.exposure := a0]
212 | all_out[, eaf.exposure := EUR]
213 | 
214 | all_out[, c("rsid", "a0", "a1", "EUR") := NULL]
215 | all_out <- all_out[pval.exposure < 1e-6, ]
216 | inst_kurilshikov <- all_out
217 | 
218 | #####change exposure and id column
219 | list_inst <- list(inst_sanna, inst_framingham, inst_kettunen, inst_lotta, inst_ruhlemann, inst_kurilshikov)
220 | study_name <- c( "sanna", "framingham", "kettunen", "lotta", "ruhlemann", "kurilshikov")
221 | names(list_inst) <- study_name
222 | for(i in 1:length(list_inst)) {
223 |   inst <- list_inst[[i]]
224 |   inst$exposure <- paste0(study_name[i], "_", inst$exposure)
225 |   inst$id.exposure <- inst$exposure
226 |   list_inst[[i]] <- inst
227 | }
228 | 
229 | dt_inst_noselect <- rbindlist(list_inst, fill = TRUE)
230 | fwrite(dt_inst_noselect, "Data/Modified/dt_inst_noselect.txt")
231 | 
232 | ######add ldl cholesterol
233 | ##add ldl of GLGC consortium
234 | ldl <- GagnonMR::get_inst("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/ieu-a-300/ieu-a-300.vcf.gz",pval = 5e-8, r2 = 0.01) 
235 | ldl[, exposure := "willer_LDL-cholesterol:ieu-a-300" ]
236 | ldl[,units.exposure :=  "SD"]
237 | ldl[, data_source.exposure := NULL]
238 | ldl[,id.exposure := exposure]
239 | 
240 | ###rbind by names
241 | dt_inst <- rbindlist(c(list_inst, list(ldl)), use.names = TRUE, fill = TRUE)
242 | dt_inst[is.na(units.exposure),]
243 | dt_inst[is.na(samplesize.exposure),]
244 | dt_inst[,c("chrompos", "CHROM", "POS", "N_ALLELES", "N_CHR", "allele1", "freq1", "allele2", "freq2") := NULL]
245 | 
246 | ####rsq
247 | dt_inst <- add_rsq(dt_inst)
248 | setDT(dt_inst)
249 | dt_inst[is.na(rsq.exposure),] #parfait
250 | 
251 | 
252 | #add column study and microbiote (split at the first "_" only, the rest of the name stays in microbiote)
253 | dt_inst <- separate(dt_inst, col = "exposure", into = c("study", "microbiote"), sep = "_", extra = "merge", remove = FALSE)
254 | 
255 | fwrite(dt_inst, "Data/Modified/all_inst.txt")
256 | 
257 | all_inst <- dt_inst
258 | 
259 | all_inst_split <- split(all_inst, all_inst$exposure)
260 | 
261 | inst_clump <- map(all_inst_split, function(x) { 
262 | rsiid <- x %>% dplyr::select(rsid=SNP, pval=pval.exposure, id = id.exposure) %>%
263 |   ieugwasr::ld_clump(., clump_r2 = 0.01, plink_bin=genetics.binaRies::get_plink_binary(), bfile=ldref) %>%
264 |   {.$rsid}
265 | return(x[SNP %in% rsiid]) }) %>% rbindlist(., fill = TRUE)
266 | 
267 | #fstat
268 | inst_clump_split <- split(inst_clump, inst_clump$exposure)
269 | fstat_fromdat <- function(dat) { # F-statistic of one exposure from its summed rsq.exposure, mean sample size and SNP count
270 |   n_snp <- nrow(dat)
271 |   n_sample <- mean(dat$samplesize.exposure)
272 |   rsq_total <- sum(dat$rsq.exposure)
273 |   rsq_ratio <- rsq_total / (1 - rsq_total)
274 |   ((n_sample - n_snp - 1) / n_snp) * rsq_ratio
275 | }
276 | vec_fstat <- vapply(inst_clump_split, fstat_fromdat, numeric(1)) # named numeric vector, one F-statistic per exposure
277 | inst_clump[, fstat.exposure := unname(vec_fstat[match(exposure, names(vec_fstat))])] # numeric column; a list column would make mean() below return NA
278 | inst_clump[is.na(fstat.exposure) | fstat.exposure < 0, ] #parfait
279 | 
280 | # drop exposures whose F-statistic is not > 10, then exposures left with fewer than 3 instruments
281 | exposure_to_remove <- inst_clump[, mean(fstat.exposure) > 10, by = "exposure"][V1 == FALSE, ]$exposure #select mean fstat > 10
282 | inst_clump <- inst_clump[!(exposure %in% exposure_to_remove), ]
283 | exposure_to_remove <- inst_clump[, .N >= 3, by = "exposure"][V1 == FALSE, ]$exposure
284 | inst_clump <- inst_clump[!(exposure %in% exposure_to_remove), ]
285 | 
286 | fwrite(inst_clump, "Data/Modified/inst_clump.txt")
287 | 
288 | print("script finished without errors")
289 | 


--------------------------------------------------------------------------------
/4_harmonise_and_mrresult.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | library(data.table)
  3 | library(tidyverse)
  4 | library(TwoSampleMR)
  5 | library(MendelianRandomization)
  6 | library(GagnonMR)
  7 | 
  8 | setwd("/mnt/sda/gagelo01/Projects/Dysbiose_project")
  9 | gwasvcf::set_bcftools()
 10 | gwasvcf::set_plink()
 11 | ldref = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs"
 12 | inst_clump <- fread( "Data/Modified/inst_clump.txt")
 13 | outcome_all <- fread( "Data/Modified/outcome_all")
 14 | 
 15 | arguments <- tidyr::crossing(exposure_name = unique(inst_clump$exposure), outcome_name = unique(outcome_all$outcome))
 16 | list_arguments <- split(arguments, 1:nrow(arguments))
 17 | harm_all <- map(list_arguments, function(x) {
 18 |   harm <- harmonise_data(inst_clump[exposure == x$exposure_name,], outcome_dat = outcome_all[outcome == x$outcome_name,], 
 19 |                          action = 1) #there all coded on the forward strand is rhee coded on the forward strand I assume that yes, because build 36 is coded on the forward strand
 20 |   return(harm)
 21 | }) %>% rbindlist(., fill = TRUE)
 22 | 
 23 | # harm_all1 <- harmonise_data(inst_clump[!(study %in%  c("framingham", "IBD")), ], outcome_all, action = 1)
 24 | # harm_all2 <- harmonise_data(inst_clump[study %in%  c("framingham", "IBD")], outcome_all, action = 2)
 25 | # harm_all <- rbindlist(list(harm_all1, harm_all2), fill = TRUE)
 26 | setDT(harm_all)
 27 | harm_all[, exposure_outcome := paste0(exposure,"_", outcome)]
 28 | 
 29 | ##remove every association with less than 3 genetic instruments
 30 | harm_all<-harm_all[mr_keep==TRUE,]
 31 | to_remove <- harm_all[, .N, by ="exposure_outcome"][N<3,]$exposure_outcome
 32 | harm_all <- harm_all[!(exposure_outcome %in% to_remove),]
 33 | # ####remove ruhlemann exposure that are in kurilshikov
 34 | # harm_all[, taxa := tolower(exposure)]
 35 | # harm_all[study == c("kurilshikov"),taxa := taxa %>%gsub("kurilshikov_","",.)%>% gsub("\\.id..*", "", .) %>% gsub("\\.", "-", .) %>% gsub("--", "-", .)]
 36 | # harm_all[study == c("ruhlemann"), taxa := gsub("[\\(\\)]", "", regmatches(exposure, gregexpr("\\(.*?\\)", exposure)))]
 37 | # ku<-harm_all[study == c("kurilshikov"),taxa]
 38 | # ru<-harm_all[study == c("ruhlemann"), taxa ]
 39 | # to_remove <- harm_all[study == c("ruhlemann") & taxa %in% ru[(ru %in% ku)], unique(exposure)]
 40 | # harm_all <- harm_all[!(exposure %in% to_remove),]
 41 | ###primary analysis
 42 | harm_all_split <- split(harm_all, harm_all$exposure_outcome)
 43 | perform_primary_analysis <- function(dat) {
 44 |   # Primary MR estimate for one harmonised exposure-outcome dataset `dat`.
 45 |   # "mr_ivw" is used whatever the number of SNPs in `dat`: multiplicative
 46 |   # random effects IVW estimate. The standard error is corrected for under
 47 |   # dispersion. A separate method for datasets of 3 SNPs or fewer
 48 |   # ("mr_ivw_fe") is not enabled.
 49 |   # Returns the result of TwoSampleMR::mr() for that single method.
 50 |   ivw_method <- "mr_ivw"
 51 |   TwoSampleMR::mr(dat, method_list = ivw_method)
 52 | }
 53 | primary_df<-map(harm_all_split, perform_primary_analysis) %>% rbindlist
 54 | 
 55 | setDT(primary_df)
 56 | willer_primary <- primary_df[exposure == "willer_LDL-cholesterol:ieu-a-300"]
 57 | primary_df<- primary_df[exposure != "willer_LDL-cholesterol:ieu-a-300"]
 58 | 
 59 | 
 60 | #adjust for multiple  testing
 61 | primary_df$fdr <- p.adjust(primary_df$pval, method = "fdr")#OK so nothing comes out
 62 | primary_df[order(fdr)]
 63 | 
 64 | primary_df[fdr < 0.05,]
 65 | primary_df[,min(fdr)]
 66 | 
 67 | ##steiger filtering
 68 | harm_all <- steiger_filtering(harm_all)
 69 | setDT(harm_all)
 70 | harm_all_steiger <- harm_all[harm_all$steiger_dir,]
 71 | setDT(harm_all_steiger)
 72 | harm_all_steiger_split <- split(harm_all_steiger, harm_all_steiger$exposure_outcome)
 73 | 
 74 | primary_df_steiger <-map(harm_all_steiger_split, perform_primary_analysis) %>% rbindlist
 75 | 
 76 | #remove SNPs in the ABO, APOE, HLA-A gene region
 77 | to_exclude = c("APOE", "ABO", "HLA-A")
 78 | window = 2e+06
 79 | gencode <- fread("/home/couchr02/Mendel_Commun/Nicolas/GTEx/gencode.v19.genes.v7.patched_contigs.txt")
 80 | list <- vector(mode = "list", length = length(to_exclude))
 81 | for (i in 1:length(to_exclude)) {
 82 |   bon <- gencode[gene_name == to_exclude[i], ]
 83 |   list[[i]] <- data.frame(chr = bon[1, ]$chr, start = min(bon$start) - 
 84 |                             window/2, end = max(bon$end) + window/2, gene_name = bon[1, 
 85 |                             ]$gene_name)
 86 | }
 87 | region_df <- rbindlist(list)
 88 | harm_all[, is_in_pleiotropic_region := FALSE]
 89 | for (i in 1:nrow(region_df)) {
 90 |   harm_all[(chr.exposure == region_df[i, ]$chr) & 
 91 |            (pos.exposure >= region_df[i, ]$start) & (pos.exposure <= 
 92 |                                                        region_df[i, ]$end), is_in_pleiotropic_region := TRUE]
 93 | }
 94 | harm_all_notpleio <- harm_all[is_in_pleiotropic_region == FALSE, ]
 95 | setDT(harm_all_notpleio)
 96 | harm_all_notpleio_split <- split(harm_all_notpleio, harm_all_notpleio$exposure_outcome)
 97 | 
 98 | primary_df_notpleio <- map(harm_all_notpleio_split, perform_primary_analysis) %>% rbindlist
 99 | 
# Write the Steiger-filtered and region-filtered results, plus harm_all as it
# stands at this point (i.e. before the id columns are overwritten below).
fwrite(x = primary_df_steiger, file = "Data/Modified/primary_df_steiger.txt")
fwrite(x = primary_df_notpleio, file = "Data/Modified/primary_df_notpleio.txt")
fwrite(x = harm_all, file = "Data/Modified/harm_all.txt")

### robust MR methods
setDT(harm_all)
primary_df[, exposure_outcome := paste0(exposure, "_", outcome)]
# Pairs with p < 0.05 in the primary analysis, Willer LDL excluded.
exposure_sign <- primary_df[pval < 0.05, exposure_outcome]
exposure_sign <- exposure_sign[!grepl("willer_LDL", exposure_sign)]

harm_all[, exposure_outcome := paste0(exposure, "_", outcome)]
# The id columns are replaced by the exposure/outcome names.
harm_all[, `:=`(id.exposure = exposure, id.outcome = outcome)]

harm_sign <- harm_all[exposure_outcome %in% exposure_sign]
list_harm_sign <- split(harm_sign, harm_sign$exposure_outcome)
list_sensitivity <- map(list_harm_sign, function(pair_dat) {
  GagnonMR::all_mr_methods(dat = pair_dat, Primary_mr = "random_underdispersion")
})

# Methods dropped from the sensitivity tables.
dropped_methods <- c("Weighted mode", "MR Egger", "Robust adjusted profile score (RAPS)")
list_sensitivity <- lapply(list_sensitivity, function(res) {
  res[!(method %in% dropped_methods), ]
})
116 | 
# Egger intercept test for every significant pair.
# TRUE  == intercept p-value < 0.05;
# FALSE == intercept (average pleiotropy) not significantly different from zero.
egger_intercept_test <- map_dbl(list_harm_sign, function(x) {
  as.data.table(mr_pleiotropy_test(x))$pval
}) < 0.05
df_egger <- data.frame(exposure_outcome = names(egger_intercept_test),
                       intercept_test = egger_intercept_test)

# Add the pair label and the intercept-test flag to every sensitivity table.
# The result is assigned: the previous version discarded the value of the
# labelling map() (relying only on data.table's by-reference update) and, as a
# top-level expression, printed the whole list.
list_sensitivity <- map(list_sensitivity, function(x) {
  x[, exposure_outcome := paste0(exposure, "_", outcome)]
  merge(x, df_egger, by = "exposure_outcome")
})
125 | 
# veclog[pair] is TRUE when the pair passes every robustness criterion:
#   - outlier-robust estimate(s) (MR-PRESSO outlier-corrected / IVW radial)
#     with p < 0.05
#   - weighted median with p < 0.05
#   - Egger intercept test not significant (intercept_test == FALSE)
#   - contamination mixture with p < 0.05
toinclude <- FALSE # Do we want to include those that could not be evaluated

# TRUE when every row of `rows` has p < 0.05; `toinclude` when there is no row.
# Always returns a single value. The previous code used `pval < 0.05` directly,
# which has one element per matching row: with two outlier-robust rows only the
# first was kept (with a warning), and with no weighted-median row the
# assignment into veclog failed.
all_significant <- function(rows) {
  if (nrow(rows) == 0) toinclude else all(rows$pval < 0.05)
}

veclog <- vector(mode = "logical", length = length(list_sensitivity))
for (i in seq_along(list_sensitivity)) {
  dat <- list_sensitivity[[i]]
  outlier_robust_val <- all_significant(
    dat[method %in% c("MR-PRESSO (Outlier-corrected)", "IVW radial"), ]
  )
  median_val <- all_significant(dat[method == "Weighted median", ])
  conta_val <- all_significant(dat[method == "Contamination mixture", ])
  # intercept_test is the same on every row of a pair; read it from the first.
  no_egger_signal <- dat[1, intercept_test == FALSE]

  veclog[i] <- outlier_robust_val && median_val && no_egger_signal && conta_val
  names(veclog)[i] <- unique(dat$exposure_outcome)
}

saveRDS(veclog, "Data/Modified/Sensitivity/veclog")
saveRDS(list_sensitivity, "Data/Modified/Sensitivity/list_sensitivity")
# Power analysis: one power value per exposure/outcome pair, for an effect
# of 0.1.
list_harm <- split(harm_all, harm_all$exposure_outcome)

vec_power <- map_dbl(list_harm, function(pair_dat) {
  power.calculator_fromdat_vishner(pair_dat, effect = 0.1)
})

# Attach the power to the primary results by pair label, add the Willer rows
# back (they get no fdr/power; fill = TRUE) and write the final table.
primary_df[, exposure_outcome := paste0(exposure, "_", outcome)]
primary_df[, power := vec_power[match(exposure_outcome, names(vec_power))]]
primary_df <- rbindlist(list(primary_df, willer_primary), fill = TRUE)
fwrite(x = primary_df,
       file = "/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Primary/primary_df")
155 | 
# MVMR correcting for BMI and alcohol intake frequency

# Only the pairs whose veclog entry is TRUE are carried into the MVMR.
passed_pairs <- names(veclog)[veclog == TRUE]
harm_all_sen <- harm_all[exposure_outcome %in% passed_pairs]
id_vec <- c("ukb-b-19953", "ukb-b-5779") # BMI, alcohol in UKB
list_harm_sen <- split(harm_all_sen, harm_all_sen$exposure_outcome)
161 | 
# Multivariable MR of one exposure/outcome pair together with one additional
# exposure read from a GWAS VCF.
#   dat: harmonised data for a single exposure/outcome pair.
#   id:  study id; the VCF is read from
#        /mnt/sda/gagelo01/Vcffile/MRBase_vcf/<id>/<id>.vcf.gz
# Reads the global `inst_clump`. Returns the MVMR result rows for the exposure
# found in `dat`.
sensitivity_mvmr <- function(dat, id) {
  exposure_name <- unique(dat$exposure)
  the_clump <- inst_clump[exposure == exposure_name, ]

  # Look up the SNPs of `dat` (proxies allowed) in the confounder GWAS and
  # format them as exposure data.
  vcf_path <- paste0("/mnt/sda/gagelo01/Vcffile/MRBase_vcf/", id, "/", id, ".vcf.gz")
  confound_vcf <- gwasvcf::query_gwas(vcf = vcf_path,
                                      rsid = dat$SNP,
                                      proxies = "yes",
                                      bfile = "/home/couchr02/Mendel_Commun/Christian/LDlocal/EUR_rs")
  confound <- gwasglue::gwasvcf_to_TwoSampleMR(confound_vcf, "exposure")

  inst_mvmr <- rbind(the_clump, confound, fill = TRUE)
  df_instrument <- GagnonMR::prepare_for_mvmr(exposure_dat = inst_mvmr,
                                              d1 = inst_mvmr,
                                              clump_r2 = 0.01,
                                              clump_kb = 10000,
                                              harmonise_strictness = 1,
                                              pval_threshold = 1,
                                              clump_exp = NULL,
                                              should_clump = TRUE)

  # Outcome side of `dat`: SNP plus every column whose name contains "outcome".
  outcome_names <- c("SNP", grep("outcome", colnames(dat), value = TRUE))
  outcome <- dat[, .SD, .SDcols = outcome_names]

  exposure_outcome_harmonized <- mv_harmonise_data(exposure_dat = df_instrument,
                                                   outcome_dat = outcome,
                                                   harmonise_strictness = 1)
  mvmr_results <- GagnonMR::mv_multiple_MendelianRandomization(
    exposure_outcome_harmonized = exposure_outcome_harmonized,
    only_IVW = TRUE
  )

  # Keep only the rows about the exposure of interest.
  mvmr_results[exposure == exposure_name, ]
}
184 | 
# One MVMR per retained pair and per additional exposure (BMI, then alcohol).
list_counfound_BMI <- map(list_harm_sen, function(pair_dat) {
  sensitivity_mvmr(dat = pair_dat, id = id_vec[1])
})
list_counfound_alcohol <- map(list_harm_sen, function(pair_dat) {
  sensitivity_mvmr(dat = pair_dat, id = id_vec[2])
})


# Unadjusted reference: first row of every retained sensitivity table,
# relabelled "IVW".
no_counfound <- rbindlist(lapply(list_sensitivity[veclog], function(x) x[1, ]))
no_counfound$MVMR <- "no counfounder"
no_counfound$method <- "IVW"
# setnames(no_counfound, c("exposure", "outcome", "b", "se", "lci","uci", "pval"),
         # c("Exposure", "Outcome",  "Estimate", "StdError", "CILower",  "CIUpper",  "Pvalue"))

withbmi <- rbindlist(list_counfound_BMI)
withbmi$MVMR <- "with BMI"

withalcohol <- rbindlist(list_counfound_alcohol)
withalcohol$MVMR <- "with alcohol intake frequency"

# Stack the three sets (columns missing from a set are filled with NA), sort
# by pair and write the table.
keep_cols <- c("method", "exposure", "outcome", "b", "se", "lci", "uci", "pval", "MVMR", "nsnp")
MVMR <- rbindlist(list(no_counfound[, keep_cols, with = FALSE],
                       withbmi,
                       withalcohol),
                  fill = TRUE)


MVMR <- MVMR[order(exposure, outcome)]

fwrite(x = MVMR, file = "Data/Modified/Sensitivity/MVMR")



message("This script finished without errors")
211 | 
212 | # 
213 | # ########################## descriptive statistics   ###################################
214 | # harm_all <- fread( "Data/Modified/harm_all.txt")
215 | # harm_all <- harm_all[exposure != "willer_LDL-cholesterol:ieu-a-300",]
216 | # primary_df <- fread( "/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Primary/primary_df")
217 | # primary_df <- primary_df[exposure != "willer_LDL-cholesterol:ieu-a-300"  ]
218 | # primary_df[, exposure_outcome := paste0(exposure, "_", outcome)]
219 | # harm_all[, .N, by = "exposure_outcome"][order(N)]
220 | # list_sensitivity <- readRDS( "Data/Modified/Sensitivity/list_sensitivity")
221 | # vec5<- sapply(list_sensitivity, function(x) sum(x[, pval] < 0.05 )) # est-ce qu'il y a
222 | # #included exposure
223 | # harm_all$exposure %>% unique %>% length
224 | # harm_all$outcome %>% unique %>% length
225 | # harm_all$exposure_outcome %>% unique %>% length
226 | # #included microbial metabolites
227 | # harm_all[study %in% c("sanna", "kettunen", "framingham"), length(unique(exposure))]
228 | # harm_all[study %in% c("sanna", "kettunen", "framingham"), length(unique(exposure_outcome))]
229 | # primary_df[exposure_outcome %in% harm_all[study %in% c("sanna", "kettunen", "framingham"), ]$exposure_outcome, ][,sum(pval < 0.05)]
230 | # 
231 | # primary_df_full[exposure == "willer_LDL-cholesterol:ieu-a-300" & outcome %in% c("van_der_Harst_CAD", "Timmers_parental_lifespan") , ][,.(outcome,b, b-1.96*se, b+1.96*se, pval)]
232 | # 
233 | # #included microbial taxa abundance
234 | # harm_all[study %in% c("kurilshikov", "ruhlemann"), length(unique(exposure))]
235 | # harm_all[study %in% c("kurilshikov", "ruhlemann"), length(unique(exposure_outcome))]
236 | # primary_df[exposure_outcome %in% harm_all[study %in% c("kurilshikov", "ruhlemann"), ]$exposure_outcome, ][,sum(pval < 0.05)]
237 | # 
238 | # #general
239 | # primary_df[,length(unique(exposure_outcome))]
240 | # primary_df[,sum(pval < 0.05)]
241 | # primary_df[which.min(fdr),]
242 | # 
243 | # #power
244 | # mean(primary_df$power)
245 | # sd(primary_df$power)
246 | # 
247 | # #sensitivity
248 | # list_sensitivity <- readRDS( "Data/Modified/Sensitivity/list_sensitivity")
249 | # length(list_sensitivity)
250 | # sum(vec5>=4)
251 | # list_sensitivity[vec5 >= 4]
252 | # list_sensitivity$kettunen_Ile_NAFLD[method == "Inverse variance weighted", c(exp(b), exp(lci), exp(uci))]
253 | # list_sensitivity$kettunen_Ile_NAFLD
254 | # 
255 | # ##mvmr
256 | # MVMR<-fread( "Data/Modified/Sensitivity/MVMR")
257 | # MVMR[, max(Pvalue), by = c("Exposure", "Outcome")]
258 | # 
259 | # #positive findings
260 | # ok <- MVMR[, max(Pvalue), by = c("Exposure", "Outcome")]
261 | # dt_sensitivity <- rbindlist(list_sensitivity, fill = TRUE)
262 | # dt_sensitivity[exposure == ok[V1< 0.05,]$Exposure[1] & outcome == ok[V1< 0.05,]$Outcome[1],][method == "Inverse variance weighted",c( exposure, exp(b), exp(lci), exp(uci))]
263 | # dt_sensitivity[exposure == ok[V1< 0.05,]$Exposure[2] & outcome == ok[V1< 0.05,]$Outcome[2],][method == "Inverse variance weighted",c( exposure, exp(b), exp(lci), exp(uci))]
264 | # dt_sensitivity[exposure == ok[V1< 0.05,]$Exposure[3] & outcome == ok[V1< 0.05,]$Outcome[3],][method == "Inverse variance weighted",c( exposure, exp(b), exp(lci), exp(uci))]
265 | # 
266 | # 
267 | # dt_sensitivity[exposure == ok[V1> 0.05 & V1 < 0.1,]$Exposure[1] & outcome == ok[V1> 0.05 & V1 < 0.1,]$Outcome[1],][method == "Inverse variance weighted",c( exposure, exp(b), exp(lci), exp(uci))]
268 | # dt_sensitivity[exposure == ok[V1> 0.05 & V1 < 0.1,]$Exposure[2] & outcome == ok[V1> 0.05 & V1 < 0.1,]$Outcome[2],][method == "Inverse variance weighted",c( exposure, b, lci, uci)]
269 | # 
270 | 


--------------------------------------------------------------------------------
/5_create_plots_figures.R:
--------------------------------------------------------------------------------
  1 | library(ckbplotr) #it has to loaded at the beginning of the session
  2 | library(data.table)
  3 | library(readxl)
  4 | library(TwoSampleMR)
  5 | library(tidyverse)
  6 | library(GagnonMR)
  7 | library(ggpubr)
  8 | library(ggforestplot)
  9 | library(ggforce)
 10 | 
 11 | #creating report and plots
 12 | #clean primary_df
 13 | primary_df_full <- fread("/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Primary/primary_df")
 14 | inst_clump <- fread( "Data/Modified/inst_clump.txt")
 15 | 
 16 | 
 17 | cleanify <- function(primary_df_full) {
 18 | primary_df_full$exposure_clean <- as.character(primary_df_full$exposure) %>% 
 19 |   ifelse(. == "framingham_betaine", "Betaine", .) %>%
 20 |   ifelse(. == "framingham_carnitine", "Carnitine", .) %>%
 21 |   ifelse(. == "framingham_choline", "Choline", .) %>%
 22 |   ifelse(. == "framingham_indole_3_propionate", "Indole-3-propionate", .) %>%
 23 |   ifelse(. == "kettunen_Ace", "Acetate", .) %>%
 24 |   ifelse(. == "lotta_Ile", "Isoleucine", .) %>%
 25 |   ifelse(. == "lotta_Leu", "Leucine", .) %>%
 26 |   ifelse(. == "lotta_Serotonin", "Serotonin", .) %>%
 27 |   ifelse(. == "lotta_Val" , "Valine", .) %>%
 28 |   ifelse(. == "lotta_PEA", "Phenylethylamine", . ) %>%
 29 |   ifelse(. == "lotta_Glu", "Glutamate", . ) %>% 
 30 |   ifelse(. == "lotta_Asp", "Aspartate", .) %>%
 31 |   ifelse(. == "lotta_C0", "Carnitine", .) %>%
 32 |   ifelse(. == "lotta_Kynurenine", "Kynurenine", .) %>% 
 33 |   ifelse(. == "framingham_trimethylamine_N_oxide", "Trimethylamine N-oxide (TMAO)", .) %>%
 34 |   ifelse(. == "sanna_fecal_propionate_levels", "Fecal propionate", .) %>%
 35 |   ifelse(. == "sanna_PWY-5022", "Pathway PWY-5022", .) %>%
 36 |   ifelse(. == "willer_LDL-cholesterol:ieu-a-300", "LDL cholesterol", .) 
 37 | 
 38 | 
 39 | # primary_df_full$outcome_clean <- sapply(strsplit(primary_df_full$outcome, "_"), function(x) x[length(x)])
 40 | primary_df_full$outcome_clean <- primary_df_full$outcome %>% 
 41 |   ifelse(. == "osteoporosis", "Osteoporosis", .) %>%
 42 |   ifelse(. == "NAFLD", "Non-alcoholic fatty liver disease", .) %>%
 43 |   ifelse(. == "van_der_Harst_CAD" , "Coronary artery disease", .) %>%
 44 |   ifelse(. == "Malik_Stroke", "Ischemic stroke", .) %>%
 45 |   ifelse(. == "Mahajan_Type2diabetes", "Type 2 diabetes", .) %>%
 46 |   ifelse(. == "Jansen_Alzheimer", "Alzheimer's disease", .) %>%
 47 |   ifelse(. == "Timmers_parental_lifespan", "Parental lifespan", .) %>%
 48 |   ifelse(. == "Wuttke_Chronic_kidney" , "Chronic kidney disease", .) %>%
 49 |   ifelse(. == "Deelen_longevity", "Human longevity", .) %>%
 50 |   ifelse(. == "Howard_Depression", "Depression", .) %>%
 51 |   ifelse(. == "diastolic_blood_pressure", "Diastolic blood pressure", .) %>%
 52 |   ifelse(. == "Fasting_Glucose_standardised", "Fasting glucose", .) %>%
 53 |   ifelse(. == "Fasting_Insulin", "Fasting insulin", .) %>%
 54 |   ifelse(. == "ieu-b-109" , "HDL cholesterol", .) %>%
 55 |   ifelse(. == "ieu-b-110", "LDL cholesterol", .) %>%
 56 |   ifelse(. == "ieu-b-111", "Triglycerides", .) %>%
 57 |   ifelse(. == "Stanzick_eGFR", "Estimated glomerular filtration rate", .) %>%
 58 |   ifelse(. == "systolic_blood_pressure" , "Systolic blood pressure", .) %>%
 59 |   ifelse(. == "ukb-b-12141", "Osteoporosis", .) %>%
 60 |   ifelse(. == "ukb-b-19953" , "Body mass index", .)
 61 | 
 62 | primary_df_full[, outcome_category := ifelse(outcome_clean %in% c("Coronary artery disease", "Ischemic stroke", 
 63 |                                                                   "Chronic kidney disease",
 64 |                                                                   "Type 2 diabetes", "Non-alcoholic fatty liver disease",
 65 |                                                      "Osteoporosis", "Depression", "Alzheimer's disease", "Human longevity", 
 66 |                                                      "Parental lifespan"), "Disease", "Cardiometabolic trait")]
 67 |                                                      
 68 | 
 69 | primary_df_full[, outcome_clean := factor(outcome_clean,
 70 |             levels = c("Coronary artery disease", "Ischemic stroke", "Chronic kidney disease", "Type 2 diabetes", "Non-alcoholic fatty liver disease",
 71 |                       "Osteoporosis", "Depression", "Alzheimer's disease", "Human longevity", "Parental lifespan", "HDL cholesterol", 
 72 |                       "LDL cholesterol","Triglycerides",  "Systolic blood pressure", "Diastolic blood pressure","Fasting glucose", 
 73 |                       "Fasting insulin", "Estimated glomerular filtration rate", "Body mass index"))]
 74 | #Category
 75 | primary_df_full$Category <- primary_df_full$exposure %>%
 76 |   ifelse(. == "IBD_IBD", "Dysbiotic disease", . ) %>%
 77 |   ifelse(grepl("fecal_propionate_levels",.), "Fecal Metabolites", .) %>%
 78 |   ifelse(grepl("PWY-", .) & grepl("sanna", .), "Microbial Pathway", .) %>%
 79 |   ifelse(grepl("framingham", .) | grepl("kettunen|lotta", .),  "Plasma Metabolites", .) %>%
 80 |   ifelse(grepl("dutch", .) & grepl("PWY", .), "Microbial Pathway", .) %>%
 81 |   ifelse(grepl("dutch", .) & !grepl("PWY", .), "Microbe Abundance", .) %>%
 82 |   ifelse(grepl("fin_gut", .) |  grepl("ruhlemann",.) | grepl("kurilshikov",.), "Microbe Abundance", .) %>%
 83 |   ifelse(grepl("willer_LDL", .), "Positive control", .)
 84 | 
 85 | all(primary_df_full$Category %in% c("Dysbiotic disease", "Fecal Metabolites", "Plasma Metabolites", "Microbe Abundance", "Microbial Pathway")) #if TRUE great
 86 | 
 87 | setDT(primary_df_full)
 88 | primary_df_full[,Category := factor(Category, levels = c("Dysbiotic disease", "Plasma Metabolites","Fecal Metabolites", "Microbial Pathway", "Microbe Abundance", "Positive control"))]
 89 | primary_df_full<- primary_df_full[order(Category,outcome, exposure)]
 90 | 
 91 | primary_df_full[Category == "Microbe Abundance", ]$exposure_clean <- sapply(strsplit(primary_df_full[Category == "Microbe Abundance", ]$exposure_clean, "_"),
 92 |                                                                            function(x) paste0(x[2:length(x)], collapse = "_"))
 93 | 
 94 | primary_df_full[Category == "Microbe Abundance", exposure_clean := gsub("(", " (", exposure_clean, fixed = TRUE)]    
 95 | return(primary_df_full)
 96 | }
 97 | primary_df_full <- cleanify(primary_df_full = primary_df_full)
 98 | fwrite(primary_df_full, "Data/Modified/primary_df_clean.txt")
 99 | #This document builds mostly on https://mrcieu.github.io/TwoSampleMR/articles/perform_mr.html
100 | 
101 | #figure 3 -> Create a forest plot of all IVW with IBD I want it to look like in Iyas and all.
102 | #https://neilstats.github.io/ckbplotr/articles/make_forest_plot.html#forest-plot-with-row-labels
103 | 
104 | ###a better way I think. All exposures. Outcome as title. One forest plot per outcome.
105 | # primary_df <- primary_df_full[exposure == "IBD_IBD",]
106 | # resultsA <- data.frame(variable = paste0(primary_df$outcome_clean),
107 | #                          estimate = primary_df$b,
108 | #                          stderr =  primary_df$se,
109 | #                          n = primary_df$nsnp,
110 | #                        P_value = formatC(primary_df$pval, format = "e", digits = 1))
111 | # 
112 | # 
113 | # setDT(resultsA)
114 | # resultsA <- resultsA[order(-estimate)]
115 | # m<-make_forest_plot(panels         = list(resultsA),
116 | #                  col.key        = "variable",
117 | #                  panel.headings = "Primary MR analysis for inflammatory bowel disease \n and 10 health outcomes",
118 | #                  exponentiate   = TRUE,
119 | #                  nullval    = 1,
120 | #                  pointsize        = 2,
121 | #                  col.left         = c("n"),
122 | #                  col.left.heading = c("n SNP"),
123 | #                  col.right = "P_value",
124 | #                  col.right.heading = c("OR (95% CI)", "P-value"),
125 | #                  printplot = TRUE,
126 | #                  xlab =  "Effect of IBD on chronic diseases and longevity",
127 | #                  colour = "blue",
128 | #                  cicolour = "darkred")
129 | # 
130 | # # ggsave("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/IBD_IVW_forest_plot.png",
131 | # #        width=436/72,height=244/72,units="in",scale=1,
132 | # #        device = "png")
133 | # 
134 | # b<- m[[1]]
135 | # a <- readRDS( file = "Analysis/LD_score/Results/Forest_plot_IBD_Rg_benoitversion.rdata")
136 | # 
137 | # ggarrange(a, b, 
138 | #           labels = c("A", "B"),
139 | #           ncol = 1, nrow = 2)
140 | # 
141 | # ggsave("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/2_IBD_twopanel_plot.tiff",
142 | #        width=700/72,height=730/72,units="in",scale=1,
143 | #        device = "tiff")
144 | ###huge code for other stuff
145 | # list_primary_forest <- vector(mode = "list", length = length(unique(primary_df_full$outcome)))
146 | # for(i in 1:length(unique(primary_df_full$outcome))) {
147 | #   primary_df <- primary_df_full[outcome == unique(outcome)[i]]
148 | # 
149 | #   resultsA <- data.frame(variable = paste0(primary_df$exposure),
150 | #                          estimate = primary_df$b,
151 | #                          stderr =  primary_df$se,
152 | #                          n = primary_df$nsnp)
153 | # 
154 | #   if(primary_df[1,outcome] == "Timmers_parental_lifespan") {
155 | #     exponentiate <- FALSE
156 | #     col.right.heading <- "Beta (95% CI)"
157 | #     xlab <- "Difference in lifespan years caused by gut dysbiosis"
158 | #     nullval <- 0
159 | #   } else {
160 | #     exponentiate <- TRUE
161 | #     col.right.heading <- "HR (95% CI)"
162 | #     xlab <- "Hazard ratio of having a disease caused by microbiome"
163 | #     nullval <- NULL
164 | #   }
165 | # 
166 | #   list_primary_forest[[i]] <-
167 | #     make_forest_plot(panels         = list(resultsA),
168 | #                      col.key        = "variable",
169 | #                      panel.headings = paste0("Primarary MR analysis for ", primary_df[1,outcome]),
170 | #                      exponentiate   = exponentiate,
171 | #                      nullval    = nullval,
172 | #                      pointsize        = 2,
173 | #                      col.left         = c("n"),
174 | #                      col.left.space   = c(0.02),
175 | #                      col.left.heading = c("n SNP"),
176 | #                      col.right.heading = col.right.heading,
177 | #                      printplot = FALSE,
178 | #                      xlab = xlab)
179 | # 
180 | #   print(list_primary_forest[[i]])
181 | #   ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Primary/Forest_plot/",
182 | #                 i, "_", unique(primary_df_full$outcome)[i], "_forest_plot.png" ),
183 | #          width=12,height=12,units="in",scale=1,
184 | #          device = "png")
185 | # 
186 | # }
187 | # 
188 | # saveRDS(list_primary_forest, "/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Primary/Forest_plot/list_primary_forest")
189 | # 
190 | 
191 | #Balloon_plot figure 4 - 5
# Z-score of every association, then reorder the rows by Category with the
# positive control first.
primary_df_full[, zscore := b / se]
primary_df_full[, Category := factor(Category,
                                     levels = c("Positive control", "Dysbiotic disease",
                                                "Fecal Metabolites", "Microbial Pathway",
                                                "Plasma Metabolites", "Microbe Abundance"))]
primary_df_full <- primary_df_full[order(Category)]
# A trait paired with itself (same clean label on both sides) is blanked out.
primary_df_full[exposure_clean == outcome_clean, c("b", "se", "pval", "zscore") := NA]

# Plotting table. Author's note (translated): this is confusing, but since I
# want the exposures on the X axis I swap them -- the `exposure` column holds
# outcome_clean and the `outcome` column holds exposure_clean. Factor levels
# follow the current row order.
dat_plot <- with(primary_df_full, data.frame(
  exposure = factor(outcome_clean, levels = unique(outcome_clean)),
  outcome = factor(exposure_clean, levels = unique(exposure_clean)),
  pval = as.numeric(pval),
  z_score = as.numeric(zscore),
  beta_score = as.numeric(b),
  Category = Category,
  outcome_category = outcome_category
))
#Category = factor(primary_df_full$Category, levels = sort(unique(primary_df_full$Category))))

# Dysbiotic disease rows are not shown on the balloon plots.
dat_plot <- dat_plot[dat_plot$Category != "Dysbiotic disease", ]
setDT(dat_plot)
dat_plot <- dat_plot[order(Category)]
208 | ###########
# Draw a balloon plot of MR results and print it.
#
#   dat_plot: table with columns exposure (x axis), outcome (y axis), pval,
#             beta_score and outcome_category (facet columns).
#   bonferroni_threshold: p-value below which an association is drawn as a
#             filled balloon (size = -log10(p), colour = beta); the others are
#             drawn as a small cross.
#
# Returns the ggplot object invisibly (value of print()).
plot_balloon <- function(dat_plot, bonferroni_threshold = 0.05) {
  dat_plot$pval <- as.numeric(dat_plot$pval)
  dat_plot$log10_pval <- -log10(dat_plot$pval)

  # Computed once and vectorised; NA where pval is NA. The previous
  # sapply(..., ifelse(...)) calls were type-unstable and returned list() on
  # empty input.
  is_signif <- dat_plot$pval < bonferroni_threshold

  # Significant associations only: everything else is blanked out.
  dat_plot_rond <- dat_plot
  dat_plot_rond$beta_score[which(!is_signif)] <- NA
  dat_plot_rond$log10_pval[which(!is_signif)] <- NA
  dat_plot_rond$shape_point <- ifelse(is_signif, "rond", NA)

  # Non-significant associations only.
  dat_plot_croix <- dat_plot
  dat_plot_croix$beta_score[which(is_signif)] <- NA
  dat_plot_croix$log10_pval[which(is_signif)] <- NA
  dat_plot_croix$shape_point <- ifelse(!is_signif, "Non-significant", NA)

  balloon_plot <- ggplot2::ggplot() +
    ggplot2::geom_point(data = dat_plot_croix,
                        ggplot2::aes(x = exposure, y = outcome, shape = factor(shape_point)),
                        size = 2, color = "gray20") +
    ggplot2::geom_point(data = dat_plot_rond,
                        ggplot2::aes(x = exposure, y = outcome, size = log10_pval, color = beta_score)) +
    ggplot2::facet_grid(. ~ outcome_category, scales = "free_x") +
    ggplot2::scale_color_gradient2(name = "Beta",
                                   low = scales::muted("#5884E5"),
                                   mid = "white",
                                   high = scales::muted("#9E131E"),
                                   midpoint = 0
    ) +
    ggplot2::scale_shape_manual(name = "", values = c(4, 1)) +
    ggplot2::scale_size(name = expression(-Log[10](P)), range = c(4, 7)) +
    # ggplot2::coord_fixed(clip = "off", ratio = 1) +
    ggplot2::guides(size = ggplot2::guide_legend(order = 1),
                    shape = ggplot2::guide_legend(order = 2)) +
    # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` in
    # element_line(); `size` is kept so older installations still work.
    ggplot2::theme(
      panel.grid.major.y = ggplot2::element_line(size = 0.25, colour = "gray60"),
      panel.grid.major.x = ggplot2::element_line(size = 0.25, colour = "gray60"),
      panel.grid.minor.y = ggplot2::element_blank(),
      panel.grid.minor.x = ggplot2::element_blank(),
      panel.background = ggplot2::element_blank(),
      plot.margin = ggplot2::margin(t = 2, r = 0.5, b = 0.5, l = 0.5, "cm"),
      legend.position = "right",
      legend.text = ggplot2::element_text(
        color = "gray20",
        size = 10,
        margin = ggplot2::margin(l = 0.2, r = 0.2)
      ),
      legend.spacing.y = ggplot2::unit(0.1, 'cm'),
      legend.key = ggplot2::element_rect(fill = "transparent", colour = "transparent"),
      legend.key.size = ggplot2::unit(0.8, "cm"),
      axis.title = ggplot2::element_blank(),
      axis.line = ggplot2::element_line(size = 0.5, colour = "gray20"),
      axis.ticks = ggplot2::element_line(size = 0.5, colour = "gray20"),
      axis.text.y = ggplot2::element_text(
        size = 10,
        colour = "gray20"
      ),
      axis.text.x = ggplot2::element_text(
        angle = 60,
        size = 8,
        hjust = 1,
        face = "plain",
        colour = "gray20"
      ))

  # Printing also makes this the plot picked up by a following ggsave().
  print(balloon_plot)

}
276 | 
277 | #############
# Balloon plot without the microbe abundance exposures; ggsave() writes the
# plot that was just printed.
plot_balloon(dat_plot[Category != "Microbe Abundance"])

ggsave(filename = "/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/2_balloon_plot_nomicrobre.png",
       width = 638 / 72, height = 458 / 72, units = "in", scale = 1,
       device = "png")


# Balloon plot with the microbe abundance exposures and the positive control.
dat_plot2 <- dat_plot[Category %in% c("Microbe Abundance", "Positive control")]
plot_balloon(dat_plot2)
ggsave(filename = "/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/3_balloon_plot_onlymicrobre.png",
       width = 750 / 72, height = 1400 / 72, units = "in", scale = 1, dpi = 700,
       device = "png")
290 | 
291 | # dat_plot2 <- dat_plot[dat_plot$Category == "Microbe Abundance", ]
292 | # 
293 | # kk <-unique(dat_plot2$outcome)
294 | # split_kk <- split(kk, ceiling(seq_along(kk)/(length(kk)/2+1)))
295 | # 
296 | # dat_plot2 <- dat_plot[(dat_plot$Category == "Microbe Abundance" & 
297 | #                          dat_plot$outcome %in% split_kk[[1]]) |
298 | #                         (dat_plot$Category ==  "Positive control"),]
299 | # 
300 | # 
301 | # plot_balloon(dat_plot2)
302 | # 
303 | # ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/3b_balloon_plot_onlymicrobre.png"),
304 | #        width=750/72,height=900/72,units="in",scale=1, dpi=700,
305 | #        device = "png")
306 | # 
307 | # dat_plot3 <- dat_plot[(dat_plot$Category == "Microbe Abundance" & 
308 | #                          dat_plot$outcome %in% split_kk[[2]]) |
309 | #                         (dat_plot$Category ==  "Positive control"),]
310 | # 
311 | # 
312 | # plot_balloon(dat_plot3)
313 | # 
314 | # ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/3b_balloon_plot_onlymicrobre.png"),
315 | #        width=750/72,height=900/72,units="in",scale=1, dpi=700,
316 | #        device = "png")
317 | 
318 | #######Sensitivity forest
319 | #sensitivity
# Sensitivity tables and the per-pair pass/fail vector read back from disk.
list_sensitivity <- readRDS(file = "Data/Modified/Sensitivity/list_sensitivity")
veclog <- readRDS(file = "Data/Modified/Sensitivity/veclog")

# x <- list_sensitivity[[1]]
# x[method %in% c("Weighted mode", "Weighted median"), any(pval<0.05)] & 
#   x[method %in% c("IVW radial", "Weighted median"), any(pval<0.05)]
# vec_which<-sapply(list_sensitivity, function(x) x[method %in% c("Weighted mode", "Weighted median"), pval]
# df_compare_traits_groups


# Printed for inspection: number of pairs, and number with veclog TRUE.
length(list_sensitivity)
sum(veclog)

# Stack the retained pairs and add the display columns.
dt_sen <- cleanify(rbindlist(list_sensitivity[veclog], fill = TRUE))

dt_sen <- dt_sen[order(exposure_clean, outcome_clean)]
max_chr_exp <- dt_sen[, max(nchar(exposure_clean))]
# outcome_clean comes back from cleanify() as a factor; plain text from here.
dt_sen[, outcome_clean := as.character(outcome_clean)]
max_chr_out <- dt_sen[, max(nchar(outcome_clean))]
339 | 
340 | #exposure_really_clean
341 | # dt_sen$exposure_really_clean <- 
342 | #   dt_sen[, paste0(exposure_clean, paste(rep("-", max_chr_exp - nchar(exposure_clean)),collapse = "")), by = seq_len(nrow(dt_sen))]$V1
343 | # 
344 | # 
345 | # dt_sen$outcome_really_clean <- 
346 | #   dt_sen[, paste0(paste(rep("-", max_chr_out - nchar(outcome_clean)),collapse = ""), outcome_clean), by = seq_len(nrow(dt_sen))]$V1
347 | # 
348 | 
# Row label shown on the forest plot: exposure label, an arrow, outcome label.
dt_sen[, exposure_really_clean := paste0(exposure_clean, "  -->  ")]
dt_sen[, outcome_really_clean := outcome_clean]

# Methods keep their order of first appearance.
dt_sen[, method := factor(method, levels = unique(method))]
dt_sen[, name := paste0(exposure_really_clean, outcome_really_clean)]
dt_sen[, unique(nchar(name))] # printed for inspection

# Two facets on the plot: microbe abundance versus everything else.
dt_sen[, Category_other := ifelse(Category == "Microbe Abundance",
                                  "Microbe Abundance",
                                  "Microbiota Associated Metabolites")]
# Contamination mixture rows: se is set to the mean distance from b to the two
# CI bounds, divided by 1.96.
dt_sen[method == "Contamination mixture", se := ((b - lci) + (uci - b))/(2*1.96)]
357 | 
358 | 
359 | # Forest plot of effect estimates coloured by method, one row per `name`,
360 | # with one stacked facet per `Category_other`.
361 | #   data      : table providing the columns referenced below (name, b, se, pval,
362 | #               method, Category_other).
363 | #   xlim_data : table whose lci/uci, over all rows except method "MR Egger", set
364 | #               the x-axis limits (rounded to 1 decimal). Defaults to the global
365 | #               dt_sen (looked up when the function runs), so calls that pass only
366 | #               `data` behave as before; pass `data` to scale the axis to the plot.
367 | #   x_breaks  : tick positions of the x axis.
368 | # Returns the plot object built by forestplot() plus the theme/scale/facet layers.
369 | my_forest_plot <- function(data, xlim_data = dt_sen,
370 |                            x_breaks = c(-0.3,-0.2,-0.1,0.1,0.2,0.3,0.4)) {
371 |   x_limits <- xlim_data[method != "MR Egger", round(c(min(lci), max(uci)), digits = 1)]
372 |   forestplot(df = data, name = name, se = se, estimate = b, pvalue = pval,
373 |              psignif = 0.05, ci = 0.95, colour = method, xlim = x_limits,
374 |              xlab = "Effect size (SD or log(OR)) per 1-SD\nincrease in gut microbiota features") +
375 |     theme(legend.position = "right") +
376 |     theme(text = element_text(size = 10)) +
377 |     scale_x_continuous(breaks = x_breaks) +
378 |     # facets = ~outcome_category,
379 |     ggforce::facet_col(facets = ~Category_other, scales = "free_y", space = "free")
380 | }
381 | 
382 | forest_sensitivity <- my_forest_plot(data = dt_sen)  # kept so ggsave() receives it explicitly
383 | print(forest_sensitivity)  # still displayed, as the bare call did
384 | ggsave("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/4_forest_sensitivity.png",
385 |        plot = forest_sensitivity, width = 732/72, height = 437/72, units = "in", scale = 1, device = "png")
386 | 
387 | # Histogram of power figure 6 
388 | # Work on the full primary results minus one excluded exposure.
389 | primary_df <- primary_df_full[exposure != "willer_LDL-cholesterol:ieu-a-300"]
390 | # Outcomes whose mean power is below 0.8 keep their name in `category`; the rest are "other".
391 | under.8 <- primary_df[, mean(power), by = outcome][V1 < 0.8, ]$outcome
392 | primary_df[, category := ifelse(outcome %in% under.8, outcome, "other")]
393 | n_assoc <- length(primary_df$power)
394 | sum(primary_df$power < 0.8) / n_assoc  # printed: share of associations with power under 0.8
395 | pct_over <- round(sum(primary_df$power > 0.8) / n_assoc * 100, 1)
396 | power_hist <- ggplot(primary_df, aes(x = power)) +
397 |   geom_histogram(binwidth = 0.1) +
398 |   scale_x_continuous(breaks = seq(0.2, 1, by = 0.2)) +
399 |   theme_classic() +
400 |   annotate(geom = "text", x = 0.8, y = 300, label = paste0(pct_over, "% \n over 0.8"),
401 |            hjust = 0, vjust = 1, size = 5) +
402 |   geom_vline(xintercept = 0.8, linetype = "dotted", colour = "red") +
403 |   ggtitle(paste0("Power to find a 0.1 effect for all ", nrow(primary_df), " associations"))
404 | print(power_hist)  # displayed as before; the object is also passed to ggsave() explicitly
405 | ggsave("Results/Figures/5_histogram_of_power.png", plot = power_hist,
406 |        width = 460/72, height = 326/72, units = "in", scale = 1, device = "png")
407 | 
408 | #MVMR forest plot figure 7
409 | # Reads the MVMR results, builds the label/facet columns my_forest_plot() expects,
410 | # draws and saves the figure, then writes the cleaned table to disk.
411 | MVMR_dat <- fread("/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Sensitivity/MVMR")
412 | # Correct the misspelt label present in the input file.
413 | MVMR_dat[MVMR == "no counfounder", MVMR := "no confounder"]
414 | MVMR_dat <- cleanify(MVMR_dat)
415 | MVMR_dat <- MVMR_dat[order(exposure_clean, outcome_clean), ]
416 | # outcome_clean as plain character for the label columns below.
417 | MVMR_dat[, outcome_clean := as.character(outcome_clean)]
418 | MVMR_dat[, exposure_really_clean := paste0(exposure_clean, "  -->  ")]
419 | MVMR_dat[, outcome_really_clean := outcome_clean]
420 | 
421 | # `method` is the colour variable of my_forest_plot(); labels outside these levels become NA.
422 | MVMR_dat[, method := factor(MVMR, levels = c("no confounder", "with BMI", "with alcohol intake frequency"))]
423 | MVMR_dat[, name := paste0(exposure_really_clean,  outcome_really_clean)]
424 | MVMR_dat[, Category_other := ifelse(Category == "Microbe Abundance","Microbe Abundance", "Microbiota Associated Metabolites")]
425 | 
426 | # Keep the plot object and hand it to ggsave() instead of relying on last_plot().
427 | mvmr_forest <- my_forest_plot(MVMR_dat)
428 | print(mvmr_forest)
429 | ggsave("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Figures/MVMR_forest_plot.png",
430 |        plot = mvmr_forest, width = 730/72, height = 430/72, units = "in", scale = 1, device = "png")
431 | fwrite(MVMR_dat, "Data/Modified/MVMR_clean.txt")
432 | # list_MVMR <- list("1a_MVMR_forest_plot" = MVMR_dat[outcome_category == "Disease",], 
433 | #                   "1b_MVMR_forest_plot" = MVMR_dat[outcome_category == "Risk_factor",])
434 | # 
435 | # for(i in 1:length(list_MVMR)) {
436 | # 
437 | # MVMR <- list_MVMR[[i]]
438 | # resultsA <- data.frame(variable = as.character(1:nrow(MVMR)),
439 | #                        estimate = round(MVMR$b, digits =3),
440 | #                        lci =  round(MVMR$lci, digits = 3),
441 | #                        uci =  round(MVMR$uci, digits = 3),
442 | #                        P_value = formatC(MVMR$pval, format = "e", digits = 1),
443 | #                        n = MVMR$nsnp)
444 | # 
445 | # mylabels <- data.frame(heading1 = "",
446 | #                        heading2 = paste0(MVMR$exposure_clean, " with ", MVMR$outcome_clean),
447 | #                        heading3 = MVMR$MVMR,
448 | #                        variable = as.character(1:nrow(MVMR)))
449 | # 
450 | # make_forest_plot(panels = list(resultsA),
451 | #                  col.key = "variable",
452 | #                  panel.headings = "IVW estimates with and without correcting for confounder",
453 | #                  row.labels = mylabels,
454 | #                  exponentiate = TRUE,
455 | #                  pointsize = 2, 
456 | #                  rows = unique(mylabels$heading1),
457 | #                  col.stderr = NULL,
458 | #                  col.lci = "lci",
459 | #                  col.uci = "uci",
460 | #                  col.left         = c("n"),
461 | #                  col.left.heading = c("n SNP"),
462 | #                  col.right = "P_value",
463 | #                  col.right.heading = c("OR (95% CI)", "P-value"),
464 | #                  xlab = "IVW estimates",
465 | #                  colour = "blue",
466 | #                  cicolour = "darkred",
467 | #                  blankrows = c(0,1,0,0),
468 | #                  xlim	= c(0.65, 1.8)
469 | # )
470 | # 
471 | # ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Supplementary_material/", names(list_MVMR)[i],".png"),
472 | #        width=710/72,height=576/72, units="in", scale=1,
473 | #        device = "png")
474 | # 
475 | # }
476 | 
477 | #10-11 -> For all IVW estimate with p < 0.05, perform sensitivity analysis and present the scatter plot
478 | # Keep only sensitivity tables with more than 3 rows at pval < 0.05 (missing p-values are ignored).
479 | list_sensitivity <- readRDS( "/home/gagelo01/workspace/Projects/Dysbiose_project/Data/Modified/Sensitivity/list_sensitivity")
480 | n_signif <- vapply(list_sensitivity, function(x) sum(x$pval < 0.05, na.rm = TRUE), integer(1))
481 | list_sensitivity <- lapply(list_sensitivity[n_signif > 3], cleanify)
482 | # exposure/outcome take the cleaned labels; *_copy keep the values they held before (the RHS is evaluated first).
483 | list_sensitivity <- lapply(list_sensitivity, function(x)  x[, c("exposure", "outcome", "exposure_copy", "outcome_copy") := .(exposure_clean, outcome_clean, exposure, outcome)])
484 | 
485 | harm_all <- cleanify(fread( "Data/Modified/harm_all.txt"))
486 | fwrite(harm_all, "Data/Modified/harm_all_clean.txt")
487 | harm_all[, c("id.exposure", "id.outcome") := .(exposure, outcome)]
488 | list_harm <- split(harm_all, harm_all$exposure_outcome)
489 | # Drop pairs whose first SNP entry equals 0; isTRUE() keeps a pair when that comparison is NA.
490 | list_harm <- list_harm[!vapply(list_harm, function(x) isTRUE(x[["SNP"]][1] == 0), logical(1))]
491 | invisible(lapply(list_harm, setDT))  # setDT() acts by reference; invisible() stops every table being printed
492 | list_harm <- lapply(list_harm, cleanify)
493 | list_harm <- lapply(list_harm, function(x)  x[, c("exposure", "outcome") := .(exposure_clean, outcome_clean)])
494 | 
495 | # One scatter plot per retained sensitivity table: find the harmonised dataset with the same
496 | # exposure and outcome labels, draw it, then save both the PNG and the ggplot object.
497 | for (j in seq_along(list_sensitivity)) {  # seq_along(): nothing runs when the list is empty
498 |   li_sen <- list_sensitivity[[j]]
499 |   sen_exp <- as.character(li_sen$exposure[1])
500 |   sen_out <- as.character(li_sen$outcome[1])
501 |   # Flag the harmonised datasets whose first row carries the same exposure and outcome.
502 |   vecto <- vapply(list_harm, function(h) {
503 |     isTRUE(as.character(h$exposure[1]) == sen_exp) && isTRUE(as.character(h$outcome[1]) == sen_out)
504 |   }, logical(1))
505 |   index <- which(vecto)
506 |   if (length(index) != 1L) {
507 |     stop("Expected exactly one harmonised dataset for sensitivity table ", j, ", found ", length(index), call. = FALSE)
508 |   }
509 | 
510 |   p <- mr_scatter_plot(li_sen, list_harm[[index]])
511 |   ggplot_object <- p[[1]] + theme_classic() + theme(legend.position = "top")
512 |   # plot = makes ggsave() write this object rather than whatever last_plot() returns.
513 |   ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Supplementary_material/Scatter_plot/",
514 |                 j, "_", li_sen$exposure_copy[1], "__", li_sen$outcome_copy[1], "_", "scatter_plot.png"),
515 |          plot = ggplot_object, width = 538/72, height = 335/72, units = "in", scale = 1, device = "png")
516 |   saveRDS(ggplot_object, file = paste0("Results/Supplementary_material/ggplot_object/", j, "_scatter.rdata"))
517 | }
518 | 
519 | 
520 | #8-9 -> For all IVW estimate with p < 0.05 and nSNPs > 1, perform  a forest plot that look like Iyas and all.-----
521 | # For every retained sensitivity table this loop:
522 | #   1. caps very wide RAPS confidence intervals in a local copy,
523 | #   2. picks the scale (beta for "Parental lifespan", odds ratio otherwise),
524 | #   3. draws the forest plot with make_forest_plot(),
525 | #   4. saves the PNG and the returned object (read back by the two-panel figure).
526 | 
527 | list_sensitivity_forest <- vector(mode = "list", length = length(list_sensitivity))
528 | 
529 | # seq_along() so that an empty list_sensitivity simply skips the loop.
530 | for (i in seq_along(list_sensitivity)) {
531 |   # copy() keeps the CI capping below from altering list_sensitivity by reference.
532 |   li_sen <- copy(list_sensitivity[[i]])
533 | 
534 |   # RAPS rows whose interval is wider than 8 are shown as b +/- 4. Selecting the
535 |   # rows in `i` also copes with tables that have no (or several) RAPS rows.
536 |   li_sen[method == "Robust adjusted profile score (RAPS)" & (uci - lci) > 8,
537 |          c("lci", "uci") := .(b - 4, b + 4)]
538 | 
539 |   # Plain character comparison; identical() is FALSE (not an error) for NA.
540 |   is_lifespan <- identical(as.character(li_sen$outcome[1]), "Parental lifespan")
541 |   per_sd <- paste0(li_sen$outcome_clean[1], " per 1-SD increase of ", li_sen$exposure_clean[1])
542 |   if (is_lifespan) {
543 |     exponentiate <- FALSE
544 |     col.right.heading <- c("Beta (95% CI)", "P-value")
545 |     xlab <- paste0("1 year change in ", per_sd)
546 |     nullval <- 0
547 |   } else {
548 |     exponentiate <- TRUE
549 |     col.right.heading <- c("OR (95% CI)", "P-value")
550 |     xlab <- paste0("Odds ratio of ", per_sd)
551 |     nullval <- 1
552 |   }
553 | 
554 |   # One row per method: rounded estimate and CI, SNP count, p-value in scientific notation.
555 |   resultsA <- data.frame(variable = li_sen$method,
556 |                          estimate = round(li_sen$b, digits = 3),
557 |                          lci = round(li_sen$lci, digits = 3),
558 |                          uci = round(li_sen$uci, digits = 3),
559 |                          n = li_sen$nsnp,
560 |                          pval = formatC(li_sen$pval, format = "e", digits = 1))
561 | 
562 |   # Row labels: heading1 groups rows by type_of_test, heading2 names the method.
563 |   mylabels <- data.frame(heading1 = li_sen$type_of_test,
564 |                          heading2 = li_sen$method,
565 |                          heading3 = as.character(NA),
566 |                          variable = li_sen$method)
567 | 
568 |   list_sensitivity_forest[[i]] <-
569 |     make_forest_plot(panels = list(resultsA), col.key = "variable", row.labels = mylabels,
570 |                      exponentiate = exponentiate, pointsize = 2,
571 |                      rows = unique(mylabels$heading1),
572 |                      col.stderr = NULL, col.lci = "lci", col.uci = "uci",
573 |                      col.left = c("n"), col.left.heading = c("n SNP"),
574 |                      col.right = "pval", col.right.heading = col.right.heading,
575 |                      xlab = xlab, nullval = nullval,
576 |                      blankrows = c(0,0,0,0), panel.names = "")
577 | 
578 |   # Element 1 of the result is used as the plot; pass it to ggsave() explicitly instead of via last_plot().
579 |   ggplot_object <- list_sensitivity_forest[[i]]
580 |   ggsave(paste0("/home/gagelo01/workspace/Projects/Dysbiose_project/Results/Supplementary_material/Forest_plot/",
581 |                 i, "_", li_sen$exposure_copy[1], "__", li_sen$outcome_copy[1], "_forest_plot.png"),
582 |          plot = ggplot_object[[1]], width = 659/72, height = 357/72, units = "in", scale = 1, device = "png")
583 |   saveRDS(ggplot_object, file = paste0("Results/Supplementary_material/ggplot_object/", i, "_forest.rdata"))
584 | }
585 | 
586 | ###two panel supplementary figure
587 | # For each table, stack the saved scatter plot (A) over the saved forest plot (B).
588 | for (i in seq_along(list_sensitivity)) {  # seq_along(): nothing runs when the list is empty
589 |   scatter_panel <- readRDS(file = paste0("Results/Supplementary_material/ggplot_object/", i, "_scatter.rdata"))
590 |   forest_object <- readRDS(file = paste0("Results/Supplementary_material/ggplot_object/", i, "_forest.rdata"))
591 | 
592 |   # Element 1 of the saved forest object is used as the plot.
593 |   two_panel <- ggarrange(scatter_panel, forest_object[[1]],
594 |                          labels = c("A", "B"), ncol = 1, nrow = 2)
595 | 
596 |   # Save the arranged figure itself; without plot = ggsave() falls back on last_plot().
597 |   ggsave(paste0("Results/Supplementary_material/TwoPanel/", i, "_scatter_forest.png"),
598 |          plot = two_panel, width = 490/72, height = 475/72, units = "in", scale = 1, device = "png")
599 | }
600 | 
601 | # ##forest plot between IBD and diseases
602 | # harm_all_clean <- cleanify(harm_all)
603 | # harm_all_clean[exposure_clean == "IBD_IBD", exposure_clean := "Inflammatory bowel disease"]
604 | # harm_all_clean[,exposure := exposure_clean]
605 | # harm_all_clean[,outcome := outcome_clean]
606 | # singlesnp <- mr_singlesnp(dat = harm_all_clean[exposure == "Inflammatory bowel disease"], single_method = "mr_wald_ratio", all_method = "mr_ivw")
607 | # setDT(singlesnp)
608 | # index <- unique(singlesnp[,outcome])
609 | # for(i in 1:length(index)) {
610 | #   s<-mr_forest_plot(singlesnp_results = singlesnp[outcome == index[i]], exponentiate = TRUE) 
611 | #   s[[1]] +
612 | #     theme_classic() +
613 | #     theme(legend.position="none") +
614 | #     theme(axis.text.y = element_text(size=6),
615 | #           axis.title=element_text(size=10))
616 | #   
617 | #   titre<-gsub(" ", "_", index[i])
618 | #   ggsave(paste0("Results/Supplementary_material/Singlesnp/singlesnp_IBD_", titre, ".png"),
619 | #          width=459/72,height=594/72,units="in",scale=1, device = "png")
620 | #   
621 | # }
622 | 
623 | 
624 | 


--------------------------------------------------------------------------------