├── .Rbuildignore ├── tests ├── testthat.R └── testthat │ ├── test-plot_mbs_contexts.R │ ├── test-plot_main_dbs_contexts.R │ ├── test-plot_main_indel_contexts.R │ ├── test-pool_mut_matrix.R │ ├── test-cos_sim.R │ ├── test-mut_type.R │ ├── test-count_mbs_contexts.R │ ├── test-plot_dbs_contexts.R │ ├── test-get_dbs_context.R │ ├── test-count_indel_contexts.R │ ├── test-plot_correlation_bootstrap.R │ ├── test-plot_bootstrapped_contribution.R │ ├── test-plot_signature_strand_bias.R │ ├── test-plot_river.R │ ├── test-plot_indel_contexts.R │ ├── test-get_indel_context.R │ ├── test-signature_potential_damage_analysis.R │ ├── test-plot_long_profile.R │ ├── test-plot_96_profile.R │ ├── test-plot_compare_mbs.R │ ├── test-plot_192_profile.R │ ├── test-count_dbs_contexts.R │ ├── test-bin_mutation_density.R │ ├── test-plot_compare_profiles.R │ ├── test-strand_occurrences.R │ ├── test-plot_compare_dbs.R │ ├── test-cos_sim_matrix.R │ ├── test-plot_compare_indels.R │ ├── test-plot_original_vs_reconstructed.R │ ├── test-binomial_test.R │ ├── test-plot_profile_heatmap.R │ ├── test-plot_strand.R │ ├── test-plot_regional_similarity.R │ ├── test-plot_enrichment_depletion.R │ ├── test-fit_to_signatures_bootstrapped.R │ ├── test-convert_sigs_to_ref.R │ ├── test-context_potential_damage_analysis.R │ ├── test-plot_lesion_segregation.R │ ├── test-mut_type_occurrences.R │ ├── test-plot_contribution_heatmap.R │ ├── test-plot_strand_bias.R │ ├── test-type_context.R │ ├── test-lengthen_mut_matrix.R │ ├── test-plot_contribution.R │ ├── test-mut_matrix.R │ ├── test-fit_to_signatures.R │ ├── test-mutations_from_vcf.R │ ├── test-get_mut_type.R │ ├── test-plot_cosine_heatmap.R │ ├── test-enrichment_depletion_test.R │ ├── test-fit_to_signatures_strict.R │ ├── test-plot_spectrum.R │ ├── test-plot_spectrum_region.R │ ├── test-strand_bias_test.R │ ├── test-split_muts_region.R │ ├── test-genomic_distribution.R │ ├── test-plot_rainfall.R │ └── test-mut_matrix_stranded.R ├── inst ├── states │ ├── 
blood_grl.rds │ ├── blosum62.rds │ ├── dbs_refit.rds │ ├── distr_data.rds │ ├── snv_refit.rds │ ├── CTCF_g_data.rds │ ├── indel_refit.rds │ ├── mut_mat_data.rds │ ├── nmf_res_data.rds │ ├── repli_strand.rds │ ├── blood_grl_dbs.rds │ ├── blood_grl_indel.rds │ ├── blood_grl_mbs.rds │ ├── mut_mat_repli.rds │ ├── mut_mat_s_data.rds │ ├── promoter_g_data.rds │ ├── regional_sims.rds │ ├── regulatory_data.rds │ ├── blood_dbs_counts.rds │ ├── blood_mbs_counts.rds │ ├── grl_split_region.rds │ ├── strict_snv_refit.rds │ ├── blood_indel_counts.rds │ ├── context_mismatches.rds │ ├── lesion_segregation.rds │ ├── mut_mat_longregions.rds │ ├── mut_mat_splitregions.rds │ ├── nmf_res_strand_data.rds │ ├── blood_grl_dbs_context.rds │ ├── bootstrapped_snv_refit.rds │ ├── mut_mat_data_extended.rds │ ├── strict_best_snv_refit.rds │ ├── blood_grl_indel_context.rds │ ├── promoter_flanking_g_data.rds │ ├── read_vcfs_as_granges_output.rds │ ├── blood_indels_counts_split_region.rds │ └── blood_indels_longmatrix_split_region.rds ├── scripts │ ├── create_example_mbs.R │ ├── create_example_dbs.R │ ├── create_example_indels.R │ └── create_example_regulatory_regions.R ├── CITATION └── extdata │ └── empty.vcf ├── .gitignore ├── man ├── show-region_cossim-method.Rd ├── MutationalPatterns-defunct.Rd ├── cos_sim.Rd ├── mut_96_occurrences.Rd ├── mut_type.Rd ├── mutations_from_vcf.Rd ├── binomial_test.Rd ├── get_sim_tb.Rd ├── pool_mut_mat.Rd ├── mut_192_occurrences.Rd ├── mut_type_occurrences.Rd ├── get_dbs_context.Rd ├── count_mbs_contexts.Rd ├── convert_sigs_to_ref.Rd ├── cluster_signatures.Rd ├── type_context.Rd ├── plot_river.Rd ├── count_dbs_contexts.Rd ├── mut_context.Rd ├── plot_mbs_contexts.Rd ├── cos_sim_matrix.Rd ├── plot_correlation_bootstrap.Rd ├── plot_signature_strand_bias.Rd ├── count_indel_contexts.Rd ├── mut_matrix.Rd ├── strand_occurrences.Rd ├── plot_main_dbs_contexts.Rd ├── region_cossim-class.Rd ├── merge_signatures.Rd ├── plot_96_profile.Rd ├── plot_strand.Rd ├── 
plot_main_indel_contexts.Rd ├── plot_dbs_contexts.Rd ├── get_mut_type.Rd ├── fit_to_signatures.Rd ├── plot_192_profile.Rd ├── plot_strand_bias.Rd ├── get_indel_context.Rd ├── plot_bootstrapped_contribution.Rd ├── plot_profile_region.Rd ├── strand_bias_test.Rd ├── enrichment_depletion_test.Rd ├── lengthen_mut_matrix.Rd ├── plot_profile_heatmap.Rd ├── plot_enrichment_depletion.Rd ├── plot_original_vs_reconstructed.Rd ├── bin_mutation_density.Rd ├── split_muts_region.Rd ├── plot_compare_mbs.Rd ├── plot_indel_contexts.Rd ├── extract_signatures.Rd ├── plot_compare_dbs.Rd ├── plot_rainfall.Rd └── rename_nmf_signatures.Rd ├── R ├── cos_sim.R ├── mutations_from_vcf.R ├── mut_type.R ├── get_ref_alt.R ├── pool_mut_matrix.R ├── binomial_test.R ├── get_sig_start.R ├── cluster_signatures.R ├── mut_context.R ├── intersect_with_region.R ├── mut_matrix.R ├── type_context.R ├── cos_sim_matrix.R ├── convert_sigs_to_ref.R ├── mut_192_occurrences.R └── plot_mbs_contexts.R ├── LICENSE ├── README.md └── NAMESPACE /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | paper/* 5 | images 6 | vignettes/*.html -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(MutationalPatterns) 3 | 4 | test_check("MutationalPatterns") 5 | -------------------------------------------------------------------------------- /inst/states/blood_grl.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl.rds -------------------------------------------------------------------------------- /inst/states/blosum62.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blosum62.rds -------------------------------------------------------------------------------- /inst/states/dbs_refit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/dbs_refit.rds -------------------------------------------------------------------------------- /inst/states/distr_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/distr_data.rds -------------------------------------------------------------------------------- /inst/states/snv_refit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/snv_refit.rds -------------------------------------------------------------------------------- /inst/states/CTCF_g_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/CTCF_g_data.rds -------------------------------------------------------------------------------- /inst/states/indel_refit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/indel_refit.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_data.rds -------------------------------------------------------------------------------- /inst/states/nmf_res_data.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/nmf_res_data.rds -------------------------------------------------------------------------------- /inst/states/repli_strand.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/repli_strand.rds -------------------------------------------------------------------------------- /inst/states/blood_grl_dbs.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_dbs.rds -------------------------------------------------------------------------------- /inst/states/blood_grl_indel.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_indel.rds -------------------------------------------------------------------------------- /inst/states/blood_grl_mbs.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_mbs.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_repli.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_repli.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_s_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_s_data.rds 
-------------------------------------------------------------------------------- /inst/states/promoter_g_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/promoter_g_data.rds -------------------------------------------------------------------------------- /inst/states/regional_sims.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/regional_sims.rds -------------------------------------------------------------------------------- /inst/states/regulatory_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/regulatory_data.rds -------------------------------------------------------------------------------- /inst/states/blood_dbs_counts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_dbs_counts.rds -------------------------------------------------------------------------------- /inst/states/blood_mbs_counts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_mbs_counts.rds -------------------------------------------------------------------------------- /inst/states/grl_split_region.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/grl_split_region.rds -------------------------------------------------------------------------------- /inst/states/strict_snv_refit.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/strict_snv_refit.rds -------------------------------------------------------------------------------- /inst/states/blood_indel_counts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indel_counts.rds -------------------------------------------------------------------------------- /inst/states/context_mismatches.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/context_mismatches.rds -------------------------------------------------------------------------------- /inst/states/lesion_segregation.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/lesion_segregation.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_longregions.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_longregions.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_splitregions.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_splitregions.rds -------------------------------------------------------------------------------- /inst/states/nmf_res_strand_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/nmf_res_strand_data.rds 
-------------------------------------------------------------------------------- /inst/states/blood_grl_dbs_context.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_dbs_context.rds -------------------------------------------------------------------------------- /inst/states/bootstrapped_snv_refit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/bootstrapped_snv_refit.rds -------------------------------------------------------------------------------- /inst/states/mut_mat_data_extended.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_data_extended.rds -------------------------------------------------------------------------------- /inst/states/strict_best_snv_refit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/strict_best_snv_refit.rds -------------------------------------------------------------------------------- /inst/states/blood_grl_indel_context.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_indel_context.rds -------------------------------------------------------------------------------- /inst/states/promoter_flanking_g_data.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/promoter_flanking_g_data.rds -------------------------------------------------------------------------------- /inst/states/read_vcfs_as_granges_output.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/read_vcfs_as_granges_output.rds -------------------------------------------------------------------------------- /inst/states/blood_indels_counts_split_region.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indels_counts_split_region.rds -------------------------------------------------------------------------------- /inst/states/blood_indels_longmatrix_split_region.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indels_longmatrix_split_region.rds -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Rapp.history 5 | .DS_Store 6 | .RDataTmp 7 | MutationalPatterns.Rproj 8 | vignettes/.build.timestamp 9 | *.pdf 10 | *.tex 11 | *.synctex.gz 12 | Introduction_to_MutationalPatterns.toc 13 | Introduction_to_MutationalPatterns.log 14 | Introduction_to_MutationalPatterns.bbl 15 | vignettes/NMF_* 16 | vignettes/*.html -------------------------------------------------------------------------------- /tests/testthat/test-plot_mbs_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-plot_mbs_contexts") 2 | 3 | ## Get mbs counts 4 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot contexts 9 | output <- plot_mbs_contexts(mbs_counts) 10 | 11 | test_that("Output has correct class", { 12 | expect_true(inherits(output, c("gg"))) 13 | }) 14 | 
-------------------------------------------------------------------------------- /tests/testthat/test-plot_main_dbs_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-plot_main_dbs_contexts") 2 | 3 | ## Get dbs counts 4 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot contexts 9 | output <- plot_main_dbs_contexts(dbs_counts) 10 | 11 | test_that("Output has correct class", { 12 | expect_true(inherits(output, c("gg"))) 13 | }) 14 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_main_indel_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-plot_main_indel_contexts") 2 | 3 | 4 | ## Get indel counts 5 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | ## Plot contexts 10 | output <- plot_main_indel_contexts(indel_counts) 11 | 12 | test_that("Output has correct class", { 13 | expect_true(inherits(output, c("gg"))) 14 | }) 15 | -------------------------------------------------------------------------------- /man/show-region_cossim-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S4_class.R 3 | \name{show,region_cossim-method} 4 | \alias{show,region_cossim-method} 5 | \title{An S4 method to show an instance of the region_cossim class.} 6 | \usage{ 7 | \S4method{show}{region_cossim}(object) 8 | } 9 | \arguments{ 10 | \item{object}{A region_cossim object.} 11 | } 12 | \description{ 13 | An S4 method to show an instance of the region_cossim class. 
14 | } 15 | -------------------------------------------------------------------------------- /inst/scripts/create_example_mbs.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(VariantAnnotation) 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 4 | library(ref_genome, character.only = TRUE) 5 | 6 | # Get grl 7 | grl <- readRDS("inst/states/blood_grl.rds") 8 | 9 | # Get mbs 10 | grl_mbs <- get_mut_type(grl, "mbs") 11 | saveRDS(grl_mbs, "inst/states/blood_grl_mbs.rds") 12 | 13 | # Count contexts 14 | mbs_counts <- count_mbs_contexts(grl_mbs) 15 | saveRDS(mbs_counts, "inst/states/blood_mbs_counts.rds") 16 | -------------------------------------------------------------------------------- /man/MutationalPatterns-defunct.Rd: -------------------------------------------------------------------------------- 1 | \name{MutationalPatterns-defunct} 2 | \alias{mutation_context} 3 | \alias{mutation_types} 4 | \alias{strand_from_vcf} 5 | \alias{explained_by_signatures} 6 | \title{Defunct functions in package \sQuote{MutationalPatterns}} 7 | \description{These functions are defunct and no longer available.} 8 | 9 | \details{ 10 | Defunct functions are: 11 | \code{mutation_context}, 12 | \code{mutation_types}, 13 | \code{strand_from_vcf}, 14 | \code{explained_by_signatures} 15 | } -------------------------------------------------------------------------------- /tests/testthat/test-pool_mut_matrix.R: -------------------------------------------------------------------------------- 1 | context("test-pool_mut_matrix") 2 | 3 | # Get mut_mat 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | grouping <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3)) 8 | 9 | 10 | output <- pool_mut_mat(mut_mat, grouping) 11 | 12 | test_that("Output has correct class", { 13 | expect_true(inherits(output, c("matrix"))) 14 | }) 15 | 16 | test_that("Output has correct
dimensions", { 17 | expect_equal(dim(output), c(96, 3)) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test-cos_sim.R: -------------------------------------------------------------------------------- 1 | context("test-cos_sim") 2 | 3 | # Calculate cosine similarity 4 | x <- c(1.1, 2.1, 0.2, 0.1, 2.9) 5 | y <- c(0.9, 1.9, 0.5, 0.4, 3.1) 6 | output <- cos_sim(x, y) 7 | 8 | 9 | test_that("Output has correct class and data type", { 10 | expect_true(inherits(output, c("numeric"))) 11 | expect_equal(typeof(output), "double") 12 | }) 13 | 14 | test_that("Output has expected size", { 15 | expect_equal(length(output), 1) 16 | }) 17 | 18 | test_that("Output is equal to expected", { 19 | expect_equal(output, 0.9895599) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test-mut_type.R: -------------------------------------------------------------------------------- 1 | context("test-mut_type") 2 | 3 | # Read vcfs 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | # Get mut type 8 | output <- mut_type(vcfs[[1]]) 9 | 10 | # Unit tests 11 | test_that("Output has correct class", { 12 | expect_true(inherits(output, c("character"))) 13 | }) 14 | 15 | test_that("The 6 base mutation types are returned", { 16 | base_types <- sort(unique(output)) 17 | expect_equal(base_types, c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test-count_mbs_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-count_mbs_contexts") 2 | 3 | ## Get a GRangesList object with mbs contexts. 
4 | grl_mbs <- readRDS(system.file("states/blood_grl_mbs.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | output <- count_mbs_contexts(grl_mbs) 9 | expected <- readRDS(system.file("states/blood_mbs_counts.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | test_that("Output has correct class", { 14 | expect_true(inherits(output, c("matrix"))) 15 | }) 16 | 17 | test_that("Output is identical to expected", { 18 | expect_identical(output, expected) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_dbs_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-plot_dbs_contexts") 2 | 3 | ## Get dbs counts 4 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot contexts 9 | output <- plot_dbs_contexts(dbs_counts) 10 | output_samey <- plot_dbs_contexts(dbs_counts, same_y = TRUE) 11 | output_condensed <- plot_dbs_contexts(dbs_counts, condensed = TRUE) 12 | 13 | test_that("Output has correct class", { 14 | expect_true(inherits(output, c("gg"))) 15 | expect_true(inherits(output_samey, c("gg"))) 16 | expect_true(inherits(output_condensed, c("gg"))) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-get_dbs_context.R: -------------------------------------------------------------------------------- 1 | context("test-get_dbs_context") 2 | 3 | 4 | ## Get GRangesList with DBS. 
5 | dbs_grl <- readRDS(system.file("states/blood_grl_dbs.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | ## Set context dbs 10 | output <- get_dbs_context(dbs_grl) 11 | 12 | expected <- readRDS(system.file("states/blood_grl_dbs_context.rds", 13 | package = "MutationalPatterns" 14 | )) 15 | 16 | 17 | test_that("Output has correct class", { 18 | expect_true(inherits(output, c("GRanges", "CompressedGRangesList"))) 19 | }) 20 | 21 | test_that("Output is equal to expected", { 22 | expect_equal(output, expected) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-count_indel_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-count_indel_contexts") 2 | 3 | ## Get a GRangesList object with indel contexts. 4 | grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | output <- count_indel_contexts(grl_indel_context) 9 | expected <- readRDS(system.file("states/blood_indel_counts.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | test_that("Output has correct class", { 14 | expect_true(inherits(output, c("matrix"))) 15 | }) 16 | 17 | test_that("Output is identical to expected", { 18 | expect_identical(output, expected) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_correlation_bootstrap.R: -------------------------------------------------------------------------------- 1 | context("test-plot_correlation_bootstrap") 2 | 3 | # Get contri boots 4 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Run default function 9 | output <- plot_correlation_bootstrap(contri_boots) 10 | 11 | # Run for all samples combined 12 | output_combi <- plot_correlation_bootstrap(contri_boots, per_sample = FALSE) 13 | 14 | # Test 15 | 
test_that("Output has correct class", { 16 | expect_true(inherits(output, c("list"))) 17 | expect_true(inherits(output[[1]], c("gg"))) 18 | expect_true(inherits(output_combi, c("gg"))) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_bootstrapped_contribution.R: -------------------------------------------------------------------------------- 1 | context("test-plot_bootstrapped_contribution") 2 | 3 | ## contri_boots 4 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot contexts 9 | 10 | test_that("Output has correct class", { 11 | output <- plot_bootstrapped_contribution(contri_boots) 12 | expect_true(inherits(output, c("gg"))) 13 | 14 | output <- plot_bootstrapped_contribution(contri_boots, mode = "relative") 15 | expect_true(inherits(output, c("gg"))) 16 | 17 | output <- plot_bootstrapped_contribution(contri_boots, plot_type = "barplot") 18 | expect_true(inherits(output, c("gg"))) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_signature_strand_bias.R: -------------------------------------------------------------------------------- 1 | context("test-plot_signature_strand_bias") 2 | 3 | # Load strand data 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Load nmf results 9 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | ## Provide column names for the plot. 
14 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B") 15 | 16 | output <- plot_signature_strand_bias(nmf_res_strand$signatures) 17 | 18 | # Perform tests 19 | test_that("Output has correct class", { 20 | expect_true(inherits(output, c("gg"))) 21 | }) 22 | -------------------------------------------------------------------------------- /R/cos_sim.R: -------------------------------------------------------------------------------- 1 | #' Cosine similarity function 2 | #' 3 | #' Calculate the cosine similarity between two vectors of the same length. 4 | #' The cosine similarity is a value between 0 (distinct) and 1 (identical) 5 | #' and indicates how much two vectors are alike. 6 | #' 7 | #' @param x Vector 1 of length n 8 | #' @param y Vector 2 of length n 9 | #' @return Cosine similarity value; a value between 0 and 1 10 | #' 11 | #' @examples 12 | #' x <- c(1.1, 2.1, 0.2, 0.1, 2.9) 13 | #' y <- c(0.9, 1.9, 0.5, 0.4, 3.1) 14 | #' cos_sim(x, y) 15 | #' @export 16 | 17 | cos_sim <- function(x, y) { 18 | res <- x %*% y / (sqrt(x %*% x) * sqrt(y %*% y)) 19 | # coerce matrix to numeric 20 | res <- as.numeric(res) 21 | return(res) 22 | } 23 | -------------------------------------------------------------------------------- /man/cos_sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cos_sim.R 3 | \name{cos_sim} 4 | \alias{cos_sim} 5 | \title{Cosine similarity function} 6 | \usage{ 7 | cos_sim(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector 1 of length n} 11 | 12 | \item{y}{Vector 2 of length n} 13 | } 14 | \value{ 15 | Cosine similarity value; a value between 0 and 1 16 | } 17 | \description{ 18 | Calculate the cosine similarity between two vectors of the same length. 19 | The cosine similarity is a value between 0 (distinct) and 1 (identical) 20 | and indicates how much two vectors are alike. 
21 | } 22 | \examples{ 23 | x <- c(1.1, 2.1, 0.2, 0.1, 2.9) 24 | y <- c(0.9, 1.9, 0.5, 0.4, 3.1) 25 | cos_sim(x, y) 26 | } 27 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_river.R: -------------------------------------------------------------------------------- 1 | context("test-plot_river") 2 | 3 | 4 | # Get input data 5 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | 14 | ## Create riverplot of profile 15 | output <- plot_river(mut_mat) 16 | 17 | ## Create condensed riverplot of extended profile 18 | output_extended <- plot_river(mut_mat_extended, condensed = TRUE) 19 | 20 | 21 | test_that("Output has correct class", { 22 | expect_true(inherits(output, c("gg"))) 23 | expect_true(inherits(output_extended, c("gg"))) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_indel_contexts.R: -------------------------------------------------------------------------------- 1 | context("test-plot_indel_contexts") 2 | 3 | ## Get indel counts 4 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot contexts 9 | output <- plot_indel_contexts(indel_counts) 10 | output_same_y <- plot_indel_contexts(indel_counts, same_y = TRUE) 11 | output_extra_labels <- plot_indel_contexts(indel_counts, extra_labels = TRUE) 12 | output_condensed <- plot_indel_contexts(indel_counts, condensed = TRUE) 13 | 14 | test_that("Output has correct class", { 15 | expect_true(inherits(output, c("gg"))) 16 | expect_true(inherits(output_same_y, c("gg"))) 17 | expect_true(inherits(output_extra_labels, c("gg"))) 18 | expect_true(inherits(output_condensed, c("gg"))) 19 | }) 20 | 
-------------------------------------------------------------------------------- /man/mut_96_occurrences.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_96_occurrences.R 3 | \name{mut_96_occurrences} 4 | \alias{mut_96_occurrences} 5 | \title{Count 96 trinucleotide mutation occurrences} 6 | \usage{ 7 | mut_96_occurrences(type_context, gr_sizes) 8 | } 9 | \arguments{ 10 | \item{type_context}{result from type_context function} 11 | 12 | \item{gr_sizes}{A vector indicating the number of variants per GRanges} 13 | } 14 | \value{ 15 | Mutation matrix with 96 trinucleotide mutation occurrences 16 | } 17 | \description{ 18 | @details 19 | This function is called by mut_matrix. It calculates the 96 trinucleotide context for all variants 20 | and then splits these per GRanges (samples). It then calculates how often each 96 trinucleotide context occurs. 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test-get_indel_context.R: -------------------------------------------------------------------------------- 1 | context("test-get_indel_context") 2 | 3 | ## Get a GRangesList object with only indels. 4 | indel_grl <- readRDS(system.file("states/blood_grl_indel.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Load the corresponding reference genome. 
9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | ## Get the indel contexts 13 | output <- get_indel_context(indel_grl, ref_genome) 14 | 15 | expected <- readRDS(system.file("states/blood_grl_indel_context.rds", 16 | package = "MutationalPatterns" 17 | )) 18 | 19 | test_that("Output has correct class", { 20 | expect_true(inherits(output, c("GRanges", "CompressedGRangesList"))) 21 | }) 22 | 23 | test_that("Output is equal to expected", { 24 | expect_equal(output, expected) 25 | }) 26 | -------------------------------------------------------------------------------- /tests/testthat/test-signature_potential_damage_analysis.R: -------------------------------------------------------------------------------- 1 | context("test-signature_potential_damage_analysis") 2 | 3 | # Get the signatures 4 | signatures <- get_known_signatures() 5 | 6 | # Get the contexts 7 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 8 | package = "MutationalPatterns" 9 | )) 10 | 11 | contexts <- rownames(mut_mat)[1:6] 12 | 13 | # Get context mismatches 14 | context_mismatches <- readRDS(system.file("states/context_mismatches.rds", 15 | package = "MutationalPatterns" 16 | )) 17 | 18 | # Run function 19 | output <- signature_potential_damage_analysis(signatures, contexts, context_mismatches) 20 | 21 | test_that("Output has correct class", { 22 | expect_true(inherits(output, "tbl_df")) 23 | }) 24 | 25 | test_that("Output has correct size", { 26 | expect_equal(dim(output), c(240, 7)) 27 | }) 28 | -------------------------------------------------------------------------------- /inst/scripts/create_example_dbs.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(VariantAnnotation) 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 4 | library(ref_genome, character.only = TRUE) 5 | 6 | # Get grl 7 | grl <- readRDS("inst/states/blood_grl.rds") 8 | 9 | # Get dbs 10 | grl_dbs 
<- get_mut_type(grl, "dbs") 11 | saveRDS(grl_dbs, "inst/states/blood_grl_dbs.rds") 12 | 13 | # Set context 14 | grl_dbs_context <- get_dbs_context(grl_dbs) 15 | saveRDS(grl_dbs_context, "inst/states/blood_grl_dbs_context.rds") 16 | 17 | # Count contexts 18 | dbs_counts <- count_dbs_contexts(grl_dbs_context) 19 | saveRDS(dbs_counts, "inst/states/blood_dbs_counts.rds") 20 | 21 | # Refit to signatures 22 | signatures <- get_known_signatures("dbs") 23 | 24 | 25 | fit_res <- fit_to_signatures(dbs_counts, signatures) 26 | saveRDS(fit_res, "inst/states/dbs_refit.rds") 27 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_long_profile.R: -------------------------------------------------------------------------------- 1 | context("test-plot_profile_region") 2 | 3 | # Read the long mutation matrix information: 4 | input <- readRDS(system.file("states/mut_mat_longregions.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Plot the 96-profile of three samples 9 | output <- plot_profile_region(input) 10 | output_relative_sample_feature <- plot_profile_region(input, mode = "relative_sample_feature") 11 | output_absolute <- plot_profile_region(input, mode = "absolute") 12 | output_condensed <- plot_profile_region(input, condensed = TRUE) 13 | 14 | test_that("Output has correct class", { 15 | expect_true(inherits(output, c("gg"))) 16 | expect_true(inherits(output_relative_sample_feature, c("gg"))) 17 | expect_true(inherits(output_absolute, c("gg"))) 18 | expect_true(inherits(output_condensed, c("gg"))) 19 | }) 20 | -------------------------------------------------------------------------------- /man/mut_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_type.R 3 | \name{mut_type} 4 | \alias{mut_type} 5 | \title{Retrieve base substitution types from a VCF object} 6 | \usage{ 7 | 
mut_type(vcf) 8 | } 9 | \arguments{ 10 | \item{vcf}{A CollapsedVCF object} 11 | } 12 | \value{ 13 | Character vector with base substitution types 14 | } 15 | \description{ 16 | A function to extract the base substitutions from a vcf and translate to 17 | the 6 common base substitution types. 18 | } 19 | \examples{ 20 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 21 | ## following data: 22 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 23 | package = "MutationalPatterns" 24 | )) 25 | 26 | mut_type(vcfs[[1]]) 27 | } 28 | \seealso{ 29 | \code{\link{read_vcfs_as_granges}} 30 | } 31 | -------------------------------------------------------------------------------- /man/mutations_from_vcf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mutations_from_vcf.R 3 | \name{mutations_from_vcf} 4 | \alias{mutations_from_vcf} 5 | \title{Retrieve base substitutions from vcf} 6 | \usage{ 7 | mutations_from_vcf(vcf) 8 | } 9 | \arguments{ 10 | \item{vcf}{A CollapsedVCF object} 11 | } 12 | \value{ 13 | Character vector with base substitutions 14 | } 15 | \description{ 16 | A function to extract base substitutions of each position in vcf 17 | } 18 | \examples{ 19 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 20 | ## following data: 21 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 22 | package = "MutationalPatterns" 23 | )) 24 | 25 | muts <- mutations_from_vcf(vcfs[[1]]) 26 | } 27 | \seealso{ 28 | \code{\link{read_vcfs_as_granges}} 29 | } 30 | -------------------------------------------------------------------------------- /man/binomial_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/binomial_test.R 3 | \name{binomial_test} 4 | 
\alias{binomial_test} 5 | \title{Binomial test for enrichment or depletion testing} 6 | \usage{ 7 | binomial_test(p, n, x, p_cutoffs = 0.05) 8 | } 9 | \arguments{ 10 | \item{p}{Probability of success} 11 | 12 | \item{n}{Number of trials} 13 | 14 | \item{x}{Observed number of successes} 15 | 16 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05} 17 | } 18 | \value{ 19 | A data.frame with direction of effect (enrichment/depletion), 20 | P-value and significance asterisks 21 | } 22 | \description{ 23 | This function performs lower-tail binomial test for depletion and 24 | upper-tail test for enrichment 25 | } 26 | \examples{ 27 | binomial_test(0.5, 1200, 543) 28 | binomial_test(0.2, 800, 150) 29 | } 30 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_96_profile.R: -------------------------------------------------------------------------------- 1 | context("test-plot_96_profile") 2 | 3 | 4 | # Load mutation matrix 5 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | # Plot the 96-profile of three samples 10 | output <- plot_96_profile(mut_mat[, c(1, 4, 7)]) 11 | 12 | # Plot a condensed profile 13 | output_condensed <- plot_96_profile(mut_mat[, c(1, 4, 7)], condensed = TRUE) 14 | 15 | # Load extracted signatures and plot 16 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 17 | package = "MutationalPatterns" 18 | )) 19 | output_signatures <- plot_96_profile(nmf_res$signatures) 20 | 21 | 22 | # Perform tests 23 | test_that("Output has correct class", { 24 | expect_true(inherits(output, c("gg"))) 25 | expect_true(inherits(output_condensed, c("gg"))) 26 | expect_true(inherits(output_signatures, c("gg"))) 27 | }) 28 | -------------------------------------------------------------------------------- /man/get_sim_tb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do 
not edit by hand 2 | % Please edit documentation in R/S4_class.R 3 | \name{get_sim_tb} 4 | \alias{get_sim_tb} 5 | \alias{get_sim_tb,region_cossim-method} 6 | \title{An S4 generic to get the sim_tb from a region_cossim object.} 7 | \usage{ 8 | get_sim_tb(x) 9 | 10 | \S4method{get_sim_tb}{region_cossim}(x) 11 | } 12 | \arguments{ 13 | \item{x}{A region_cossim object} 14 | 15 | \item{region_cossim}{A region_cossim object} 16 | } 17 | \value{ 18 | A tibble containing the calculated similarities of the windows. 19 | 20 | A tibble containing the calculated similarities of the windows. 21 | } 22 | \description{ 23 | An S4 generic to get the sim_tb from a region_cossim object. 24 | 25 | An S4 method for the get_sim_tb generic 26 | } 27 | \section{Methods (by class)}{ 28 | \itemize{ 29 | \item \code{region_cossim}: Get the sim_tb from a region_cossim object. 30 | }} 31 | 32 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_compare_mbs.R: -------------------------------------------------------------------------------- 1 | context("test-plot_compare_mbs") 2 | 3 | # Get the mbs counts 4 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | 9 | # Run default function 10 | output <- plot_compare_mbs( 11 | mbs_counts[, 1], 12 | mbs_counts[, 2] 13 | ) 14 | 15 | # Change the names of the profiles 16 | output_name <- plot_compare_mbs(mbs_counts[, 1], 17 | mbs_counts[, 2], 18 | profile_names = c("Original", "Reconstructed") 19 | ) 20 | 21 | # Change the y_limits 22 | output_yaxis <- plot_compare_mbs(mbs_counts[, 1], 23 | mbs_counts[, 2], 24 | profile_ymax = 0.9, 25 | diff_ylim = c(-0.8, 0.8) 26 | ) 27 | 28 | # Perform tests 29 | test_that("Output has correct class", { 30 | expect_true(inherits(output, c("gg"))) 31 | expect_true(inherits(output_name, c("gg"))) 32 | expect_true(inherits(output_yaxis, c("gg"))) 33 | }) 34 | 
#' Retrieve base substitutions from vcf
#'
#' A function to extract base substitutions of each position in vcf.
#' Each substitution is reported as the reference allele and the alternative
#' allele joined by ">".
#'
#' @param vcf A CollapsedVCF object
#' @return Character vector with base substitutions
#' @import GenomicRanges
#'
#' @examples
#' ## See the 'read_vcfs_as_granges()' example for how we obtained the
#' ## following data:
#' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' muts <- mutations_from_vcf(vcfs[[1]])
#' @seealso
#' \code{\link{read_vcfs_as_granges}}
#'
#' @export

mutations_from_vcf <- function(vcf) {

  # Check that no indels are present before building the substitutions.
  .check_no_indels(vcf)

  # Reference alleles as plain character strings.
  ref_bases <- as.character(.get_ref(vcf))

  # Alternative alleles are flattened with unlist() before conversion.
  alt_bases <- as.character(unlist(.get_alt(vcf)))

  # Combine into "REF>ALT" strings.
  paste(ref_bases, alt_bases, sep = ">")
}
#' Retrieve base substitution types from a VCF object
#'
#' A function to extract the base substitutions from a vcf and translate to
#' the 6 common base substitution types. Substitutions with a G or A
#' reference base are rewritten to their complementary C or T form
#' (for example "G>T" becomes "C>A").
#'
#' @param vcf A CollapsedVCF object
#' @return Character vector with base substitution types
#'
#' @examples
#' ## See the 'read_vcfs_as_granges()' example for how we obtained the
#' ## following data:
#' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' mut_type(vcfs[[1]])
#' @seealso
#' \code{\link{read_vcfs_as_granges}}
#'
#' @export

mut_type <- function(vcf) {
  types <- unlist(mutations_from_vcf(vcf))

  # Substitutions to rewrite (names) and their replacements (values).
  # The replacements are applied one after another in this order.
  to_canonical <- c(
    "G>T" = "C>A",
    "G>C" = "C>G",
    "G>A" = "C>T",
    "A>T" = "T>A",
    "A>G" = "T>C",
    "A>C" = "T>G"
  )

  for (from in names(to_canonical)) {
    types <- gsub(from, to_canonical[[from]], types)
  }

  types
}
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /tests/testthat/test-bin_mutation_density.R: -------------------------------------------------------------------------------- 1 | context("test-bin_mutation_density") 2 | 3 | 4 | # Read grl 5 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | ## Load the corresponding reference genome. 10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 11 | library(ref_genome, character.only = TRUE) 12 | 13 | ## Determine region density 14 | output <- bin_mutation_density(grl, ref_genome, nrbins = 3) 15 | 16 | 17 | # Use manual cutoffs 18 | output_man <- bin_mutation_density(grl, ref_genome, man_dens_cutoffs = c(0, 2e-08, 1)) 19 | 20 | 21 | # Tests 22 | test_that("Output has correct class", { 23 | expect_true(inherits(output, "CompressedGRangesList")) 24 | expect_true(inherits(output_man, "CompressedGRangesList")) 25 | }) 26 | 27 | test_that("Output has correct dimensions", { 28 | expect_equal(length(output), 3) 29 | expect_equal(length(output_man), 2) 30 | expect_equal(as.vector(S4Vectors::elementNROWS(output)), c(30, 11, 2)) 31 | expect_equal(as.vector(S4Vectors::elementNROWS(output_man)), c(25, 4)) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_compare_profiles.R: -------------------------------------------------------------------------------- 1 | context("test-plot_compare_profiles") 2 | 3 | # Load mutation data 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Load nmf data 9 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 10 | 
package = "MutationalPatterns" 11 | )) 12 | 13 | # Compare profiles 14 | output <- plot_compare_profiles(mut_mat[, 1], 15 | nmf_res$reconstructed[, 1], 16 | profile_names = c("Original", "Reconstructed") 17 | ) 18 | 19 | output_condensed <- plot_compare_profiles(mut_mat[, 1], 20 | nmf_res$reconstructed[, 1], 21 | profile_names = c("Original", "Reconstructed"), 22 | condensed = TRUE 23 | ) 24 | 25 | output_yaxis <- plot_compare_profiles(mut_mat[, 1], 26 | nmf_res$reconstructed[, 1], 27 | profile_ymax = 0.3, 28 | diff_ylim = c(-0.03, 0.03) 29 | ) 30 | 31 | # Perform tests 32 | test_that("Output has correct class", { 33 | expect_true(inherits(output, c("gg"))) 34 | expect_true(inherits(output_condensed, c("gg"))) 35 | expect_true(inherits(output_yaxis, c("gg"))) 36 | }) 37 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citEntry( 2 | entry = "article", 3 | title = "MutationalPatterns: The one stop shop for the analysis of mutational processes", 4 | author = personList( 5 | as.person("Freek Manders"), 6 | as.person("Arianne M. Brandsma"), 7 | as.person("Jurrian de Kanter"), 8 | as.person("Mark Verheul"), 9 | as.person("Rurika Oka"), 10 | as.person("Markus J. van Roosmalen"), 11 | as.person("Bastiaan van der Roest"), 12 | as.person("Arne van Hoeck"), 13 | as.person("Edwin Cuppen"), 14 | as.person("Ruben van Boxtel") 15 | ), 16 | year = 2022, 17 | journal = "BMC Genomics", 18 | doi = "10.1186/s12864-022-08357-3", 19 | textVersion = 20 | paste("Freek Manders, Arianne M. Brandsma, Jurrian de Kanter, Mark Verheul, Rurika Oka, Markus J. 
van Roosmalen, Bastiaan van der Roest, Arne van Hoeck, Edwin Cuppen, Ruben van Boxtel (2022):", 21 | "MutationalPatterns: The one stop shop for the analysis of mutational processes", 22 | "BMC Genomics", "doi:10.1186/s12864-022-08357-3") 23 | ) -------------------------------------------------------------------------------- /tests/testthat/test-strand_occurrences.R: -------------------------------------------------------------------------------- 1 | context("test-strand_occurrences") 2 | 3 | # Read in stranded mutation matrix 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Load a reference genome. 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | # Set tissue names 13 | tissue <- c( 14 | "colon", "colon", "colon", 15 | "intestine", "intestine", "intestine", 16 | "liver", "liver", "liver" 17 | ) 18 | 19 | output <- strand_occurrences(mut_mat_s, by = tissue) 20 | 21 | # Repeat for replication bias. 
22 | mut_mat_repli <- readRDS(system.file("states/mut_mat_repli.rds", 23 | package = "MutationalPatterns" 24 | )) 25 | output_repli <- strand_occurrences(mut_mat_repli, by = tissue) 26 | 27 | # Tests 28 | test_that("Output has correct class", { 29 | expect_true(inherits(output, c("tbl_df"))) 30 | expect_true(inherits(output_repli, c("tbl_df"))) 31 | }) 32 | 33 | test_that("Output has correct size", { 34 | expect_equal(dim(output), c(36, 5)) 35 | expect_equal(dim(output_repli), c(36, 5)) 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_compare_dbs.R: -------------------------------------------------------------------------------- 1 | context("test-plot_compare_dbs") 2 | 3 | # Get dbs counts 4 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get dbs refit 9 | fit_res <- readRDS(system.file("states/dbs_refit.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | # Run default function 14 | output <- plot_compare_dbs(dbs_counts[, 1], fit_res$reconstructed[, 1]) 15 | 16 | # Test you can change the name 17 | output_name <- plot_compare_dbs(dbs_counts[, 1], 18 | fit_res$reconstructed[, 2], 19 | profile_names = c("Original", "Reconstructed") 20 | ) 21 | 22 | ## You can also change the y limits. 23 | ## This can be done separately for the profiles and the different facets. 
24 | output_yaxis <- plot_compare_dbs(dbs_counts[, 1], 25 | fit_res$reconstructed[, 2], 26 | profile_ymax = 0.3, 27 | diff_ylim = c(-0.03, 0.03) 28 | ) 29 | 30 | # Perform tests 31 | test_that("Output has correct class", { 32 | expect_true(inherits(output, c("gg"))) 33 | expect_true(inherits(output_name, c("gg"))) 34 | expect_true(inherits(output_yaxis, c("gg"))) 35 | }) 36 | -------------------------------------------------------------------------------- /man/mut_192_occurrences.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_192_occurrences.R 3 | \name{mut_192_occurrences} 4 | \alias{mut_192_occurrences} 5 | \title{Count 192 trinucleotide mutation occurrences} 6 | \usage{ 7 | mut_192_occurrences(type_context, strand, gr_sizes) 8 | } 9 | \arguments{ 10 | \item{type_context}{result from type_context function} 11 | 12 | \item{strand}{factor with strand information for each 13 | position, for example "U" for untranscribed, "T" for transcribed strand, 14 | and "-" for unknown} 15 | 16 | \item{gr_sizes}{A vector indicating the number of variants per GRanges} 17 | } 18 | \value{ 19 | Mutation matrix with 192 mutation occurrences and 96 trinucleotides 20 | for two strands 21 | } 22 | \description{ 23 | @details 24 | This function is called by mut_matrix_stranded. 25 | The 192 trinucleotide context is the 96 trinucleotide context combined with the strands. 26 | This function calculates the 192 trinucleotide context for all variants. 27 | and then splits these per GRanges (samples). It then calculates how often each 192 trinucleotide context occurs. 
context("test-cos_sim_matrix")

# Read signatures
signatures <- get_known_signatures()


# Read mut_matrix
mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
  package = "MutationalPatterns"
))


# Calculate the cosine similarity between each COSMIC signature and each 96 mutational profile
output <- cos_sim_matrix(mut_mat, signatures)

# Perform tests.
test_that("Output has correct class and data type", {
  expect_true(inherits(output, c("matrix")))
  expect_equal(typeof(output), "double")
})

test_that("Output has expected size", {
  expect_equal(dim(output), c(9, 60))
})

# Data-frame copy of the mutation matrix, plus a variant whose first column
# is coerced to character so that the input is no longer fully numeric.
mut_mat_df <- as.data.frame(mut_mat)
mut_mat_chr <- mut_mat_df
mut_mat_chr[, 1] <- as.character(mut_mat_chr[, 1, drop = TRUE])
test_that("Non-numeric inputs give an error", {
  expect_error(cos_sim_matrix(mut_mat_chr, signatures))
})

test_that("Tibble inputs are converted into data.frames.", {
  # A tibble input should give the same result as the matrix input.
  output2 <- cos_sim_matrix(tibble::as_tibble(mut_mat_df), signatures)
  expect_equal(output, output2)
})
you can change the name 17 | output_name <- plot_compare_indels(indel_counts[, 1], 18 | fit_res$reconstructed[, 2], 19 | profile_names = c("Original", "Reconstructed") 20 | ) 21 | 22 | ## You can also change the y limits. 23 | ## This can be done separately for the profiles and the different facets. 24 | output_yaxis <- plot_compare_indels(indel_counts[, 1], 25 | fit_res$reconstructed[, 2], 26 | profile_ymax = 0.3, 27 | diff_ylim = c(-0.03, 0.03) 28 | ) 29 | 30 | # Perform tests 31 | test_that("Output has correct class", { 32 | expect_true(inherits(output, c("gg"))) 33 | expect_true(inherits(output_name, c("gg"))) 34 | expect_true(inherits(output_yaxis, c("gg"))) 35 | }) 36 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_original_vs_reconstructed.R: -------------------------------------------------------------------------------- 1 | context("test-plot_original_vs_reconstructed") 2 | 3 | # Load mutation matrix 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Load the nmf res 9 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | 14 | # Load signature refit. 
15 | fit_res <- readRDS(system.file("states/snv_refit.rds", 16 | package = "MutationalPatterns" 17 | )) 18 | 19 | # Run function 20 | output <- plot_original_vs_reconstructed(mut_mat, nmf_res$reconstructed) 21 | output_fit <- plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed) 22 | output_intercept <- plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, y_intercept = 0.90) 23 | output_lims <- plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, ylims = c(0, 1)) 24 | 25 | # Test 26 | test_that("Output has correct class", { 27 | expect_true(inherits(output, c("gg"))) 28 | expect_true(inherits(output_fit, c("gg"))) 29 | expect_true(inherits(output_intercept, c("gg"))) 30 | expect_true(inherits(output_lims, c("gg"))) 31 | }) 32 | -------------------------------------------------------------------------------- /man/mut_type_occurrences.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_type_occurrences.R 3 | \name{mut_type_occurrences} 4 | \alias{mut_type_occurrences} 5 | \title{Count the occurrences of each base substitution type} 6 | \usage{ 7 | mut_type_occurrences(vcf_list, ref_genome) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRangesList or GRanges object.} 11 | 12 | \item{ref_genome}{BSgenome reference genome object} 13 | } 14 | \value{ 15 | data.frame with counts of each base substitution type for 16 | each sample in vcf_list 17 | } 18 | \description{ 19 | Count the occurrences of each base substitution type 20 | } 21 | \examples{ 22 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 23 | ## following data: 24 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 25 | package = "MutationalPatterns" 26 | )) 27 | 28 | ## Load a reference genome. 
29 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 30 | library(ref_genome, character.only = TRUE) 31 | 32 | ## Get the type occurrences for all VCF objects. 33 | type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 34 | } 35 | \seealso{ 36 | \code{\link{read_vcfs_as_granges}}, 37 | } 38 | -------------------------------------------------------------------------------- /tests/testthat/test-binomial_test.R: -------------------------------------------------------------------------------- 1 | context("test-binomial_test") 2 | 3 | output_signi <- binomial_test(0.5, 1200, 543) 4 | output_notsigni <- binomial_test(0.2, 800, 170) 5 | output_strictcutoff <- binomial_test(0.5, 1200, 543, p_cutoffs = 0.00001) 6 | 7 | 8 | test_that("Output has correct class", { 9 | expect_true(inherits(output_signi, c("data.frame"))) 10 | expect_true(inherits(output_notsigni, c("data.frame"))) 11 | expect_true(inherits(output_strictcutoff, c("data.frame"))) 12 | }) 13 | 14 | test_that("Output has correct size", { 15 | expect_equal(dim(output_signi), c(1, 3)) 16 | expect_equal(dim(output_notsigni), c(1, 3)) 17 | expect_equal(dim(output_strictcutoff), c(1, 3)) 18 | }) 19 | 20 | test_that("Output has correct significance level", { 21 | expect_equal(round(output_signi$pval, 5), 0.0011) 22 | expect_equal(round(output_notsigni$pval, 5), 0.39961) 23 | expect_equal(round(output_strictcutoff$pval, 5), 0.0011) 24 | }) 25 | 26 | test_that("enrichment/depletion correctly determined", { 27 | expect_equal(output_signi$effect, factor("depletion")) 28 | expect_equal(output_notsigni$effect, factor("enrichment")) 29 | expect_equal(output_strictcutoff$effect, factor("depletion")) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_profile_heatmap.R: -------------------------------------------------------------------------------- 1 | context("test-plot_profile_heatmap") 2 | 3 | 4 | # Get input data 5 | mut_mat <- 
readRDS(system.file("states/mut_mat_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | 14 | ## Create heatmap of profile 15 | output_basic <- plot_profile_heatmap(mut_mat, max = 0.1) 16 | 17 | ## Create heatmap of extended profile 18 | output <- plot_profile_heatmap(mut_mat_extended) 19 | 20 | ## Or plot heatmap per tissue 21 | tissue <- c( 22 | "colon", "colon", "colon", 23 | "intestine", "intestine", "intestine", 24 | "liver", "liver", "liver" 25 | ) 26 | 27 | output_tissue <- plot_profile_heatmap(mut_mat_extended, by = tissue) 28 | 29 | ## Or plot the heatmap per sample. 30 | output_sample <- plot_profile_heatmap(mut_mat_extended, 31 | by = colnames(mut_mat_extended), 32 | max = 0.05 33 | ) 34 | 35 | 36 | test_that("Output has correct class", { 37 | expect_true(inherits(output_basic, c("gg"))) 38 | expect_true(inherits(output, c("gg"))) 39 | expect_true(inherits(output_tissue, c("gg"))) 40 | expect_true(inherits(output_sample, c("gg"))) 41 | }) 42 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_strand.R: -------------------------------------------------------------------------------- 1 | context("test-plot_strand") 2 | 3 | # Load stranded mutation matrix data 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Load a reference genome. 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | tissue <- c( 13 | "colon", "colon", "colon", 14 | "intestine", "intestine", "intestine", 15 | "liver", "liver", "liver" 16 | ) 17 | 18 | # Calculate strand counts 19 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 20 | 21 | # Plot the strand in relative mode. 22 | output <- plot_strand(strand_counts) 23 | 24 | # Plot in absolute mode. 
25 | output_absolute <- plot_strand(strand_counts, mode = "absolute") 26 | 27 | # Repeat for replication bias. 28 | mut_mat_repli <- readRDS(system.file("states/mut_mat_repli.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | strand_counts <- strand_occurrences(mut_mat_repli, by = tissue) 32 | output_repli <- plot_strand(strand_counts) 33 | 34 | 35 | test_that("Output has correct class", { 36 | expect_true(inherits(output, c("gg"))) 37 | expect_true(inherits(output_absolute, c("gg"))) 38 | expect_true(inherits(output_repli, c("gg"))) 39 | }) 40 | -------------------------------------------------------------------------------- /man/get_dbs_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_dbs_context.R 3 | \name{get_dbs_context} 4 | \alias{get_dbs_context} 5 | \title{Get DBS context} 6 | \usage{ 7 | get_dbs_context(vcf_list) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRanges/GRangesList} 11 | } 12 | \value{ 13 | A version of the GRanges/GRangesList object, with modified REF and ALT columns. 14 | } 15 | \description{ 16 | Get the DBS COSMIC context on an GRanges/GRangesList object. 17 | It applies the get_dbs_context_gr function to each gr in the input, 18 | which works by changing the REF and ALT columns of the GRanges into the COSMIC types. 19 | } 20 | \examples{ 21 | ## Get GRangesList with DBS. 22 | ## See 'get_mut_type' or 'read_vcfs_as_granges' for more info on how to do this. 
23 | dbs_grl <- readRDS(system.file("states/blood_grl_dbs.rds", 24 | package = "MutationalPatterns" 25 | )) 26 | 27 | ## Set context DBS 28 | get_dbs_context(dbs_grl) 29 | } 30 | \seealso{ 31 | \code{\link{get_mut_type}}, \code{\link{read_vcfs_as_granges}} 32 | 33 | Other DBS: 34 | \code{\link{count_dbs_contexts}()}, 35 | \code{\link{plot_compare_dbs}()}, 36 | \code{\link{plot_dbs_contexts}()}, 37 | \code{\link{plot_main_dbs_contexts}()} 38 | } 39 | \concept{DBS} 40 | -------------------------------------------------------------------------------- /man/count_mbs_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_mbs_contexts.R 3 | \name{count_mbs_contexts} 4 | \alias{count_mbs_contexts} 5 | \title{Count MBS variants grouped by length.} 6 | \usage{ 7 | count_mbs_contexts(vcf_list) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRanges or GRangesList object containing mbs variants.} 11 | } 12 | \value{ 13 | A tibble containing the number of MBS per MBS length per gr. 14 | } 15 | \description{ 16 | Count MBS variants grouped by length. 17 | } 18 | \details{ 19 | Counts the number of mbs grouped by length from a GRanges or GRangesList object containing mbs variants. 20 | This is used, since a COSMIC context has to our knowledge not yet been defined. 21 | This function applies the count_mbs_contexts_gr function to each gr in its input. 22 | It then combines the results in a single tibble and returns this. 23 | } 24 | \examples{ 25 | ## Get a GRangesList or GRanges object with mbs variants. 
26 | mbs_grl <- readRDS(system.file("states/blood_grl_mbs.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | # Count the MBSs 31 | count_mbs_contexts(mbs_grl) 32 | } 33 | \seealso{ 34 | Other MBS: 35 | \code{\link{plot_compare_mbs}()}, 36 | \code{\link{plot_mbs_contexts}()} 37 | } 38 | \concept{MBS} 39 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_regional_similarity.R: -------------------------------------------------------------------------------- 1 | context("test-plot_regional_similarity") 2 | 3 | 4 | 5 | # Load local_cossim object 6 | regional_sims <- readRDS(system.file("states/regional_sims.rds", 7 | package = "MutationalPatterns" 8 | )) 9 | 10 | # Plot the regional similarity 11 | output = plot_regional_similarity(regional_sims) 12 | 13 | # Plot outlier samples with a different color. 14 | output_outlier = plot_regional_similarity(regional_sims, max_cossim = 0.5) 15 | 16 | # Plot samples per chromosome 17 | output_l = plot_regional_similarity(regional_sims, per_chrom = TRUE) 18 | 19 | # Plot samples with a rug 20 | output_rug = plot_regional_similarity(regional_sims, plot_rug = TRUE) 21 | 22 | # Use custom x-axis breaks 23 | output_xbreaks = plot_regional_similarity(regional_sims, x_axis_breaks = c(30, 66, 300)) 24 | 25 | # Run tests 26 | test_that("Output has correct class", { 27 | expect_true(inherits(output, c("gg"))) 28 | expect_true(inherits(output_outlier, c("gg"))) 29 | expect_true(inherits(output_l, c("list"))) 30 | expect_true(inherits(output_l[[1]], c("gg"))) 31 | expect_true(inherits(output_rug, c("gg"))) 32 | expect_true(inherits(output_xbreaks, c("gg"))) 33 | }) 34 | 35 | test_that("Output per chromosome has correct length", { 36 | expect_equal(length(output_l), 3) 37 | }) -------------------------------------------------------------------------------- /R/get_ref_alt.R: -------------------------------------------------------------------------------- 1 | #' get REF column 
from GRanges 2 | #' 3 | #' Retrieves the REF column from a GRanges object. 4 | #' This can be spelled as REF, Ref or ref. 5 | #' 6 | #' @param gr GRanges object 7 | #' 8 | #' @return DNAStringSet 9 | #' @noRd 10 | #' 11 | .get_ref <- function(gr) { 12 | gr_cols <- colnames(S4Vectors::mcols(gr)) 13 | if ("REF" %in% gr_cols) { 14 | ref <- gr$REF 15 | } else if ("ref" %in% gr_cols) { 16 | ref <- gr$ref 17 | } else if ("Ref" %in% gr_cols) { 18 | ref <- gr$Ref 19 | } else { 20 | stop("Some of your data is missing a REF column.", call. = FALSE) 21 | ref <- Biostrings::DNAStringSet() 22 | } 23 | return(ref) 24 | } 25 | 26 | #' get ALT column from GRanges 27 | #' 28 | #' Retrieves the ALT column from a GRanges object. 29 | #' This can be spelled as ALT, Alt or alt 30 | #' 31 | #' @param gr GRanges object 32 | #' 33 | #' @return DNAStringSetList 34 | #' @noRd 35 | #' 36 | .get_alt <- function(gr) { 37 | gr_cols <- colnames(S4Vectors::mcols(gr)) 38 | if ("ALT" %in% gr_cols) { 39 | alt <- gr$ALT 40 | } else if ("alt" %in% gr_cols) { 41 | alt <- gr$alt 42 | } else if ("Alt" %in% gr_cols) { 43 | alt <- gr$Alt 44 | } else { 45 | stop("Some of your data is missing a ALT column.", call. = FALSE) 46 | alt <- Biostrings::DNAStringSetList() 47 | } 48 | return(alt) 49 | } 50 | -------------------------------------------------------------------------------- /R/pool_mut_matrix.R: -------------------------------------------------------------------------------- 1 | #' Pool multiple samples from a mutation matrix together 2 | #' 3 | #' The mutation counts of columns (samples) are added up according to the grouping variable.
4 | #' 5 | #' @param mut_matrix Mutation count matrix (dimensions: x mutation types 6 | #' X n samples) 7 | #' @param grouping Grouping variable 8 | #' 9 | #' @return Mutation count matrix (dimensions: x mutation types 10 | #' X n groups) 11 | #' @export 12 | #' @importFrom magrittr %>% 13 | #' 14 | #' @examples 15 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 16 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 17 | #' package = "MutationalPatterns" 18 | #' )) 19 | #' grouping <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3)) 20 | #' pool_mut_mat(mut_mat, grouping) 21 | pool_mut_mat <- function(mut_matrix, grouping) { 22 | # These variables use non standard evaluation. 23 | # To avoid R CMD check complaints we initialize them to NULL. 24 | . <- NULL 25 | 26 | grouping <- factor(grouping) 27 | mut_mat_group <- mut_matrix %>% 28 | t(.) %>% 29 | tibble::as_tibble() %>% 30 | dplyr::mutate(factor = grouping) %>% 31 | dplyr::group_by(factor) %>% 32 | dplyr::summarise_all(sum) %>% 33 | dplyr::select(-factor) %>% 34 | t(.) 35 | colnames(mut_mat_group) <- levels(grouping) 36 | return(mut_mat_group) 37 | } 38 | -------------------------------------------------------------------------------- /man/convert_sigs_to_ref.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_sigs_to_ref.R 3 | \name{convert_sigs_to_ref} 4 | \alias{convert_sigs_to_ref} 5 | \title{Convert tissue specific signature exposures to reference} 6 | \usage{ 7 | convert_sigs_to_ref(fit_res) 8 | } 9 | \arguments{ 10 | \item{fit_res}{Named list with signature contributions and reconstructed 11 | mutation matrix} 12 | } 13 | \value{ 14 | The input fit_res, but with converted signature contributions. 
15 | } 16 | \description{ 17 | This function converts tissue specific signature contributions into 18 | reference signature contributions. This works on SNV signatures from SIGNAL. 19 | It uses a conversion matrix to do the conversion. 20 | The output can include possible artifact signatures. 21 | } 22 | \examples{ 23 | 24 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 25 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 26 | package = "MutationalPatterns" 27 | )) 28 | 29 | ## Get tissue specific signatures 30 | signatures <- get_known_signatures(source = "SIGNAL", sig_type = "tissue", tissue_type = "Skin") 31 | 32 | ## Fit tissue specific signatures 33 | fit_res <- fit_to_signatures(mut_mat, signatures) 34 | 35 | ## Convert the tissue specific signatures exposures to reference 36 | fit_res <- convert_sigs_to_ref(fit_res) 37 | } 38 | -------------------------------------------------------------------------------- /R/binomial_test.R: -------------------------------------------------------------------------------- 1 | #' Binomial test for enrichment or depletion testing 2 | #' 3 | #' This function performs lower-tail binomial test for depletion and 4 | #' upper-tail test for enrichment 5 | #' 6 | #' @param p Probability of success 7 | #' @param n Number of trials 8 | #' @param x Observed number of successes 9 | #' @param p_cutoffs Significance cutoff for the p value. 
Default: 0.05 10 | #' @return A data.frame with direction of effect (enrichment/depletion), 11 | #' P-value and significance asterisks 12 | #' 13 | #' @examples 14 | #' binomial_test(0.5, 1200, 543) 15 | #' binomial_test(0.2, 800, 150) 16 | #' @export 17 | 18 | binomial_test <- function(p, n, x, p_cutoffs = 0.05) { 19 | # Calculate expected number of successes 20 | expected <- p * n 21 | 22 | # Handle depletion 23 | if (x < expected) { 24 | # do lower tail test 25 | pval <- stats::pbinom(x, n, p, lower.tail = TRUE) 26 | effect <- "depletion" 27 | } 28 | 29 | # Handle enrichment 30 | else { 31 | # do upper tail test 32 | pval <- stats::pbinom(x - 1, n, p, lower.tail = FALSE) 33 | effect <- "enrichment" 34 | } 35 | 36 | # make test two sided. 37 | pval <- 2 * min(pval, 1 - pval) 38 | 39 | # Add significance asterisks 40 | significant <- .get_sig_star(pval, p_cutoffs) 41 | 42 | res <- data.frame("effect" = factor(effect), pval, significant) 43 | return(res) 44 | } 45 | -------------------------------------------------------------------------------- /man/cluster_signatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cluster_signatures.R 3 | \name{cluster_signatures} 4 | \alias{cluster_signatures} 5 | \title{Signature clustering function} 6 | \usage{ 7 | cluster_signatures(signatures, method = "complete") 8 | } 9 | \arguments{ 10 | \item{signatures}{Matrix with 96 trinucleotides (rows) and any number of 11 | signatures (columns)} 12 | 13 | \item{method}{The agglomeration method to be used for hierarchical 14 | clustering. This should be one of "ward.D", "ward.D2", "single", "complete", 15 | "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or 16 | "centroid" (= UPGMC).
Default = "complete".} 17 | } 18 | \value{ 19 | hclust object 20 | } 21 | \description{ 22 | Hierarchical clustering of signatures based on cosine similarity 23 | } 24 | \examples{ 25 | ## Get signatures 26 | signatures <- get_known_signatures() 27 | 28 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 29 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 30 | package = "MutationalPatterns" 31 | )) 32 | 33 | 34 | ## Hierarchically cluster the cancer signatures based on cosine similarity 35 | hclust_signatures <- cluster_signatures(signatures) 36 | 37 | ## Plot dendrogram 38 | plot(hclust_signatures) 39 | } 40 | \seealso{ 41 | \code{\link{plot_contribution_heatmap}} 42 | } 43 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_enrichment_depletion.R: -------------------------------------------------------------------------------- 1 | context("test-plot_enrichment_depletion") 2 | 3 | # Read distribution data 4 | distr <- readRDS(system.file("states/distr_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | # Set tissue 8 | tissue <- c( 9 | "colon", "colon", "colon", 10 | "intestine", "intestine", "intestine", 11 | "liver", "liver", "liver" 12 | ) 13 | 14 | ## Perform the enrichment/depletion test. 15 | distr_test <- enrichment_depletion_test(distr, by = tissue) 16 | distr_test2 <- enrichment_depletion_test(distr) 17 | 18 | ## Plot the enrichment/depletion 19 | output <- plot_enrichment_depletion(distr_test) 20 | output_persample <- plot_enrichment_depletion(distr_test2) 21 | 22 | ## Test with p instead of fdr 23 | output_pval <- plot_enrichment_depletion(distr_test, sig_type = "p") 24 | 25 | ## Use multiple (max 3) significance cutoffs. 
26 | distr_multistars <- enrichment_depletion_test(distr, 27 | by = tissue, 28 | p_cutoffs = c(0.05, 0.01, 0.005), 29 | fdr_cutoffs = c(0.1, 0.05, 0.01) 30 | ) 31 | output_multistars <- plot_enrichment_depletion(distr_multistars) 32 | 33 | 34 | # Perform tests 35 | test_that("Output has correct class", { 36 | expect_true(inherits(output, c("gg"))) 37 | expect_true(inherits(output_persample, c("gg"))) 38 | expect_true(inherits(output_pval, c("gg"))) 39 | expect_true(inherits(output_multistars, c("gg"))) 40 | }) 41 | -------------------------------------------------------------------------------- /tests/testthat/test-fit_to_signatures_bootstrapped.R: -------------------------------------------------------------------------------- 1 | context("test-fit_to_signatures_bootstrapped") 2 | 3 | # Get mut_mat 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get signatures 9 | signatures <- get_known_signatures() 10 | 11 | test_that("Output has correct class", { 12 | output <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05) 13 | expect_true(inherits(output, "matrix")) 14 | 15 | output_ori <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05, method = "regular") 16 | expect_true(inherits(output_ori, "matrix")) 17 | 18 | output_ori_10 <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05, method = "regular_10+") 19 | expect_true(inherits(output_ori_10, "matrix")) 20 | 21 | output_best <- fit_to_signatures_bootstrapped(mut_mat, signatures[,1:5], n_boots = 2, max_delta = 0.05, method = "strict_best_subset") 22 | expect_true(inherits(output_best, "matrix")) 23 | }) 24 | 25 | expected <- readRDS(system.file("states/bootstrapped_snv_refit.rds", 26 | package = "MutationalPatterns" 27 | )) 28 | 29 | test_that("Output is equal to expected", { 30 | set.seed(42) 31 | output <- fit_to_signatures_bootstrapped(mut_mat, signatures, 
n_boots = 2, max_delta = 0.05) 32 | expect_equal(output, expected) 33 | }) 34 | -------------------------------------------------------------------------------- /man/type_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/type_context.R 3 | \name{type_context} 4 | \alias{type_context} 5 | \title{Retrieve context of base substitution types} 6 | \usage{ 7 | type_context(vcf, ref_genome, extension = 1) 8 | } 9 | \arguments{ 10 | \item{vcf}{A CollapsedVCF object} 11 | 12 | \item{ref_genome}{Reference genome} 13 | 14 | \item{extension}{The number of bases, that's extracted upstream and 15 | downstream of the base substitutions. (Default: 1).} 16 | } 17 | \value{ 18 | Mutation types and context character vectors in a named list 19 | } 20 | \description{ 21 | A function to extract the bases 3' upstream and 5' downstream of the base 22 | substitution types. 23 | } 24 | \examples{ 25 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 26 | ## following data: 27 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Load the corresponding reference genome. 
32 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 33 | library(ref_genome, character.only = TRUE) 34 | 35 | ## Get type context 36 | type_context <- type_context(vcfs[[1]], ref_genome) 37 | 38 | ## Get larger type context 39 | type_context_larger <- type_context(vcfs[[1]], ref_genome, extension = 2) 40 | } 41 | \seealso{ 42 | \code{\link{read_vcfs_as_granges}}, 43 | \code{\link{mut_context}} 44 | } 45 | -------------------------------------------------------------------------------- /man/plot_river.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_river.R 3 | \name{plot_river} 4 | \alias{plot_river} 5 | \title{Plot a riverplot} 6 | \usage{ 7 | plot_river(mut_matrix, condensed = FALSE) 8 | } 9 | \arguments{ 10 | \item{mut_matrix}{Matrix containing mutation counts.} 11 | 12 | \item{condensed}{More condensed plotting format. Default = F.} 13 | } 14 | \value{ 15 | A ggplot object 16 | } 17 | \description{ 18 | Function to plot a SNV mutation matrix as a riverplot. 
19 | This is especially useful when looking at a wide 20 | mutational context 21 | } 22 | \examples{ 23 | 24 | ## See the 'mut_matrix()' examples for how we obtained the 25 | ## mutation matrix information: 26 | ## Get regular matrix 27 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Create heatmap of profile 32 | plot_river(mut_mat[,c(1,4)]) 33 | 34 | ## Get extended matrix 35 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds", 36 | package = "MutationalPatterns" 37 | )) 38 | 39 | ## Create heatmap of extended profile 40 | plot_river(mut_mat_extended[,c(1,4)]) 41 | 42 | ## Create condensed version of riverplot 43 | plot_river(mut_mat_extended[,c(1,4)], condensed = TRUE) 44 | 45 | } 46 | \seealso{ 47 | \code{\link{mut_matrix}}, 48 | \code{\link{plot_96_profile}}, 49 | \code{\link{plot_profile_heatmap}} 50 | } 51 | -------------------------------------------------------------------------------- /R/get_sig_start.R: -------------------------------------------------------------------------------- 1 | #' Determine the number of significance stars 2 | #' 3 | #' The number of significance stars is determined based on the statistical value 4 | #' and the significance cutoffs. 5 | #' 6 | #' @param val Statistical value. Either a p value or fdr. 7 | #' @param cutoffs Significance cutoffs for the statistical value. 8 | #' 9 | #' @return A vector of significance stars and empty strings (not significant). 10 | #' @noRd 11 | #' 12 | .get_sig_star <- function(val, cutoffs) { 13 | 14 | # Get name of cutoffs argument 15 | cutoffs_name <- deparse(substitute(cutoffs)) 16 | 17 | # Validate cutoff argument 18 | if (length(cutoffs) > 3) { 19 | stop(paste0("The length of the ", cutoffs_name, " argument can't be higher than 3."), 20 | call.
= FALSE 21 | ) 22 | } 23 | 24 | if (!isTRUE(all.equal(cutoffs, sort(cutoffs, decreasing = TRUE)))) { 25 | stop(paste0("The ", cutoffs_name, " argument should be in decreasing order."), 26 | call. = FALSE 27 | ) 28 | } 29 | 30 | # Add -Infs to cutoffs if the length is lower than 3. 31 | # Since a val can't be lower than -Inf, these cutoffs will never be reached. 32 | cutoffs <- c(cutoffs, rep(-Inf, 3 - length(cutoffs))) 33 | 34 | 35 | # Determine significance level 36 | stars <- dplyr::case_when( 37 | val < cutoffs[3] ~ "***", 38 | val < cutoffs[2] ~ "**", 39 | val < cutoffs[1] ~ "*", 40 | TRUE ~ "" 41 | ) 42 | return(stars) 43 | } 44 | -------------------------------------------------------------------------------- /man/count_dbs_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_dbs_contexts.R 3 | \name{count_dbs_contexts} 4 | \alias{count_dbs_contexts} 5 | \title{Count DBS contexts} 6 | \usage{ 7 | count_dbs_contexts(vcf_list) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRanges or GRangesList object containing DBS mutations in which the context was added with get_dbs_context.} 11 | } 12 | \value{ 13 | A tibble containing the number of DBS per COSMIC context per gr. 14 | } 15 | \description{ 16 | Count DBS contexts 17 | } 18 | \details{ 19 | Counts the number of DBS per COSMIC context from a GRanges or GRangesList object containing DBS variants. 20 | This function applies the count_dbs_contexts_gr function to each gr in its input. 21 | It then combines the results in a single tibble and returns this. 22 | } 23 | \examples{ 24 | ## Get a GRangesList or GRanges object with DBS contexts. 25 | ## See 'dbs_get_context' for more info on how to do this.
26 | grl_dbs_context <- readRDS(system.file("states/blood_grl_dbs_context.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | # Count the DBS contexts 31 | count_dbs_contexts(grl_dbs_context) 32 | } 33 | \seealso{ 34 | \code{\link{get_dbs_context}} 35 | 36 | Other DBS: 37 | \code{\link{get_dbs_context}()}, 38 | \code{\link{plot_compare_dbs}()}, 39 | \code{\link{plot_dbs_contexts}()}, 40 | \code{\link{plot_main_dbs_contexts}()} 41 | } 42 | \concept{DBS} 43 | -------------------------------------------------------------------------------- /man/mut_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_context.R 3 | \name{mut_context} 4 | \alias{mut_context} 5 | \title{Retrieve context of base substitutions} 6 | \usage{ 7 | mut_context(vcf, ref_genome, extension = 1) 8 | } 9 | \arguments{ 10 | \item{vcf}{A GRanges object} 11 | 12 | \item{ref_genome}{Reference genome} 13 | 14 | \item{extension}{The number of bases, that's extracted upstream and 15 | downstream of the base substitutions. (Default: 1).} 16 | } 17 | \value{ 18 | Character vector with the context of the base substitutions 19 | } 20 | \description{ 21 | A function to extract the bases 3' upstream and 5' downstream of the base 22 | substitutions from the reference genome. The user can choose how many bases 23 | are extracted. 24 | } 25 | \examples{ 26 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 27 | ## following data: 28 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | 32 | ## Load the corresponding reference genome.
33 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 34 | library(ref_genome, character.only = TRUE) 35 | 36 | ## Get the standard context 37 | mut_context <- mut_context(vcfs[[1]], ref_genome) 38 | 39 | ## Get larger context 40 | mut_context_larger <- mut_context(vcfs[[1]], ref_genome, extension = 2) 41 | } 42 | \seealso{ 43 | \code{\link{read_vcfs_as_granges}}, 44 | } 45 | -------------------------------------------------------------------------------- /man/plot_mbs_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_mbs_contexts.R 3 | \name{plot_mbs_contexts} 4 | \alias{plot_mbs_contexts} 5 | \title{Plot the MBS contexts} 6 | \usage{ 7 | plot_mbs_contexts(counts, same_y = TRUE) 8 | } 9 | \arguments{ 10 | \item{counts}{A tibble containing the number of MBS per MBS length.} 11 | 12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.} 13 | } 14 | \value{ 15 | A ggplot figure. 16 | } 17 | \description{ 18 | Plot the MBS contexts 19 | } 20 | \details{ 21 | Plots the number of MBS per MBS length per sample. 22 | It takes a tibble with counts as its input. This tibble can be generated by count_mbs_contexts 23 | Each sample is plotted in a separate facet. 24 | The same y axis can be used for all samples or a separate y axis can be used. 25 | } 26 | \examples{ 27 | ## Get The mbs counts 28 | ## See 'count_mbs_contexts()' for more info on how to do this. 29 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds", 30 | package = "MutationalPatterns" 31 | )) 32 | 33 | ## Plot contexts 34 | plot_mbs_contexts(mbs_counts) 35 | 36 | ## Use a different y axis for all samples. 
37 | plot_mbs_contexts(mbs_counts, same_y = FALSE) 38 | } 39 | \seealso{ 40 | \code{\link{count_mbs_contexts}} 41 | 42 | Other MBS: 43 | \code{\link{count_mbs_contexts}()}, 44 | \code{\link{plot_compare_mbs}()} 45 | } 46 | \concept{MBS} 47 | -------------------------------------------------------------------------------- /tests/testthat/test-convert_sigs_to_ref.R: -------------------------------------------------------------------------------- 1 | context("test-convert_sigs_to_ref") 2 | 3 | # Load mutation matrix 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get signatures 9 | signatures <- get_known_signatures(source = "SIGNAL", sig_type = "tissue", tissue_type = "Skin") 10 | 11 | # Fit tissue specific signatures 12 | fit_res <- fit_to_signatures(mut_mat, signatures) 13 | 14 | # Convert the tissue specific signatures exposures to reference 15 | output <- convert_sigs_to_ref(fit_res) 16 | 17 | # Run tests 18 | test_that("Output has correct class", { 19 | expect_true(inherits(output, "list")) 20 | expect_true(inherits(output$contribution, "matrix")) 21 | expect_true(inherits(output$reconstructed, "matrix")) 22 | }) 23 | 24 | test_that("Output has correct dimensions", { 25 | expect_equal(dim(output$contribution), c(38, 9)) 26 | }) 27 | 28 | test_that("Nr. mutations hasn't changed", { 29 | expect_equal(colSums(output$contribution), colSums(fit_res$contribution)) 30 | }) 31 | 32 | # Test that an error is thrown when the sig names don't match. 
33 | fit_res_badname <- fit_res 34 | rownames(fit_res_badname$contribution)[1] <- "fakename" 35 | test_that("An error is thrown when the sig names don't match", { 36 | expect_error( 37 | { 38 | convert_sigs_to_ref(fit_res_badname) 39 | }, 40 | "The signature names of the fit_res don't match that of" 41 | ) 42 | }) 43 | -------------------------------------------------------------------------------- /tests/testthat/test-context_potential_damage_analysis.R: -------------------------------------------------------------------------------- 1 | context("test-context_potential_damage_analysis") 2 | 3 | 4 | # Get contexts 5 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | contexts <- rownames(mut_mat)[1:6] 10 | 11 | # Load the corresponding reference genome. 12 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 13 | library(ref_genome, character.only = TRUE) 14 | 15 | # Load transcription database 16 | library("TxDb.Hsapiens.UCSC.hg19.knownGene") 17 | txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene 18 | 19 | # Set gene ids 20 | # TP53 21 | gene_ids <- c(7157) 22 | 23 | # Run the function 24 | output <- context_potential_damage_analysis(contexts, txdb, ref_genome, gene_ids) 25 | 26 | # Run the function with verbosity 27 | output_verbose <- context_potential_damage_analysis(contexts, txdb, ref_genome, gene_ids, verbose = TRUE) 28 | 29 | # Run unit tests 30 | test_that("Output has correct class", { 31 | expect_true(inherits(output, "tbl_df")) 32 | expect_true(inherits(output_verbose, "tbl_df")) 33 | }) 34 | 35 | test_that("Output has correct size", { 36 | expect_equal(dim(output), c(24, 5)) 37 | expect_equal(dim(output_verbose), c(24, 5)) 38 | }) 39 | 40 | # Expected 41 | expected <- readRDS(system.file("states/context_mismatches.rds", 42 | package = "MutationalPatterns" 43 | )) 44 | 45 | test_that("Output is equal to expected", { 46 | expect_equal(output, expected) 47 | }) 48 | 
-------------------------------------------------------------------------------- /tests/testthat/test-plot_lesion_segregation.R: -------------------------------------------------------------------------------- 1 | context("test-plot_lesion_segregation") 2 | 3 | 4 | # Load GRangesList 5 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | # Only use two samples to reduce runtime 10 | grl <- grl[1:2] 11 | 12 | # Select sample 13 | gr <- grl[[1]] 14 | 15 | # Perform function 16 | output <- plot_lesion_segregation(grl) 17 | output_singlesample <- plot_lesion_segregation(gr, sample_name = "Colon1") 18 | output_noname <- plot_lesion_segregation(gr) 19 | output_l <- plot_lesion_segregation(gr, per_chrom = TRUE, sample_name = "Colon1") 20 | output_chr_filter = plot_lesion_segregation(grl, chromosomes = c("chr2", "chr3")) 21 | output_chr_filter2 = plot_lesion_segregation(grl, chromosomes = c("2", "3")) 22 | output_subsample <- plot_lesion_segregation(grl, subsample = 0.1) 23 | 24 | test_that("Output has correct class", { 25 | expect_true(inherits(output, c("gg"))) 26 | expect_true(inherits(output_singlesample, c("gg"))) 27 | expect_true(inherits(output_noname, c("gg"))) 28 | expect_true(inherits(output_l, c("list"))) 29 | expect_true(inherits(output_l[[1]], c("gg"))) 30 | expect_true(inherits(output_chr_filter, c("gg"))) 31 | expect_true(inherits(output_chr_filter2, c("gg"))) 32 | expect_true(inherits(output_subsample, c("gg"))) 33 | }) 34 | 35 | test_that("Output per chromosome has correct length", { 36 | expect_equal(length(output_l), 22) 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test-mut_type_occurrences.R: -------------------------------------------------------------------------------- 1 | context("mut_type_occurrences") 2 | 3 | # Read vcfs 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = 
"MutationalPatterns" 6 | )) 7 | 8 | ## Load a reference genome. 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | # Get the type occurrences for all VCF objects. 13 | output <- mut_type_occurrences(vcfs, ref_genome) 14 | 15 | # Get type occurence for single sample 16 | output_single_sample <- mut_type_occurrences(vcfs[[1]], ref_genome) 17 | 18 | # Get type occurence for few muts 19 | output_fewmuts <- mut_type_occurrences(vcfs[[1]][1:2], ref_genome) 20 | 21 | test_that("Output has correct class", { 22 | expect_true(inherits(output, "data.frame")) 23 | expect_true(inherits(output_single_sample, "data.frame")) 24 | expect_true(inherits(output_fewmuts, "data.frame")) 25 | }) 26 | 27 | test_that("Outpus has correct dimensions", { 28 | expect_equal(dim(output), c(9, 8)) 29 | expect_equal(dim(output_single_sample), c(1, 8)) 30 | expect_equal(dim(output_fewmuts), c(1, 8)) 31 | }) 32 | 33 | test_that("Transforms correctly", { 34 | expect_equal( 35 | output_single_sample, 36 | structure(list( 37 | `C>A` = 28L, `C>G` = 5L, `C>T` = 109L, `T>A` = 12L, 38 | `T>C` = 30L, `T>G` = 12L, `C>T at CpG` = 59L, `C>T other` = 50L 39 | ), 40 | row.names = "My_sample", class = "data.frame" 41 | ) 42 | ) 43 | }) 44 | -------------------------------------------------------------------------------- /man/cos_sim_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cos_sim_matrix.R 3 | \name{cos_sim_matrix} 4 | \alias{cos_sim_matrix} 5 | \title{Compute all pairwise cosine similarities between mutational profiles/signatures} 6 | \usage{ 7 | cos_sim_matrix(mut_matrix1, mut_matrix2) 8 | } 9 | \arguments{ 10 | \item{mut_matrix1}{mutation count matrix (dimensions: a mutation features X n samples)} 11 | 12 | \item{mut_matrix2}{96 mutation count matrix (dimensions: a mutation features X m samples)} 13 | } 14 | \value{ 15 | 
Matrix with pairwise cosine similarities (dimensions: n mutational profiles X m mutational profiles) 16 | } 17 | \description{ 18 | Computes all pairwise cosine similarities between the mutational profiles provided in the two mutation count matrices. 19 | The cosine similarity is a value between 0 (distinct) and 1 (identical) and indicates how much two vectors are alike. 20 | } 21 | \examples{ 22 | ## Get signatures 23 | signatures <- get_known_signatures() 24 | 25 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 26 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | 31 | ## Calculate the cosine similarity between each COSMIC signature and each 96 mutational profile 32 | cos_sim_matrix(mut_mat, signatures) 33 | } 34 | \seealso{ 35 | \code{\link{mut_matrix}}, 36 | \code{\link{fit_to_signatures}}, 37 | \code{\link{plot_cosine_heatmap}} 38 | } 39 | -------------------------------------------------------------------------------- /man/plot_correlation_bootstrap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_correlation_bootstrap.R 3 | \name{plot_correlation_bootstrap} 4 | \alias{plot_correlation_bootstrap} 5 | \title{Plots the correlation between bootstrapped signature contributions} 6 | \usage{ 7 | plot_correlation_bootstrap(contri_boots, per_sample = TRUE) 8 | } 9 | \arguments{ 10 | \item{contri_boots}{A dataframe with bootstrapped signature contributions.} 11 | 12 | \item{per_sample}{Whether or not a plot should be made per sample. Default: TRUE.} 13 | } 14 | \value{ 15 | A list of ggplot2 objects if run per sample. 16 | Else it returns a single ggplot2 object. 17 | } 18 | \description{ 19 | This function plots the pearson correlation between signatures. 20 | This can be done per sample or for all samples together. 
21 | It returns a list of the created figures. 22 | } 23 | \examples{ 24 | 25 | ## Get a dataframe with bootstrapped signature contributions. 26 | ## See 'fit_to_signatures_bootstrapped()' for how to do this. 27 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Plot the correlations between signatures per sample 32 | fig_l <- plot_correlation_bootstrap(contri_boots) 33 | 34 | ## Look at the figure of the first sample. 35 | fig_l[[1]] 36 | 37 | ## You can also look at the correlation for all samples combined 38 | plot_correlation_bootstrap(contri_boots, per_sample = FALSE) 39 | } 40 | -------------------------------------------------------------------------------- /man/plot_signature_strand_bias.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_signature_strand_bias.R 3 | \name{plot_signature_strand_bias} 4 | \alias{plot_signature_strand_bias} 5 | \title{Plot signature strand bias} 6 | \usage{ 7 | plot_signature_strand_bias(signatures_strand_bias) 8 | } 9 | \arguments{ 10 | \item{signatures_strand_bias}{Signature matrix with 192 features} 11 | } 12 | \value{ 13 | Barplot 14 | } 15 | \description{ 16 | Plot strand bias per mutation type for each signature. 17 | } 18 | \examples{ 19 | ## See the 'mut_matrix()' example for how we obtained the following 20 | ## mutation matrix. 21 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 22 | package = "MutationalPatterns" 23 | )) 24 | 25 | ## Extracting signatures can be computationally intensive, so 26 | ## we use pre-computed data generated with the following command: 27 | # nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2) 28 | 29 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds", 30 | package = "MutationalPatterns" 31 | )) 32 | 33 | ## Provide column names for the plot. 
34 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B") 35 | 36 | ## Create figure 37 | plot_signature_strand_bias(nmf_res_strand$signatures) 38 | 39 | ## You can also plot the bias of samples 40 | plot_signature_strand_bias(mut_mat_s[, c(1, 2)]) 41 | } 42 | \seealso{ 43 | \code{\link{extract_signatures}}, 44 | \code{\link{mut_matrix}} 45 | } 46 | -------------------------------------------------------------------------------- /man/count_indel_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count_indel_contexts.R 3 | \name{count_indel_contexts} 4 | \alias{count_indel_contexts} 5 | \title{Count indel contexts} 6 | \usage{ 7 | count_indel_contexts(vcf_list) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRanges or GRangesList object containing indel mutations in which the context was added with get_indel_context.} 11 | } 12 | \value{ 13 | A tibble containing the number of indels per COSMIC context per gr. 14 | } 15 | \description{ 16 | Count indel contexts 17 | } 18 | \details{ 19 | Counts the number of indels per COSMIC context from a GRanges or GRangesList object containing indel mutations. 20 | This function applies the count_indel_contexts_gr function to each gr in its input. 21 | It then combines the results in a single tibble and returns this. 22 | } 23 | \examples{ 24 | ## Get a GRangesList or GRanges object with indel contexts. 25 | ## See 'get_indel_context()' for more info on how to do this. 
26 | grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | # Count the indel contexts 31 | count_indel_contexts(grl_indel_context) 32 | } 33 | \seealso{ 34 | \code{\link{get_indel_context}} 35 | 36 | Other Indels: 37 | \code{\link{get_indel_context}()}, 38 | \code{\link{plot_compare_indels}()}, 39 | \code{\link{plot_indel_contexts}()}, 40 | \code{\link{plot_main_indel_contexts}()} 41 | } 42 | \concept{Indels} 43 | -------------------------------------------------------------------------------- /man/mut_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mut_matrix.R 3 | \name{mut_matrix} 4 | \alias{mut_matrix} 5 | \title{Make mutation count matrix of 96 trinucleotides} 6 | \usage{ 7 | mut_matrix(vcf_list, ref_genome, extension = 1) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRangesList or GRanges object.} 11 | 12 | \item{ref_genome}{BSgenome reference genome object} 13 | 14 | \item{extension}{The number of bases, that's extracted upstream and 15 | downstream of the base substitutions. (Default: 1).} 16 | } 17 | \value{ 18 | 96 mutation count matrix 19 | } 20 | \description{ 21 | Make 96 trinucleotide mutation count matrix 22 | } 23 | \examples{ 24 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 25 | ## following data: 26 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | ## Load the corresponding reference genome. 31 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 32 | library(ref_genome, character.only = TRUE) 33 | 34 | ## Construct a mutation matrix from the loaded VCFs in comparison to the 35 | ## ref_genome. 36 | mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome) 37 | 38 | ## Construct a mutation matrix with a larger context. 
39 | ## This is most useful when you have many mutations per sample. 40 | mut_mat_extended <- mut_matrix(vcf_list = grl, ref_genome = ref_genome, extension = 2) 41 | } 42 | \seealso{ 43 | \code{\link{read_vcfs_as_granges}} 44 | } 45 | -------------------------------------------------------------------------------- /R/cluster_signatures.R: -------------------------------------------------------------------------------- 1 | #' Signature clustering function 2 | #' 3 | #' Hierarchical clustering of signatures based on cosine similarity 4 | #' 5 | #' @param signatures Matrix with 96 trinucleotides (rows) and any number of 6 | #' signatures (columns) 7 | #' @param method The agglomeration method to be used for hierarchical 8 | #' clustering. This should be one of "ward.D", "ward.D2", "single", "complete", 9 | #' "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or 10 | #' "centroid" (= UPGMC). Default = "complete". 11 | #' @return An \code{hclust} object, as returned by \code{hclust()} 12 | #' 13 | #' @examples 14 | #' ## Get signatures 15 | #' signatures <- get_known_signatures() 16 | #' 17 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 18 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 19 | #' package = "MutationalPatterns" 20 | #' )) 21 | #' 22 | #' 23 | #' ## Hierarchically cluster the cancer signatures based on cosine similarity 24 | #' hclust_signatures <- cluster_signatures(signatures) 25 | #' 26 | #' ## Plot dendrogram 27 | #' plot(hclust_signatures) 28 | #' @seealso 29 | #' \code{\link{plot_contribution_heatmap}} 30 | #' 31 | #' @export 32 | 33 | cluster_signatures <- function(signatures, method = "complete") { 34 | # Construct the pairwise cosine similarity matrix between all signatures 35 | sim <- cos_sim_matrix(signatures, signatures) 36 | # Transform similarity to distance (1 - cosine similarity) 37 | dist <- as.dist(1 - sim) 38 | # Perform hierarchical clustering with the requested agglomeration method 39 | hc_sig_cos <- hclust(dist, method = method) 40 | return(hc_sig_cos) 41 | } 42 | 
-------------------------------------------------------------------------------- /man/strand_occurrences.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/strand_occurrences.R 3 | \name{strand_occurrences} 4 | \alias{strand_occurrences} 5 | \title{Count occurrences per base substitution type and strand} 6 | \usage{ 7 | strand_occurrences(mut_mat_s, by = NA) 8 | } 9 | \arguments{ 10 | \item{mut_mat_s}{192 feature mutation count matrix, result from 11 | 'mut_matrix_stranded()'} 12 | 13 | \item{by}{Character vector with grouping info, optional} 14 | } 15 | \value{ 16 | A data.frame with the total number of mutations and relative 17 | contribution within group per base substitution type and strand 18 | } 19 | \description{ 20 | For each base substitution type and strand the total number 21 | of mutations and the relative contribution within a group is returned. 22 | } 23 | \examples{ 24 | ## See the 'mut_matrix_stranded()' example for how we obtained the 25 | ## following mutation matrix. 26 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 27 | package = "MutationalPatterns" 28 | )) 29 | 30 | ## Load a reference genome. 
31 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 32 | library(ref_genome, character.only = TRUE) 33 | 34 | tissue <- c( 35 | "colon", "colon", "colon", 36 | "intestine", "intestine", "intestine", 37 | "liver", "liver", "liver" 38 | ) 39 | 40 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 41 | } 42 | \seealso{ 43 | \code{\link{mut_matrix_stranded}}, 44 | \code{\link{plot_strand}}, 45 | \code{\link{plot_strand_bias}} 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_contribution_heatmap.R: -------------------------------------------------------------------------------- 1 | context("test-plot_contribution_heatmap") 2 | 3 | 4 | # Read in nmf results 5 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | rownames(nmf_res$contribution) <- c("Signature A", "Signature B") 9 | 10 | # Plot with clustering. 11 | output <- plot_contribution_heatmap(nmf_res$contribution, cluster_samples = TRUE, cluster_sigs = TRUE) 12 | 13 | # Define signature and sample order for plotting. 
14 | sig_order <- c("Signature B", "Signature A") 15 | sample_order <- c( 16 | "colon1", "colon2", "colon3", "intestine1", "intestine2", 17 | "intestine3", "liver3", "liver2", "liver1" 18 | ) 19 | output_supplied_order <- plot_contribution_heatmap(nmf_res$contribution, 20 | cluster_samples = FALSE, 21 | sig_order = sig_order, sample_order = sample_order 22 | ) 23 | 24 | ## Contribution heatmap with text values 25 | output_text <- plot_contribution_heatmap(nmf_res$contribution, plot_values = TRUE) 26 | 27 | # Read in signature refitting results 28 | snv_refit <- readRDS(system.file("states/strict_snv_refit.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | output_refit <- plot_contribution_heatmap(snv_refit$contribution, cluster_samples = TRUE, cluster_sigs = TRUE) 32 | 33 | 34 | test_that("Output has correct class", { 35 | expect_true(inherits(output, "gg")) 36 | expect_true(inherits(output_supplied_order, "gg")) 37 | expect_true(inherits(output_text, "gg")) 38 | expect_true(inherits(output_refit, "gg")) 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_strand_bias.R: -------------------------------------------------------------------------------- 1 | context("test-plot_strand_bias") 2 | 3 | # Read stranded mut_mat 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | tissue <- c( 9 | "colon", "colon", "colon", 10 | "intestine", "intestine", "intestine", 11 | "liver", "liver", "liver" 12 | ) 13 | 14 | ## Perform the strand bias test. 15 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 16 | strand_bias <- strand_bias_test(strand_counts) 17 | 18 | ## Plot the strand bias. 19 | output <- plot_strand_bias(strand_bias) 20 | 21 | # Repeat for replication bias. 
22 | mut_mat_repli <- readRDS(system.file("states/mut_mat_repli.rds", 23 | package = "MutationalPatterns" 24 | )) 25 | strand_counts <- strand_occurrences(mut_mat_repli, by = tissue) 26 | strand_bias <- strand_bias_test(strand_counts) 27 | output_repli <- plot_strand_bias(strand_bias) 28 | 29 | ## Test with p instead of fdr 30 | output_pval <- plot_strand_bias(strand_bias, sig_type = "p") 31 | 32 | 33 | ## Use multiple (max 3) significance cutoffs. 34 | strand_bias_multistars <- strand_bias_test(strand_counts, 35 | p_cutoffs = c(0.05, 0.01, 0.005), 36 | fdr_cutoffs = c(0.1, 0.05, 0.01) 37 | ) 38 | output_multistars <- plot_strand_bias(strand_bias_multistars) 39 | 40 | 41 | test_that("Output has correct class", { 42 | expect_true(inherits(output, c("gg"))) 43 | expect_true(inherits(output_repli, c("gg"))) 44 | expect_true(inherits(output_pval, c("gg"))) 45 | expect_true(inherits(output_multistars, c("gg"))) 46 | }) 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MutationalPatterns 2 | 3 | The MutationalPatterns R package provides a comprehensive set of flexible 4 | functions for easy finding and plotting of mutational patterns in mutation 5 | catalogues. Single nucleotide variants (SNVs), insertions and deletions 6 | (Indels), double base substitutions (DBSs) and larger multi base substitutions 7 | (MBSs) are supported. 8 | 9 | ## Citation 10 | 11 | Please give credit and cite the MutationalPatterns R Package when you use it for 12 | your data analysis. A paper for the newest version of the package is published in 13 | [BMC Genomics](https://doi.org/10.1186/s12864-022-08357-3). 14 | The original MutationalPatterns paper is published in Genome Medicine 15 | [Blokzijl et al. 2018](https://doi.org/10.1186/s13073-018-0539-0). 
16 | 17 | 18 | ## Dev version 19 | 20 | To use the current development version of the package, you should first clone it 21 | from GitHub. Next, you can load it using: 22 | ```r 23 | devtools::load_all(DIR) 24 | ``` 25 | 26 | A raw version of the new vignette can be found 27 | [here](../master/vignettes/Introduction_to_MutationalPatterns.Rmd) 28 | 29 | ## Bioconductor version 30 | 31 | Please use [Bioconductor](http://bioconductor.org/packages/MutationalPatterns/) 32 | to install the released version of this package. In [Introduction to MutationalPatterns](https://bioconductor.org/packages/release/bioc/vignettes/MutationalPatterns/inst/doc/Introduction_to_MutationalPatterns.html) you can find 33 | comprehensive examples and explanations for the functions this package 34 | provides. 35 | -------------------------------------------------------------------------------- /tests/testthat/test-type_context.R: -------------------------------------------------------------------------------- 1 | context("test-type_context") 2 | 3 | # Read vcfs 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | ## Load the corresponding reference genome. 
9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | # Get type_context 13 | input <- vcfs[[1]] 14 | output <- type_context(input, ref_genome) 15 | output_longer <- type_context(input, ref_genome, extension = 2) 16 | 17 | 18 | # Unit tests 19 | test_that("Output has correct class", { 20 | expect_true(inherits(output, c("list"))) 21 | expect_true(inherits(output$types, c("character"))) 22 | expect_true(inherits(output$context, c("character"))) 23 | expect_true(inherits(output_longer, c("list"))) 24 | expect_true(inherits(output_longer$types, c("character"))) 25 | expect_true(inherits(output_longer$context, c("character"))) 26 | }) 27 | 28 | test_that("Output size is correct", { 29 | expect_equal(length(output$types), length(input)) 30 | expect_equal(length(output$context), length(input)) 31 | expect_equal(length(output_longer$types), length(input)) 32 | expect_equal(length(output_longer$context), length(input)) 33 | }) 34 | 35 | test_that("GRanges with 0 muts as input gives list with two empty vectors", { 36 | expect_warning({ 37 | output_empty <- type_context(input[0], ref_genome) 38 | }) 39 | expect_true(inherits(output_empty, "list")) 40 | expect_equal(length(output_empty$types), 0) 41 | expect_equal(length(output_empty$context), 0) 42 | }) 43 | -------------------------------------------------------------------------------- /tests/testthat/test-lengthen_mut_matrix.R: -------------------------------------------------------------------------------- 1 | context("test-lengthen_mut_matrix") 2 | 3 | # Read in mut_matrix 4 | input <- readRDS(system.file("states/mut_mat_splitregions.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Read in indel 9 | input_indel <- readRDS(system.file("states/blood_indels_counts_split_region.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | ## Lengthen the matrix 14 | 15 | # Run function 16 | output <- lengthen_mut_matrix(input) 17 | output_indel <- 
lengthen_mut_matrix(input_indel) 18 | 19 | 20 | 21 | test_that("Output has correct class", { 22 | expect_true(inherits(output, "matrix")) 23 | expect_true(inherits(output_indel, "matrix")) 24 | }) 25 | 26 | nr_regions <- input %>% 27 | colnames() %>% 28 | stringr::str_remove(".*\\.") %>% 29 | unique() %>% 30 | length() 31 | 32 | nr_regions_indel <- input_indel %>% 33 | colnames() %>% 34 | stringr::str_remove(".*\\.") %>% 35 | unique() %>% 36 | length() 37 | 38 | test_that("Output has correct size", { 39 | expect_equal(dim(output), c(nrow(input) * nr_regions, ncol(input) / nr_regions)) 40 | expect_equal(dim(output_indel), c(nrow(input_indel) * nr_regions_indel, ncol(input_indel) / nr_regions_indel)) 41 | }) 42 | 43 | expected <- readRDS(system.file("states/mut_mat_longregions.rds", 44 | package = "MutationalPatterns" 45 | )) 46 | 47 | expected_indel <- readRDS(system.file("states/blood_indels_longmatrix_split_region.rds", 48 | package = "MutationalPatterns" 49 | )) 50 | 51 | test_that("Output transforms correctly", { 52 | expect_equal(output, expected) 53 | expect_equal(output_indel, expected_indel) 54 | }) 55 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_contribution.R: -------------------------------------------------------------------------------- 1 | context("test-plot_contribution") 2 | 3 | 4 | # Load nmf data 5 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 6 | package = "MutationalPatterns" 7 | )) 8 | 9 | ## Plot the relative contribution 10 | output <- plot_contribution(nmf_res$contribution) 11 | 12 | ## Plot the absolute contribution. 13 | ## When plotting absolute NMF results, the signatures need to be included. 
14 | output_absolute <- plot_contribution(nmf_res$contribution, 15 | nmf_res$signature, 16 | mode = "absolute" 17 | ) 18 | 19 | 20 | ## Only plot a subset of samples 21 | output_subset <- plot_contribution(nmf_res$contribution, 22 | nmf_res$signature, 23 | mode = "absolute", 24 | index = c(1, 2) 25 | ) 26 | ## Flip the coordinates 27 | output_flipcoord <- plot_contribution(nmf_res$contribution, 28 | nmf_res$signature, 29 | mode = "absolute", 30 | coord_flip = TRUE 31 | ) 32 | 33 | # Use signature refitting results 34 | fit_res <- readRDS(system.file("states/snv_refit.rds", 35 | package = "MutationalPatterns" 36 | )) 37 | 38 | output_sigfit <- plot_contribution(fit_res$contribution) 39 | 40 | ## refitting results in absolute mode 41 | output_sigfit_absolute <- plot_contribution(fit_res$contribution, 42 | mode = "absolute" 43 | ) 44 | 45 | test_that("Output has correct class", { 46 | expect_true(inherits(output, "gg")) 47 | expect_true(inherits(output_absolute, "gg")) 48 | expect_true(inherits(output_subset, "gg")) 49 | expect_true(inherits(output_flipcoord, "gg")) 50 | expect_true(inherits(output_sigfit, "gg")) 51 | expect_true(inherits(output_sigfit_absolute, "gg")) 52 | }) 53 | -------------------------------------------------------------------------------- /man/plot_main_dbs_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_main_dbs_contexts.R 3 | \name{plot_main_dbs_contexts} 4 | \alias{plot_main_dbs_contexts} 5 | \title{Plot the main DBS contexts} 6 | \usage{ 7 | plot_main_dbs_contexts(counts, same_y = FALSE) 8 | } 9 | \arguments{ 10 | \item{counts}{A tibble containing the number of DBS per COSMIC context.} 11 | 12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.} 13 | } 14 | \value{ 15 | A ggplot figure. 
16 | } 17 | \description{ 18 | Plot the main DBS contexts 19 | } 20 | \details{ 21 | Plots the number of DBS per main COSMIC context per sample. 22 | The contexts are only divided by REF and not by ALT. 23 | It takes a tibble with counts as its input. This tibble can be generated by count_dbs_contexts 24 | Each sample is plotted in a separate facet. 25 | The same y axis can be used for all samples or a separate y axis can be used. 26 | } 27 | \examples{ 28 | ## Get The DBS counts 29 | ## See 'count_dbs_contexts()' for more info on how to do this. 30 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 31 | package = "MutationalPatterns" 32 | )) 33 | 34 | ## Plot contexts 35 | plot_main_dbs_contexts(dbs_counts) 36 | 37 | ## Use the same y axis for all samples. 38 | plot_main_dbs_contexts(dbs_counts, same_y = TRUE) 39 | } 40 | \seealso{ 41 | \code{\link{count_dbs_contexts}}, \code{\link{plot_dbs_contexts}} 42 | 43 | Other DBS: 44 | \code{\link{count_dbs_contexts}()}, 45 | \code{\link{get_dbs_context}()}, 46 | \code{\link{plot_compare_dbs}()}, 47 | \code{\link{plot_dbs_contexts}()} 48 | } 49 | \concept{DBS} 50 | -------------------------------------------------------------------------------- /man/region_cossim-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S4_class.R 3 | \docType{class} 4 | \name{region_cossim-class} 5 | \alias{region_cossim-class} 6 | \title{An S4 class to store the results of a regional mutation pattern similarity 7 | analysis} 8 | \description{ 9 | An S4 class to store the results of a regional mutation pattern similarity 10 | analysis 11 | } 12 | \section{Slots}{ 13 | 14 | \describe{ 15 | \item{\code{sim_tb}}{A tibble containing the calculated similarities of the windows.} 16 | 17 | \item{\code{pos_tb}}{A tibble containing the mutation positions.} 18 | 19 | \item{\code{chr_lengths}}{Vector containing the 
chromosome lengths.} 20 | 21 | \item{\code{window_size}}{The number of mutations in a window.} 22 | 23 | \item{\code{max_window_size_gen}}{The maximum size of a window before it is removed.} 24 | 25 | \item{\code{ref_genome}}{BSgenome reference genome object} 26 | 27 | \item{\code{muts_per_chr}}{Vector containing the number of mutations per chromosome.} 28 | 29 | \item{\code{mean_window_size}}{The mean length of the genome covered by the windows.} 30 | 31 | \item{\code{stepsize}}{The number of mutations that a window slides in each step.} 32 | 33 | \item{\code{extension}}{The number of bases, that's extracted upstream and 34 | downstream of the base substitutions, to create the mutation matrices.} 35 | 36 | \item{\code{chromosomes}}{Vector of chromosome/contig names of the reference genome 37 | to be plotted.} 38 | 39 | \item{\code{exclude_self_mut_mat}}{Boolean describing whether the mutations in a 40 | window should be subtracted from the global mutation matrix.} 41 | }} 42 | 43 | -------------------------------------------------------------------------------- /R/mut_context.R: -------------------------------------------------------------------------------- 1 | #' Retrieve context of base substitutions 2 | #' 3 | #' A function to extract the bases 5' upstream and 3' downstream of the base 4 | #' substitutions from the reference genome. The user can choose how many bases 5 | #' are extracted. 6 | #' 7 | #' @param vcf A GRanges object 8 | #' @param ref_genome Reference genome 9 | #' @param extension The number of bases, that's extracted upstream and 10 | #' downstream of the base substitutions. (Default: 1). 
11 | #' @return Character vector with the context of the base substitutions 12 | #' 13 | #' @examples 14 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the 15 | #' ## following data: 16 | #' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 17 | #' package = "MutationalPatterns" 18 | #' )) 19 | #' 20 | #' ## Load the corresponding reference genome. 21 | #' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 22 | #' library(ref_genome, character.only = TRUE) 23 | #' 24 | #' ## Get the standard context 25 | #' mut_context <- mut_context(vcfs[[1]], ref_genome) 26 | #' 27 | #' ## Get larger context 28 | #' mut_context_larger <- mut_context(vcfs[[1]], ref_genome, extension = 2) 29 | #' @seealso 30 | #' \code{\link{read_vcfs_as_granges}} 31 | #' 32 | #' @export 33 | 34 | mut_context <- function(vcf, ref_genome, extension = 1) { 35 | # Check that the seqnames of the gr and ref_genome match 36 | .check_chroms(vcf, ref_genome) 37 | 38 | # Get context of mutation: the reference sequence from `extension` bases before the start to `extension` bases after the end. 39 | vcf_context <- as.character(Biostrings::getSeq( 40 | BSgenome::getBSgenome(ref_genome), 41 | seqnames(vcf), 42 | start(vcf) - extension, 43 | end(vcf) + extension 44 | )) 45 | return(vcf_context) 46 | } 47 | -------------------------------------------------------------------------------- /man/merge_signatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/merge_signatures.R 3 | \name{merge_signatures} 4 | \alias{merge_signatures} 5 | \title{Merge signatures based on cosine similarity} 6 | \usage{ 7 | merge_signatures( 8 | signatures, 9 | cos_sim_cutoff = 0.8, 10 | merge_char = ";", 11 | verbose = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{signatures}{Signature matrix (dimensions: x mutation types 16 | X n signatures)} 17 | 18 | \item{cos_sim_cutoff}{Cutoff for cosine similarity. 
Signatures are merged when their 19 | cosine similarity is higher than the limit. Default: 0.8} 20 | 21 | \item{merge_char}{Character used to merge the signature names. This character shouldn't 22 | be in the signature names beforehand. Default: ";"} 23 | 24 | \item{verbose}{Verbosity. If TRUE it shows which signatures got merged. Default: TRUE} 25 | } 26 | \value{ 27 | Signature matrix (dimensions: x mutation types 28 | X n signatures) 29 | } 30 | \description{ 31 | This function merges signatures based on their cosine similarity. 32 | It iteratively merges the two signatures with the highest cosine similarity. 33 | Merging is stopped when the maximum cosine similarity is lower than the limit. 34 | } 35 | \examples{ 36 | 37 | ## Get signatures 38 | signatures <- get_known_signatures() 39 | 40 | ## Merge signatures 41 | merge_signatures(signatures) 42 | 43 | 44 | ## Merge signatures using a stricter cutoff 45 | merge_signatures(signatures, cos_sim_cutoff = 0.9) 46 | 47 | ## Merge signatures using a different merging character 48 | merge_signatures(signatures, merge_char = "_") 49 | 50 | ## Merge signatures silently 51 | merge_signatures(signatures, verbose = FALSE) 52 | } 53 | -------------------------------------------------------------------------------- /man/plot_96_profile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_96_profile.R 3 | \name{plot_96_profile} 4 | \alias{plot_96_profile} 5 | \title{Plot 96 trinucleotide profile} 6 | \usage{ 7 | plot_96_profile(mut_matrix, colors = NA, ymax = 0.2, condensed = FALSE) 8 | } 9 | \arguments{ 10 | \item{mut_matrix}{96 trinucleotide profile matrix} 11 | 12 | \item{colors}{Optional 6 value color vector.} 13 | 14 | \item{ymax}{Y axis maximum value, default = 0.2} 15 | 16 | \item{condensed}{More condensed plotting format. 
Default = F.} 17 | } 18 | \value{ 19 | 96 trinucleotide profile plot 20 | } 21 | \description{ 22 | Plot relative contribution of 96 trinucleotides 23 | } 24 | \examples{ 25 | ## See the 'mut_matrix()' example for how we obtained the 26 | ## mutation matrix information: 27 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Plot the 96-profile of three samples 32 | plot_96_profile(mut_mat[, c(1, 4, 7)]) 33 | 34 | ## Plot a condensed profile 35 | plot_96_profile(mut_mat[, c(1, 4, 7)], condensed = TRUE) 36 | 37 | ## It's also possible to plot signatures, for example signatures 38 | ## generated with NMF 39 | ## See 'extract_signatures()' on how we obtained these signatures. 40 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 41 | package = "MutationalPatterns" 42 | )) 43 | 44 | ## Optionally, provide signature names 45 | colnames(nmf_res$signatures) <- c("Signature A", "Signature B") 46 | 47 | ## Generate the plot 48 | plot_96_profile(nmf_res$signatures) 49 | } 50 | \seealso{ 51 | \code{\link{mut_matrix}}, 52 | \code{\link{plot_profile_heatmap}}, 53 | \code{\link{plot_river}} 54 | } 55 | -------------------------------------------------------------------------------- /man/plot_strand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_strand.R 3 | \name{plot_strand} 4 | \alias{plot_strand} 5 | \title{Plot strand per base substitution type} 6 | \usage{ 7 | plot_strand(strand_bias_df, mode = c("relative", "absolute"), colors = NA) 8 | } 9 | \arguments{ 10 | \item{strand_bias_df}{data.frame, result from strand_bias function} 11 | 12 | \item{mode}{Either "absolute" for absolute number of mutations, or 13 | "relative" for relative contribution, default = "relative"} 14 | 15 | \item{colors}{Optional color vector for plotting with 6 values} 16 | } 17 | \value{ 18 | Barplot 19 | 
} 20 | \description{ 21 | For each base substitution type and transcriptional strand the total number 22 | of mutations and the relative contribution within a group is returned. 23 | } 24 | \examples{ 25 | ## See the 'mut_matrix_stranded()' example for how we obtained the 26 | ## following mutation matrix. 27 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Load a reference genome. 32 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 33 | library(ref_genome, character.only = TRUE) 34 | 35 | tissue <- c( 36 | "colon", "colon", "colon", 37 | "intestine", "intestine", "intestine", 38 | "liver", "liver", "liver" 39 | ) 40 | 41 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 42 | 43 | ## Plot the strand in relative mode. 44 | strand_plot <- plot_strand(strand_counts) 45 | 46 | ## Or absolute mode. 47 | strand_plot <- plot_strand(strand_counts, mode = "absolute") 48 | } 49 | \seealso{ 50 | \code{\link{mut_matrix_stranded}}, 51 | \code{\link{strand_occurrences}}, 52 | \code{\link{plot_strand_bias}} 53 | } 54 | -------------------------------------------------------------------------------- /man/plot_main_indel_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_main_indel_contexts.R 3 | \name{plot_main_indel_contexts} 4 | \alias{plot_main_indel_contexts} 5 | \title{Plot the main indel contexts} 6 | \usage{ 7 | plot_main_indel_contexts(counts, same_y = FALSE) 8 | } 9 | \arguments{ 10 | \item{counts}{A tibble containing the number of indels per COSMIC context.} 11 | 12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.} 13 | } 14 | \value{ 15 | A ggplot figure. 16 | } 17 | \description{ 18 | Plot the main indel contexts 19 | } 20 | \details{ 21 | Plots the number of indels per main COSMIC context per sample. 
22 | The contexts are not subdivided into the number of repeats/microhomology length. 23 | It takes a tibble with counts as its input. This tibble can be generated by count_indel_contexts 24 | Each sample is plotted in a separate facet. 25 | The same y axis can be used for all samples or a separate y axis can be used. 26 | } 27 | \examples{ 28 | ## Get The indel counts 29 | ## See 'count_indel_contexts()' for more info on how to do this. 30 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", 31 | package = "MutationalPatterns" 32 | )) 33 | 34 | ## Plot contexts 35 | plot_main_indel_contexts(indel_counts) 36 | 37 | ## Use the same y axis for all samples. 38 | plot_main_indel_contexts(indel_counts, same_y = TRUE) 39 | } 40 | \seealso{ 41 | \code{\link{count_indel_contexts}}, \code{\link{plot_indel_contexts}} 42 | 43 | Other Indels: 44 | \code{\link{count_indel_contexts}()}, 45 | \code{\link{get_indel_context}()}, 46 | \code{\link{plot_compare_indels}()}, 47 | \code{\link{plot_indel_contexts}()} 48 | } 49 | \concept{Indels} 50 | -------------------------------------------------------------------------------- /man/plot_dbs_contexts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_dbs_contexts.R 3 | \name{plot_dbs_contexts} 4 | \alias{plot_dbs_contexts} 5 | \title{Plot the DBS contexts} 6 | \usage{ 7 | plot_dbs_contexts(counts, same_y = FALSE, condensed = FALSE) 8 | } 9 | \arguments{ 10 | \item{counts}{A tibble containing the number of DBS per COSMIC context.} 11 | 12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.} 13 | 14 | \item{condensed}{More condensed plotting format. Default = F.} 15 | } 16 | \value{ 17 | A ggplot figure. 18 | } 19 | \description{ 20 | Plot the DBS contexts 21 | } 22 | \details{ 23 | Plots the number of DBS COSMIC context per sample. 
24 | It takes a tibble with counts as its input. This tibble can be generated by count_dbs_contexts 25 | Each sample is plotted in a separate facet. 26 | The same y axis can be used for all samples or a separate y axis can be used. 27 | } 28 | \examples{ 29 | ## Get The DBS counts 30 | ## See 'count_dbs_contexts()' for more info on how to do this. 31 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 32 | package = "MutationalPatterns" 33 | )) 34 | 35 | ## Plot contexts 36 | plot_dbs_contexts(dbs_counts) 37 | 38 | ## Use the same y axis for all samples. 39 | plot_dbs_contexts(dbs_counts, same_y = TRUE) 40 | 41 | ## Create a more condensed plot 42 | plot_dbs_contexts(dbs_counts, condensed = TRUE) 43 | } 44 | \seealso{ 45 | \code{\link{count_dbs_contexts}}, \code{\link{plot_main_dbs_contexts}} 46 | 47 | Other DBS: 48 | \code{\link{count_dbs_contexts}()}, 49 | \code{\link{get_dbs_context}()}, 50 | \code{\link{plot_compare_dbs}()}, 51 | \code{\link{plot_main_dbs_contexts}()} 52 | } 53 | \concept{DBS} 54 | -------------------------------------------------------------------------------- /man/get_mut_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_mut_type.R 3 | \name{get_mut_type} 4 | \alias{get_mut_type} 5 | \title{Get variants with mut_type from GRanges} 6 | \usage{ 7 | get_mut_type( 8 | vcf_list, 9 | type = c("snv", "indel", "dbs", "mbs"), 10 | predefined_dbs_mbs = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{vcf_list}{GRanges/GRangesList} 15 | 16 | \item{type}{The type of variant that will be returned.} 17 | 18 | \item{predefined_dbs_mbs}{Boolean. Whether dbs and mbs variants have been 19 | predefined in your vcf. This function by default assumes that dbs and mbs 20 | variants are present in the vcf as snvs, which are positioned next to each 21 | other. 
If your dbs/mbs variants are called separately you should set this 22 | argument to TRUE. (default = FALSE)} 23 | } 24 | \value{ 25 | GRanges/GRangesList of the desired mutation type. 26 | } 27 | \description{ 28 | Get the variants of a certain mutation type from a GRanges or GRangesList object. 29 | All other variants will be filtered out. 30 | It is assumed that DBS/MBSs are called as separate SNVs. 31 | They are merged into single variants. 32 | The type of variant can be chosen with type. 33 | } 34 | \examples{ 35 | ## Get a GRanges list object. 36 | ## See 'read_vcfs_as_granges' for more info how to do this. 37 | grl <- readRDS(system.file("states/blood_grl.rds", 38 | package = "MutationalPatterns" 39 | )) 40 | 41 | ## Here we only use two samples to reduce runtime 42 | grl <- grl[1:2] 43 | 44 | ## Get a specific mutation type. 45 | snv_grl <- get_mut_type(grl, "snv") 46 | indel_grl <- get_mut_type(grl, "indel") 47 | dbs_grl <- get_mut_type(grl, "dbs") 48 | mbs_grl <- get_mut_type(grl, "mbs") 49 | } 50 | \seealso{ 51 | \code{\link{read_vcfs_as_granges}} 52 | } 53 | -------------------------------------------------------------------------------- /tests/testthat/test-mut_matrix.R: -------------------------------------------------------------------------------- 1 | context("test-mut_matrix") 2 | 3 | # To test mut_matrix, we need to load the reference genome first. 4 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 5 | library(ref_genome, character.only = TRUE) 6 | 7 | # We re-use the data that is shipped with the package. 
8 | input <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 9 | package = "MutationalPatterns" 10 | )) 11 | 12 | # Expected output 13 | expected <- readRDS(system.file("states/mut_mat_data.rds", 14 | package = "MutationalPatterns" 15 | )) 16 | 17 | # Run function 18 | output <- mut_matrix(input, ref_genome) 19 | output_longer <- mut_matrix(vcf_list = input, ref_genome = ref_genome, extension = 2) 20 | 21 | 22 | # Perform tests 23 | 24 | test_that("Output has correct class", { 25 | expect_true(inherits(output, "matrix")) 26 | expect_true(inherits(output_longer, "matrix")) 27 | }) 28 | 29 | test_that("Output has correct dimensions", { 30 | expect_equal(dim(output), c(96, 9)) 31 | expect_equal(dim(output_longer), c(1536, 9)) 32 | }) 33 | 34 | test_that("Number of variants in output is correct", { 35 | expect_equal(colSums(output), elementNROWS(input)) 36 | expect_equal(colSums(output_longer), elementNROWS(input)) 37 | }) 38 | 39 | test_that("transforms correctly", { 40 | expect_equal(output, expected) 41 | }) 42 | 43 | test_that("a list is also acceptable input", { 44 | output_list <- mut_matrix(as.list(input), ref_genome) 45 | 46 | expect_equal(output_list, output) 47 | expect_equal(output_list, expected) 48 | }) 49 | 50 | test_that("A single GR can also be used as input", { 51 | output_singlesample <- mut_matrix(input[[1]], ref_genome) 52 | expect_true(inherits(output_singlesample, "matrix")) 53 | expect_equal(dim(output_singlesample), c(96, 1)) 54 | }) 55 | -------------------------------------------------------------------------------- /tests/testthat/test-fit_to_signatures.R: -------------------------------------------------------------------------------- 1 | context("test-fit_to_signatures") 2 | 3 | # Get mut_mat 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get signatures 9 | signatures <- get_known_signatures() 10 | 11 | # Run function 12 | output <- fit_to_signatures(mut_mat, 
signatures) 13 | 14 | # Get expected 15 | expected <- readRDS(system.file("states/snv_refit.rds", 16 | package = "MutationalPatterns" 17 | )) 18 | 19 | # Run tests 20 | test_that("Output has correct class", { 21 | expect_true(inherits(output, "list")) 22 | expect_true(inherits(output$contribution, "matrix")) 23 | expect_true(inherits(output$reconstructed, "matrix")) 24 | }) 25 | 26 | test_that("Output is equal to expected", { 27 | expect_equal(output, expected) 28 | }) 29 | 30 | # Get indel mut_mat 31 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", package = "MutationalPatterns")) 32 | 33 | # Get indel signatures 34 | signatures <- get_known_signatures("indel") 35 | 36 | # Get expected 37 | expected <- readRDS(system.file("states/indel_refit.rds", 38 | package = "MutationalPatterns" 39 | )) 40 | 41 | # Run tests 42 | test_that("Refitting indels gives expected output.", { 43 | output <- fit_to_signatures(indel_counts, signatures) 44 | expect_equal(output, expected) 45 | }) 46 | 47 | # Get dbs mut_mat 48 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", package = "MutationalPatterns")) 49 | 50 | signatures <- get_known_signatures("dbs") 51 | 52 | 53 | expected <- readRDS(system.file("states/dbs_refit.rds", 54 | package = "MutationalPatterns" 55 | )) 56 | 57 | test_that("Refitting dbss gives expected output.", { 58 | output <- fit_to_signatures(dbs_counts, signatures) 59 | expect_equal(output, expected) 60 | }) 61 | -------------------------------------------------------------------------------- /man/fit_to_signatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_to_signatures.R 3 | \name{fit_to_signatures} 4 | \alias{fit_to_signatures} 5 | \title{Find optimal nonnegative linear combination of mutation signatures to 6 | reconstruct the mutation matrix.} 7 | \usage{ 8 | fit_to_signatures(mut_matrix, 
signatures) 9 | } 10 | \arguments{ 11 | \item{mut_matrix}{mutation count matrix (dimensions: x mutation types 12 | X n samples)} 13 | 14 | \item{signatures}{Signature matrix (dimensions: x mutation types 15 | X n signatures)} 16 | } 17 | \value{ 18 | Named list with signature contributions and reconstructed 19 | mutation matrix 20 | } 21 | \description{ 22 | Find the linear combination of mutation signatures that most closely 23 | reconstructs the mutation matrix by solving the nonnegative least-squares 24 | constraints problem. 25 | } 26 | \examples{ 27 | 28 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 29 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 30 | package = "MutationalPatterns" 31 | )) 32 | 33 | ## Get signatures 34 | signatures <- get_known_signatures() 35 | 36 | ## Perform the fitting 37 | fit_res <- fit_to_signatures(mut_mat, signatures) 38 | 39 | ## This will also work for indels and dbs. 40 | ## An example is given for indels 41 | 42 | ## Get The indel counts 43 | ## See 'count_indel_contexts()' for more info on how to do this. 
44 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", 45 | package = "MutationalPatterns" 46 | )) 47 | 48 | ## Get signatures 49 | signatures <- get_known_signatures("indel") 50 | 51 | fit_to_signatures(indel_counts, signatures) 52 | } 53 | \seealso{ 54 | \code{\link{mut_matrix}},\code{\link{fit_to_signatures_strict}},\code{\link{fit_to_signatures_bootstrapped}} 55 | } 56 | -------------------------------------------------------------------------------- /man/plot_192_profile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_192_profile.R 3 | \name{plot_192_profile} 4 | \alias{plot_192_profile} 5 | \title{Plot 192 trinucleotide profile} 6 | \usage{ 7 | plot_192_profile(mut_matrix, colors = NA, ymax = 0.2, condensed = FALSE) 8 | } 9 | \arguments{ 10 | \item{mut_matrix}{192 trinucleotide profile matrix} 11 | 12 | \item{colors}{6 value color vector} 13 | 14 | \item{ymax}{Y axis maximum value, default = 0.2} 15 | 16 | \item{condensed}{More condensed plotting format. Default = F.} 17 | } 18 | \value{ 19 | 192 trinucleotide profile plot 20 | } 21 | \description{ 22 | Plot relative contribution of 192 trinucleotides 23 | } 24 | \examples{ 25 | ## See the 'mut_matrix_stranded()' example for how we obtained the 26 | ## mutation matrix with transcriptional strand information: 27 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 28 | package = "MutationalPatterns" 29 | )) 30 | 31 | ## Plot profile for some of the samples 32 | plot_192_profile(mut_mat_s[, c(1, 4, 7)]) 33 | 34 | ## You can create a more condensed version of the plot 35 | plot_192_profile(mut_mat_s[, c(1, 4, 7)], condensed = TRUE) 36 | 37 | ## It's also possible to plot signatures, for example signatures 38 | ## generated with NMF 39 | ## See 'extract_signatures()' on how we obtained these signatures. 
40 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds", 41 | package = "MutationalPatterns" 42 | )) 43 | 44 | ## Optionally, provide signature names 45 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B") 46 | 47 | ## Generate the plot 48 | plot_192_profile(nmf_res_strand$signatures) 49 | } 50 | \seealso{ 51 | \code{\link{mut_matrix_stranded}}, 52 | \code{\link{extract_signatures}}, 53 | \code{\link{plot_96_profile}} 54 | } 55 | -------------------------------------------------------------------------------- /man/plot_strand_bias.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_strand_bias.R 3 | \name{plot_strand_bias} 4 | \alias{plot_strand_bias} 5 | \title{Plot strand bias per base substitution type per group} 6 | \usage{ 7 | plot_strand_bias(strand_bias, colors = NA, sig_type = c("fdr", "p")) 8 | } 9 | \arguments{ 10 | \item{strand_bias}{data.frame, result from strand_bias function} 11 | 12 | \item{colors}{Optional color vector with 6 values for plotting} 13 | 14 | \item{sig_type}{The type of significance to be used. Possible values: 15 | * 'fdr' False discovery rate. 16 | A type of multiple testing correction.; 17 | * 'p' for regular p values.} 18 | } 19 | \value{ 20 | Barplot 21 | } 22 | \description{ 23 | Plot strand bias per base substitution type per group 24 | } 25 | \examples{ 26 | ## See the 'mut_matrix_stranded()' example for how we obtained the 27 | ## following mutation matrix. 28 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | 32 | 33 | tissue <- c( 34 | "colon", "colon", "colon", 35 | "intestine", "intestine", "intestine", 36 | "liver", "liver", "liver" 37 | ) 38 | 39 | ## Perform the strand bias test. 
40 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 41 | strand_bias <- strand_bias_test(strand_counts) 42 | 43 | ## Plot the strand bias. 44 | plot_strand_bias(strand_bias) 45 | 46 | ## Use multiple (max 3) significance cutoffs. 47 | ## This will vary the number of significance stars. 48 | strand_bias_multistars <- strand_bias_test(strand_counts, 49 | p_cutoffs = c(0.05, 0.01, 0.005), 50 | fdr_cutoffs = c(0.1, 0.05, 0.01) 51 | ) 52 | plot_strand_bias(strand_bias_multistars) 53 | } 54 | \seealso{ 55 | \code{\link{mut_matrix_stranded}}, 56 | \code{\link{strand_occurrences}}, 57 | \code{\link{strand_bias_test}}, 58 | \code{\link{plot_strand}} 59 | } 60 | -------------------------------------------------------------------------------- /tests/testthat/test-mutations_from_vcf.R: -------------------------------------------------------------------------------- 1 | context("test-mutations_from_vcf") 2 | 3 | # Read vcfs 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | vcf <- vcfs[[1]] 8 | 9 | # Run function 10 | output <- mutations_from_vcf(vcf) 11 | 12 | # Check it works on empty input 13 | output_empty <- mutations_from_vcf(vcf[0]) 14 | 15 | # Check it works on lowercase input 16 | vcf_lowercase <- vcf 17 | colnames(mcols(vcf_lowercase)) <- c("paramRangeID", "ref", "alt", "QUAL", "FILTER") 18 | output_lowercase <- mutations_from_vcf(vcf_lowercase) 19 | 20 | # Check it gives an error on data with no ref or alt 21 | vcf_noref <- vcf 22 | colnames(mcols(vcf_noref)) <- c("paramRangeID", "a", "ALT", "QUAL", "FILTER") 23 | vcf_noalt <- vcf 24 | colnames(mcols(vcf_noalt)) <- c("paramRangeID", "REF", "a", "QUAL", "FILTER") 25 | 26 | # Unit tests 27 | test_that("Output has correct class", { 28 | expect_true(inherits(output, c("character"))) 29 | expect_true(inherits(output_empty, c("character"))) 30 | }) 31 | 32 | test_that("The 12 substitution types are returned", { 33 | types <- 
sort(unique(output)) 34 | expect_equal(types, c( 35 | "A>C", "A>G", "A>T", "C>A", "C>G", "C>T", 36 | "G>A", "G>C", "G>T", "T>A", "T>C", "T>G" 37 | )) 38 | }) 39 | 40 | test_that("GRanges with 0 muts as input gives empty output", { 41 | expect_equal(length(output_empty), 0) 42 | }) 43 | 44 | test_that("Input with lowercase doesn't change result", { 45 | expect_equal(output, output_lowercase) 46 | }) 47 | 48 | test_that("GR with no REF or ALT gives an error.", { 49 | expect_error( 50 | { 51 | output_noref <- mutations_from_vcf(vcf_noref) 52 | }, 53 | "missing a REF column" 54 | ) 55 | expect_error( 56 | { 57 | output_noalt <- mutations_from_vcf(vcf_noalt) 58 | }, 59 | "missing a ALT column" 60 | ) 61 | }) 62 | -------------------------------------------------------------------------------- /man/get_indel_context.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_indel_context.R 3 | \name{get_indel_context} 4 | \alias{get_indel_context} 5 | \title{Get indel contexts} 6 | \usage{ 7 | get_indel_context(vcf_list, ref_genome) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRanges or GRangesList object containing Indel mutations. 11 | The mutations should be called similarly to HaplotypeCaller.} 12 | 13 | \item{ref_genome}{BSgenome reference genome object} 14 | } 15 | \value{ 16 | A modified version of the input grl. In each gr two columns have been added. 17 | "muttype" showing the main indel type and "muttype_sub" which shows the subtype. 18 | The subtype is either the number of repeats or the microhomology length. 19 | } 20 | \description{ 21 | Get indel contexts 22 | } 23 | \details{ 24 | Determines the COSMIC context from a GRanges or GRangesList object containing Indel mutations. 25 | It applies the get_indel_context_gr function to each gr in the input. 26 | It searches for repeat units both to the left and right of the indel. 
27 | } 28 | \examples{ 29 | 30 | ## Get a GRangesList or GRanges object with only indels. 31 | ## See 'read_vcfs_as_granges' or 'get_mut_type' for more info on how to do this. 32 | indel_grl <- readRDS(system.file("states/blood_grl_indel.rds", 33 | package = "MutationalPatterns" 34 | )) 35 | 36 | ## Load the corresponding reference genome. 37 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 38 | library(ref_genome, character.only = TRUE) 39 | 40 | ## Get the indel contexts 41 | get_indel_context(indel_grl, ref_genome) 42 | } 43 | \seealso{ 44 | \code{\link{read_vcfs_as_granges}}, \code{\link{get_mut_type}} 45 | 46 | Other Indels: 47 | \code{\link{count_indel_contexts}()}, 48 | \code{\link{plot_compare_indels}()}, 49 | \code{\link{plot_indel_contexts}()}, 50 | \code{\link{plot_main_indel_contexts}()} 51 | } 52 | \concept{Indels} 53 | -------------------------------------------------------------------------------- /tests/testthat/test-get_mut_type.R: -------------------------------------------------------------------------------- 1 | context("test-get_mut_type") 2 | 3 | # Get a grl with variants. 4 | grl <- readRDS(system.file("states/blood_grl.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Only use two samples to reduce runtime 9 | grl <- grl[1:2] 10 | 11 | ## Get a specific mutation type. 12 | snv_grl <- get_mut_type(grl, "snv") 13 | indel_grl <- get_mut_type(grl, "indel") 14 | dbs_grl <- get_mut_type(grl, "dbs") 15 | mbs_grl <- get_mut_type(grl, "mbs") 16 | gr_singlesample <- get_mut_type(grl[[1]], type = "dbs") 17 | empty_gr <- get_mut_type(grl[[1]][0], type = "dbs") 18 | gr_nodbs <- get_mut_type(grl[[1]][1:20], type = "dbs") 19 | 20 | # Change names of indel_grl, to make them prettier. 
21 | remove_names_gr <- function(gr) { 22 | names(gr) <- seq_along(gr) 23 | return(gr) 24 | } 25 | indel_grl <- purrr::map(as.list(indel_grl), remove_names_gr) %>% 26 | GRangesList() 27 | 28 | expected_indel_grl <- readRDS(system.file("states/blood_grl_indel.rds", 29 | package = "MutationalPatterns" 30 | ))[1:2] 31 | 32 | 33 | test_that("Output has correct class", { 34 | expect_true(inherits(snv_grl, c("GRanges", "CompressedGRangesList"))) 35 | expect_true(inherits(indel_grl, c("GRanges", "CompressedGRangesList"))) 36 | expect_true(inherits(dbs_grl, c("GRanges", "CompressedGRangesList"))) 37 | expect_true(inherits(mbs_grl, c("GRanges", "CompressedGRangesList"))) 38 | expect_true(inherits(gr_singlesample, c("GRanges"))) 39 | expect_true(inherits(empty_gr, c("GRanges"))) 40 | expect_true(inherits(gr_nodbs, c("GRanges"))) 41 | }) 42 | 43 | test_that("Output is equal to expected", { 44 | expect_equal(indel_grl, expected_indel_grl) 45 | }) 46 | # gr_nodbs was filtered for "dbs" from the first 20 variants of sample 1; the test expects none to be found 47 | test_that("Empty gr is returned when a mut type is not present", { 48 | expect_equal(length(gr_nodbs), 0) 49 | }) 50 | # empty_gr was built from a zero-length GRanges (grl[[1]][0]) 51 | test_that("Empty gr as input results in a empty output gr", { 52 | expect_equal(length(empty_gr), 0) 53 | }) 54 | -------------------------------------------------------------------------------- /man/plot_bootstrapped_contribution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_bootstrapped_contribution.R 3 | \name{plot_bootstrapped_contribution} 4 | \alias{plot_bootstrapped_contribution} 5 | \title{Plot the bootstrapped signature contributions} 6 | \usage{ 7 | plot_bootstrapped_contribution( 8 | contri_boots, 9 | mode = c("absolute", "relative"), 10 | plot_type = c("jitter", "barplot", "dotplot") 11 | ) 12 | } 13 | \arguments{ 14 | \item{contri_boots}{matrix showing signature contributions across bootstrap iterations.} 15 | 16 | \item{mode}{Either "absolute" for absolute 
number of mutations, or 17 | "relative" for relative contribution, default = "absolute"} 18 | 19 | \item{plot_type}{Either "jitter" for a jitter plot, "barplot" for a barplot, or "dotplot" for a dotplot} 20 | } 21 | \value{ 22 | A ggplot2 graph 23 | } 24 | \description{ 25 | Plot the signature contributions retrieved with 'fit_to_signatures_bootstrapped'. 26 | The function can plot either the absolute or the relative signature contribution. 27 | The graph can be plotted as a jitter plot, a barplot or a dotplot. 28 | } 29 | \examples{ 30 | ## Get the bootstrapped signature contributions 31 | ## See 'fit_to_signatures_bootstrapped()' for more info on how to do this. 32 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds", 33 | package = "MutationalPatterns" 34 | )) 35 | 36 | ## Plot bootstrapped contribution 37 | plot_bootstrapped_contribution(contri_boots) 38 | 39 | ## Plot bootstrapped contribution with relative contributions 40 | plot_bootstrapped_contribution(contri_boots, mode = "relative") 41 | 42 | ## Plot bootstrapped contribution with a barplot 43 | plot_bootstrapped_contribution(contri_boots, plot_type = "barplot") 44 | 45 | ## Plot bootstrapped contribution with a dotplot 46 | plot_bootstrapped_contribution(contri_boots, plot_type = "dotplot", mode = "absolute") 47 | } 48 | -------------------------------------------------------------------------------- /man/plot_profile_region.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_profile_region.R 3 | \name{plot_profile_region} 4 | \alias{plot_profile_region} 5 | \title{Plot 96 trinucleotide profile per subgroup} 6 | \usage{ 7 | plot_profile_region( 8 | mut_matrix, 9 | mode = c("relative_sample", "relative_sample_feature", "absolute"), 10 | colors = NULL, 11 | ymax = 0.2, 12 | condensed = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{mut_matrix}{Mutation matrix} 17 | 18 | 
\item{mode}{'relative_sample', 'relative_sample_feature' or 'absolute'. 19 | When 'relative_sample', the number of variants will be shown 20 | divided by the total number of variants in that sample. 21 | When 'relative_sample_feature', the number of variants will be shown 22 | divided by the total number of variants in that sample and genomic region.} 23 | 24 | \item{colors}{6 value color vector} 25 | 26 | \item{ymax}{Y axis maximum value, default = 0.2} 27 | 28 | \item{condensed}{More condensed plotting format. Default = FALSE.} 29 | } 30 | \value{ 31 | 96 trinucleotide profile plot per region 32 | } 33 | \description{ 34 | Plot relative contribution of 96 trinucleotides per subgroup. 35 | This can be genomic regions but could also be other subsets. 36 | The function uses a matrix generated by 'lengthen_mut_matrix()' 37 | as its input. 38 | } 39 | \examples{ 40 | ## See the 'lengthen_mut_matrix()' example for how we obtained the 41 | ## mutation matrix information: 42 | mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds", 43 | package = "MutationalPatterns" 44 | )) 45 | 46 | ## Plot the 96-profile of three samples 47 | plot_profile_region(mut_mat_long[, c(1, 4, 7)]) 48 | } 49 | \seealso{ 50 | \code{\link{mut_matrix}} 51 | 52 | Other genomic_regions: 53 | \code{\link{bin_mutation_density}()}, 54 | \code{\link{lengthen_mut_matrix}()}, 55 | \code{\link{plot_spectrum_region}()}, 56 | \code{\link{split_muts_region}()} 57 | } 58 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_cosine_heatmap.R: -------------------------------------------------------------------------------- 1 | context("test-plot_cosine_heatmap") 2 | 3 | # Get mut_mat 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get signatures 9 | signatures <- get_known_signatures() 10 | 11 | 12 | # Calculate the cosine similarity between each 
signature and each 96 mutational profile 13 | cos_matrix <- cos_sim_matrix(mut_mat, signatures) 14 | 15 | # Plot the cosine similarity between each signature and each sample with hierarchical 16 | # clustering of samples and signatures. 17 | output <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, method = "complete") 18 | 19 | # In the above example, clustering is performed on the similarities of the samples with 20 | # the signatures. It's also possible to cluster the signatures and samples on their (96) profile. 21 | hclust_cosmic <- cluster_signatures(signatures, method = "average") 22 | cosmic_order <- colnames(signatures)[hclust_cosmic$order] 23 | hclust_samples <- cluster_signatures(mut_mat, method = "average") 24 | sample_order <- colnames(mut_mat)[hclust_samples$order] 25 | # Plot the cosine heatmap using the given sample and signature order. 26 | output_supplied_order <- plot_cosine_heatmap(cos_matrix, 27 | cluster_rows = FALSE, cluster_cols = FALSE, row_order = sample_order, 28 | col_order = cosmic_order, method = "complete" 29 | ) 30 | 31 | # You can also plot the similarity of samples with each other 32 | cos_matrix <- cos_sim_matrix(mut_mat, mut_mat) 33 | output_inner <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, method = "complete") 34 | 35 | # You can also include the values as text in the plot 36 | output_text <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, plot_values = TRUE) 37 | 38 | test_that("Output has correct class", { 39 | expect_true(inherits(output, "gg")) 40 | expect_true(inherits(output_supplied_order, "gg")) 41 | expect_true(inherits(output_inner, "gg")) 42 | expect_true(inherits(output_text, "gg")) 43 | }) 44 | -------------------------------------------------------------------------------- /man/strand_bias_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/strand_bias_test.R 3 | \name{strand_bias_test} 4 | \alias{strand_bias_test} 5 | \title{Significance test for strand asymmetry} 6 | \usage{ 7 | strand_bias_test(strand_occurrences, p_cutoffs = 0.05, fdr_cutoffs = 0.1) 8 | } 9 | \arguments{ 10 | \item{strand_occurrences}{Dataframe with mutation count per strand, result 11 | from 'strand_occurrences()'} 12 | 13 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05} 14 | 15 | \item{fdr_cutoffs}{Significance cutoff for the fdr. Default: 0.1} 16 | } 17 | \value{ 18 | Dataframe with poisson test P value for the ratio between the 19 | two strands per group per base substitution type. 20 | } 21 | \description{ 22 | This function performs a two sided Poisson test for the ratio between mutations on 23 | each strand. Multiple testing correction is also performed. 24 | } 25 | \examples{ 26 | ## See the 'mut_matrix_stranded()' example for how we obtained the 27 | ## following mutation matrix. 28 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | 32 | tissue <- c( 33 | "colon", "colon", "colon", 34 | "intestine", "intestine", "intestine", 35 | "liver", "liver", "liver" 36 | ) 37 | 38 | ## Perform the strand bias test. 39 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 40 | strand_bias <- strand_bias_test(strand_counts) 41 | 42 | ## Use different significance cutoffs for the pvalue and fdr 43 | strand_bias_strict <- strand_bias_test(strand_counts, 44 | p_cutoffs = 0.01, fdr_cutoffs = 0.05 45 | ) 46 | 47 | ## Use multiple (max 3) significance cutoffs. 48 | ## This will vary the number of significance stars. 
49 | strand_bias_multistars <- strand_bias_test(strand_counts, 50 | p_cutoffs = c(0.05, 0.01, 0.005), 51 | fdr_cutoffs = c(0.1, 0.05, 0.01) 52 | ) 53 | } 54 | \seealso{ 55 | \code{\link{mut_matrix_stranded}}, 56 | \code{\link{strand_occurrences}}, 57 | \code{\link{plot_strand_bias}} 58 | } 59 | -------------------------------------------------------------------------------- /R/intersect_with_region.R: -------------------------------------------------------------------------------- 1 | #' Find overlap between mutations and a genomic region 2 | #' 3 | #' Find the number of mutations that reside in genomic region and take 4 | #' surveyed area of genome into account. 5 | #' 6 | #' @param vcf CollapsedVCF object with mutations 7 | #' @param surveyed GRanges object with regions of the genome that were surveyed 8 | #' @param region GRanges object with genomic region(s) 9 | #' @noRd 10 | #' @return A one-row data.frame with the columns n_muts, surveyed_length, prob, 11 | #' surveyed_region_length, expected and observed. 12 | 13 | .intersect_with_region <- function(vcf, surveyed, region) { 14 | # Number of mutations in vcf file 15 | n_muts <- length(vcf) 16 | 17 | # Number of base pairs that were surveyed (summed as doubles to avoid integer overflow) 18 | surveyed_length <- sum(as.numeric(BiocGenerics::width(surveyed))) 19 | 20 | # Check if chromosome names are the same in the objects. seqlevelsStyle() can return several styles, so look for a shared style instead of using a vector-valued `!=` as the `if` condition. 21 | if (!any(GenomeInfoDb::seqlevelsStyle(vcf) %in% GenomeInfoDb::seqlevelsStyle(surveyed))) { 22 | stop(paste( 23 | "The chromosome names (seqlevels) of the VCF and the", 24 | "surveyed GRanges object do not match." 25 | )) 26 | } 27 | 28 | if (!any(GenomeInfoDb::seqlevelsStyle(region) %in% GenomeInfoDb::seqlevelsStyle(surveyed))) { 29 | stop(paste( 30 | "The chromosome names (seqlevels) of the surveyed and", 31 | "the region GRanges object do not match." 
32 | )) 33 | } 34 | 35 | # Intersect genomic region and surveyed region 36 | surveyed_region <- GenomicRanges::intersect(surveyed, region, ignore.strand = TRUE) 37 | surveyed_region_length <- sum(as.numeric(BiocGenerics::width(surveyed_region))) 38 | 39 | # Find which mutations lie in surveyed genomic region 40 | overlap <- GenomicRanges::findOverlaps(vcf, surveyed_region) 41 | muts_in_region <- as.data.frame(as.matrix(overlap))$queryHits 42 | # Observed count versus the count expected from the overall mutation rate per surveyed base 43 | observed <- length(muts_in_region) 44 | prob <- n_muts / surveyed_length 45 | expected <- prob * surveyed_region_length 46 | 47 | res <- data.frame( 48 | n_muts, 49 | surveyed_length, 50 | prob, surveyed_region_length, 51 | expected, 52 | observed 53 | ) 54 | return(res) 55 | } 56 | -------------------------------------------------------------------------------- /tests/testthat/test-enrichment_depletion_test.R: -------------------------------------------------------------------------------- 1 | context("test-enrichment_depletion_test") 2 | 3 | # Read distribution data 4 | distr <- readRDS(system.file("states/distr_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | # Set tissue 8 | tissue <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3)) 9 | 10 | ## Perform the enrichment/depletion test by tissue type. 11 | output <- enrichment_depletion_test(distr, by = tissue) 12 | 13 | ## Or without specifying the 'by' parameter. 
14 | output_pooled <- enrichment_depletion_test(distr) 15 | 16 | ## Use different cutoffs for p and fdr 17 | output_strictcutoff <- enrichment_depletion_test(distr, 18 | by = tissue, 19 | p_cutoffs = 0.000001, fdr_cutoffs = 0.000005 20 | ) 21 | 22 | # Use multiple cutoffs for p and fdr 23 | output_multistars <- enrichment_depletion_test(distr, 24 | by = tissue, 25 | p_cutoffs = c(0.05, 0.01, 0.00000005), 26 | fdr_cutoffs = c(0.1, 0.05, 0.00000001) 27 | ) 28 | test_that("Output has correct class", { 29 | expect_true(inherits(output, c("data.frame"))) 30 | expect_true(inherits(output_pooled, c("data.frame"))) 31 | expect_true(inherits(output_strictcutoff, c("data.frame"))) 32 | expect_true(inherits(output_multistars, c("data.frame"))) 33 | }) 34 | 35 | test_that("Output has correct size", { 36 | expect_equal(dim(output), c(15, 13)) 37 | expect_equal(dim(output_pooled), c(5, 13)) 38 | expect_equal(dim(output_strictcutoff), c(15, 13)) 39 | expect_equal(dim(output_multistars), c(15, 13)) 40 | }) 41 | 42 | test_that("Number significant is correct", { 43 | expect_equal(sum(output$significant == "*"), 15) 44 | expect_equal(sum(output$significant_fdr == "*"), 15) 45 | expect_equal(sum(output_pooled$significant == "*"), 5) 46 | expect_equal(sum(output_pooled$significant_fdr == "*"), 5) 47 | expect_equal(sum(output_strictcutoff$significant == "*"), 9) 48 | expect_equal(sum(output_strictcutoff$significant_fdr == "*"), 9) 49 | expect_equal(sum(output_multistars$significant == "***"), 8) 50 | expect_equal(sum(output_multistars$significant_fdr == "**"), 9) 51 | }) 52 | -------------------------------------------------------------------------------- /man/enrichment_depletion_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrichment_depletion_test.R 3 | \name{enrichment_depletion_test} 4 | \alias{enrichment_depletion_test} 5 | \title{Test for enrichment 
or depletion of mutations in genomic regions} 6 | \usage{ 7 | enrichment_depletion_test(x, by = NA, p_cutoffs = 0.05, fdr_cutoffs = 0.1) 8 | } 9 | \arguments{ 10 | \item{x}{data.frame result from genomic_distribution()} 11 | 12 | \item{by}{Optional grouping variable, e.g. tissue type} 13 | 14 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05} 15 | 16 | \item{fdr_cutoffs}{Significance cutoff for the fdr. Default: 0.1} 17 | } 18 | \value{ 19 | data.frame with the observed and expected number of mutations per 20 | genomic region per group (by) or sample 21 | } 22 | \description{ 23 | This function aggregates mutations per group (optional) and performs an 24 | enrichment depletion test. 25 | } 26 | \examples{ 27 | ## See the 'genomic_distribution()' example for how we obtained the 28 | ## following data: 29 | distr <- readRDS(system.file("states/distr_data.rds", 30 | package = "MutationalPatterns" 31 | )) 32 | 33 | tissue <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3)) 34 | 35 | ## Perform the enrichment/depletion test by tissue type. 36 | distr_test <- enrichment_depletion_test(distr, by = tissue) 37 | 38 | ## Or without specifying the 'by' parameter, to pool all samples. 39 | distr_single_sample <- enrichment_depletion_test(distr) 40 | 41 | ## Use different significance cutoffs for the pvalue and fdr 42 | distr_strict <- enrichment_depletion_test(distr, 43 | by = tissue, 44 | p_cutoffs = 0.01, fdr_cutoffs = 0.05 45 | ) 46 | 47 | ## Use multiple (max 3) significance cutoffs. 48 | ## This will vary the number of significance stars. 
49 | distr_multistars <- enrichment_depletion_test(distr, 50 | by = tissue, 51 | p_cutoffs = c(0.05, 0.01, 0.005), 52 | fdr_cutoffs = c(0.1, 0.05, 0.01) 53 | ) 54 | } 55 | \seealso{ 56 | \code{\link{genomic_distribution}}, 57 | \code{\link{plot_enrichment_depletion}} 58 | } 59 | -------------------------------------------------------------------------------- /man/lengthen_mut_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lengthen_mut_matrix.R 3 | \name{lengthen_mut_matrix} 4 | \alias{lengthen_mut_matrix} 5 | \title{Lengthen mutation matrix} 6 | \usage{ 7 | lengthen_mut_matrix(mut_matrix) 8 | } 9 | \arguments{ 10 | \item{mut_matrix}{Mutation matrix} 11 | } 12 | \value{ 13 | mut_matrix 14 | } 15 | \description{ 16 | A mutation_matrix calculated on a GRangesList or GR object modified by 'split_muts_region()', 17 | will contain a column per combination of sample and genomic region. In essence different regions 18 | are treated as different samples. This function will transform the matrix, so that these regions 19 | are instead treated as different mutation types. For example, instead of 'C[C>T]G', you might have 20 | the feature 'C[C>T]G Promoter'. The number of rows in the matrix will thus be 21 | multiplied by the number of regions. 22 | After using 'split_muts_region()', use 'mut_matrix()' to get a mut_matrix that can be used 23 | for this function. 24 | The result can be plotted with plot_profile_region, but could also be used for NMF, refitting etc. 
25 | } 26 | \examples{ 27 | 28 | ## See the 'split_muts_region()' and 'mut_matrix()' examples for how we obtained the 29 | ## mutation matrix information: 30 | mut_mat_split_region <- readRDS(system.file("states/mut_mat_data.rds", 31 | package = "MutationalPatterns" 32 | )) 33 | 34 | long_mut_mat <- lengthen_mut_matrix(mut_mat_split_region) 35 | 36 | 37 | ## This also works on indels: 38 | ## See the 'split_muts_region()' and 'count_indel_contexts()' examples for how we 39 | ## obtained the indel counts: 40 | indel_counts_split <- readRDS(system.file("states/blood_indels_counts_split_region.rds", 41 | package = "MutationalPatterns" 42 | )) 43 | 44 | 45 | ## Lengthen the matrix 46 | lengthen_mut_matrix(indel_counts_split) 47 | } 48 | \seealso{ 49 | Other genomic_regions: 50 | \code{\link{bin_mutation_density}()}, 51 | \code{\link{plot_profile_region}()}, 52 | \code{\link{plot_spectrum_region}()}, 53 | \code{\link{split_muts_region}()} 54 | } 55 | \concept{genomic_regions} 56 | -------------------------------------------------------------------------------- /man/plot_profile_heatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_profile_heatmap.R 3 | \name{plot_profile_heatmap} 4 | \alias{plot_profile_heatmap} 5 | \title{Plot a mutation matrix as a heatmap} 6 | \usage{ 7 | plot_profile_heatmap(mut_matrix, by = NA, max = 0.02, condensed = FALSE) 8 | } 9 | \arguments{ 10 | \item{mut_matrix}{Matrix containing mutation counts.} 11 | 12 | \item{by}{Optional grouping variable} 13 | 14 | \item{max}{Maximum value used for plotting the relative contributions. 15 | Contributions that are higher will have the maximum colour. (Default: 0.02)} 16 | 17 | \item{condensed}{More condensed plotting format. Default = FALSE.} 18 | } 19 | \value{ 20 | A ggplot object 21 | } 22 | \description{ 23 | Function to plot a SNV mutation matrix as a heatmap. 
24 | This is especially useful when looking at a wide mutational context. 25 | } 26 | \examples{ 27 | 28 | ## See the 'mut_matrix()' examples for how we obtained the 29 | ## mutation matrix information: 30 | ## Get regular matrix 31 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 32 | package = "MutationalPatterns" 33 | )) 34 | 35 | ## Create heatmap of profile 36 | plot_profile_heatmap(mut_mat, max = 0.1) 37 | 38 | ## Get extended matrix 39 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds", 40 | package = "MutationalPatterns" 41 | )) 42 | 43 | ## Create heatmap of extended profile 44 | plot_profile_heatmap(mut_mat_extended) 45 | 46 | ## Or plot heatmap per tissue 47 | tissue <- c( 48 | "colon", "colon", "colon", 49 | "intestine", "intestine", "intestine", 50 | "liver", "liver", "liver" 51 | ) 52 | 53 | plot_profile_heatmap(mut_mat_extended, by = tissue) 54 | 55 | ## Or plot the heatmap per sample. 56 | plot_profile_heatmap(mut_mat_extended, 57 | by = colnames(mut_mat_extended), 58 | max = 0.05 59 | ) 60 | 61 | 62 | ## Create a condensed heatmap of extended profile 63 | plot_profile_heatmap(mut_mat_extended, condensed = TRUE) 64 | } 65 | \seealso{ 66 | \code{\link{mut_matrix}}, 67 | \code{\link{plot_96_profile}}, 68 | \code{\link{plot_river}} 69 | } 70 | -------------------------------------------------------------------------------- /inst/scripts/create_example_indels.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(VariantAnnotation) 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 4 | library(ref_genome, character.only = TRUE) 5 | 6 | # Get grl 7 | grl <- readRDS("inst/states/blood_grl.rds") 8 | 9 | # Get indels 10 | grl_indel <- get_mut_type(grl, "indel") 11 | 12 | # Remove names from gr, because they are often very long. 
remove_names_gr <- function(gr) {
  # Swap the existing names for the positional indices 1..length(gr)
  # and hand back the renamed object.
  stats::setNames(gr, seq_along(gr))
}
by hand 2 | % Please edit documentation in R/plot_enrichment_depletion.R 3 | \name{plot_enrichment_depletion} 4 | \alias{plot_enrichment_depletion} 5 | \title{Plot enrichment/depletion of mutations in genomic regions} 6 | \usage{ 7 | plot_enrichment_depletion(df, sig_type = c("fdr", "p")) 8 | } 9 | \arguments{ 10 | \item{df}{Dataframe result from enrichment_depletion_test()} 11 | 12 | \item{sig_type}{The type of significance to be used. Possible values: 13 | * 'fdr' False discovery rate. 14 | A type of multiple testing correction; 15 | * 'p' for regular p values.} 16 | } 17 | \value{ 18 | Plot with two parts. 1: Barplot with no. mutations expected and 19 | observed per region. 2: Effect size of enrichment/depletion 20 | (log2ratio) with the results of the significance test. 21 | } 22 | \description{ 23 | Plot enrichment/depletion of mutations in genomic regions 24 | } 25 | \examples{ 26 | ## See the 'genomic_distribution()' example for how we obtained the 27 | ## following data: 28 | distr <- readRDS(system.file("states/distr_data.rds", 29 | package = "MutationalPatterns" 30 | )) 31 | 32 | tissue <- c( 33 | "colon", "colon", "colon", 34 | "intestine", "intestine", "intestine", 35 | "liver", "liver", "liver" 36 | ) 37 | 38 | ## Perform the enrichment/depletion test. 39 | distr_test <- enrichment_depletion_test(distr, by = tissue) 40 | 41 | ## Plot the enrichment/depletion 42 | plot_enrichment_depletion(distr_test) 43 | 44 | ## Perform and plot the enrichment/depletion test for all samples pooled 45 | distr_test2 <- enrichment_depletion_test(distr) 46 | plot_enrichment_depletion(distr_test2) 47 | 48 | ## Plot with p values instead of fdr 49 | plot_enrichment_depletion(distr_test, sig_type = "p") 50 | 51 | ## Use multiple (max 3) significance cutoffs. 52 | ## This will vary the number of significance stars. 
53 | distr_multistars <- enrichment_depletion_test(distr, 54 | by = tissue, 55 | p_cutoffs = c(0.05, 0.01, 0.005), 56 | fdr_cutoffs = c(0.1, 0.05, 0.01) 57 | ) 58 | plot_enrichment_depletion(distr_multistars) 59 | } 60 | \seealso{ 61 | \code{\link{enrichment_depletion_test}}, 62 | \code{\link{genomic_distribution}} 63 | } 64 | -------------------------------------------------------------------------------- /tests/testthat/test-fit_to_signatures_strict.R: -------------------------------------------------------------------------------- 1 | context("test-fit_to_signatures_strict") 2 | 3 | # Get mut_mat 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Get signatures 9 | signatures <- get_known_signatures() 10 | 11 | output <- fit_to_signatures_strict(mut_mat, signatures, max_delta = 0.05) 12 | output_best <- fit_to_signatures_strict(mut_mat, signatures[,1:5], max_delta = 0.004, method = "best_subset") 13 | output_single_sig = fit_to_signatures_strict(mut_mat, signatures[,1, drop = F], max_delta = 0.05) 14 | 15 | expected <- readRDS(system.file("states/strict_snv_refit.rds", 16 | package = "MutationalPatterns" 17 | )) 18 | expected_best <- readRDS(system.file("states/strict_best_snv_refit.rds", 19 | package = "MutationalPatterns" 20 | )) 21 | 22 | test_that("Output has correct class", { 23 | expect_true(inherits(output, "list")) 24 | expect_true(inherits(output$fit_res, "list")) 25 | expect_true(inherits(output$fit_res$contribution, "matrix")) 26 | expect_true(inherits(output$fit_res$reconstructed, "matrix")) 27 | expect_true(inherits(output$sim_decay_fig, "list")) 28 | expect_true(inherits(output$sim_decay_fig[[1]], "gg")) 29 | expect_true(inherits(output_best, "list")) 30 | expect_true(inherits(output_best$fit_res, "list")) 31 | expect_true(inherits(output_best$fit_res$contribution, "matrix")) 32 | expect_true(inherits(output_best$fit_res$reconstructed, "matrix")) 33 | 
expect_true(inherits(output_best$sim_decay_fig, "list")) 34 | expect_true(inherits(output_best$sim_decay_fig[[1]], "gg")) 35 | expect_true(inherits(output_single_sig, "list")) 36 | expect_true(inherits(output_single_sig$fit_res, "list")) 37 | expect_true(inherits(output_single_sig$fit_res$contribution, "matrix")) 38 | expect_true(inherits(output_single_sig$fit_res$reconstructed, "matrix")) 39 | expect_true(inherits(output_single_sig$sim_decay_fig, "list")) 40 | expect_true(inherits(output_single_sig$sim_decay_fig[[1]], "gg")) 41 | }) 42 | 43 | test_that("Output is equal to expected", { 44 | expect_equal(output$fit_res, expected) 45 | expect_equal(output_best$fit_res, expected_best) 46 | }) 47 | -------------------------------------------------------------------------------- /R/mut_matrix.R: -------------------------------------------------------------------------------- 1 | #' Make mutation count matrix of 96 trinucleotides 2 | #' 3 | #' @description Make 96 trinucleotide mutation count matrix 4 | #' @param vcf_list GRangesList or GRanges object. 5 | #' @param ref_genome BSgenome reference genome object 6 | #' @param extension The number of bases, that's extracted upstream and 7 | #' downstream of the base substitutions. (Default: 1). 8 | #' @return 96 mutation count matrix 9 | #' 10 | #' @examples 11 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the 12 | #' ## following data: 13 | #' grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 14 | #' package = "MutationalPatterns" 15 | #' )) 16 | #' 17 | #' ## Load the corresponding reference genome. 18 | #' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 19 | #' library(ref_genome, character.only = TRUE) 20 | #' 21 | #' ## Construct a mutation matrix from the loaded VCFs in comparison to the 22 | #' ## ref_genome. 23 | #' mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome) 24 | #' 25 | #' ## Construct a mutation matrix with a larger context. 
#' ## This is most useful when you have many mutations per sample.
#' mut_mat_extended <- mut_matrix(vcf_list = grl, ref_genome = ref_genome, extension = 2)
#' @seealso
#' \code{\link{read_vcfs_as_granges}}
#'
#' @export
mut_matrix <- function(vcf_list, ref_genome, extension = 1) {

  # A plain list is first wrapped into a GRangesList
  if (inherits(vcf_list, "list")) {
    vcf_list <- GenomicRanges::GRangesList(vcf_list)
  }

  # Flatten the input into a single GRanges and keep track of how many
  # mutations each sample contributes.
  if (inherits(vcf_list, "CompressedGRangesList")) {
    all_muts <- BiocGenerics::unlist(vcf_list)
    muts_per_sample <- S4Vectors::elementNROWS(vcf_list)
  } else if (inherits(vcf_list, "GRanges")) {
    # A single GRanges is treated as one sample with a placeholder name
    all_muts <- vcf_list
    muts_per_sample <- c(My_sample = length(all_muts))
  } else {
    .not_gr_or_grl(vcf_list)
  }

  # Determine the type and context of every mutation, then count them
  # per sample to build the mutation matrix.
  context_info <- type_context(all_muts, ref_genome, extension)
  mut_96_occurrences(context_info, muts_per_sample)
}
line. Default: 0.95.} 21 | 22 | \item{ylims}{The limits of the y axis. Default: c(0.6, 1)} 23 | } 24 | \value{ 25 | A ggplot figure 26 | } 27 | \description{ 28 | When a reconstructed profile has a cosine similarity of more than 0.95 with 29 | original, the reconstructed profile is considered very good. 30 | } 31 | \examples{ 32 | 33 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 34 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 35 | package = "MutationalPatterns" 36 | )) 37 | 38 | ## Extracting signatures can be computationally intensive, so 39 | ## we use pre-computed data generated with the following command: 40 | # nmf_res <- extract_signatures(mut_mat, rank = 2) 41 | 42 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 43 | package = "MutationalPatterns" 44 | )) 45 | 46 | ## Create figure 47 | plot_original_vs_reconstructed(mut_mat, nmf_res$reconstructed) 48 | 49 | ## You can also use the results of signature refitting. 50 | ## Here we load some data as an example 51 | fit_res <- readRDS(system.file("states/snv_refit.rds", 52 | package = "MutationalPatterns" 53 | )) 54 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed) 55 | 56 | ## You can also change the height of the horizontal line 57 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, y_intercept = 0.90) 58 | 59 | ## It's also possible to change the limits of the y axis 60 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, ylims = c(0, 1)) 61 | } 62 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_spectrum.R: -------------------------------------------------------------------------------- 1 | context("test-plot_spectrum") 2 | 3 | # Laad variants: 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | 9 | ## Load a reference genome. 
10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 11 | library(ref_genome, character.only = TRUE) 12 | 13 | ## Get the type occurrences for all VCF objects. 14 | type_occurrences <- mut_type_occurrences(vcfs, ref_genome) 15 | 16 | ## Plot the point mutation spectrum over all samples 17 | output <- plot_spectrum(type_occurrences) 18 | 19 | ## CT distinction 20 | output_CT <- plot_spectrum(type_occurrences, CT = TRUE) 21 | 22 | ## You can also include individual sample points. 23 | output_indv <- plot_spectrum(type_occurrences, CT = TRUE, indv_points = TRUE) 24 | 25 | ## You can also change the type of error bars 26 | output_stdev <- plot_spectrum(type_occurrences, error_bars = "stdev") 27 | output_sem <- plot_spectrum(type_occurrences, error_bars = "SEM") 28 | 29 | ## Or plot spectrum per tissue 30 | tissue <- c( 31 | "colon", "colon", "colon", 32 | "intestine", "intestine", "intestine", 33 | "liver", "liver", "liver" 34 | ) 35 | 36 | output_tissue <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE) 37 | 38 | ## Or plot the spectrum per sample. Error bars are set to 'none', because they can't be plotted. 39 | output_sample <- plot_spectrum(type_occurrences, by = names(vcfs), CT = TRUE, error_bars = "none") 40 | 41 | ## You can also set custom colors. 42 | my_colors <- c( 43 | "pink", "orange", "blue", "lightblue", 44 | "green", "red", "purple" 45 | ) 46 | 47 | ## And use them in a plot. 
48 | output_color <- plot_spectrum(type_occurrences, 49 | CT = TRUE, 50 | legend = TRUE, 51 | colors = my_colors 52 | ) 53 | 54 | test_that("Output has correct class", { 55 | expect_true(inherits(output, c("gg"))) 56 | expect_true(inherits(output_CT, c("gg"))) 57 | expect_true(inherits(output_indv, c("gg"))) 58 | expect_true(inherits(output_stdev, c("gg"))) 59 | expect_true(inherits(output_sem, c("gg"))) 60 | expect_true(inherits(output_tissue, c("gg"))) 61 | expect_true(inherits(output_sample, c("gg"))) 62 | expect_true(inherits(output_color, c("gg"))) 63 | }) 64 | -------------------------------------------------------------------------------- /tests/testthat/test-plot_spectrum_region.R: -------------------------------------------------------------------------------- 1 | context("test-plot_spectrum_region") 2 | 3 | # load data 4 | grl <- readRDS(system.file("states/grl_split_region.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Load a reference genome. 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 10 | library(ref_genome, character.only = TRUE) 11 | 12 | 13 | # Get the type occurrences for all VCF objects. 14 | type_occurrences <- mut_type_occurrences(grl, ref_genome) 15 | 16 | # Plot the point mutation spectrum over all samples 17 | output <- plot_spectrum_region(type_occurrences) 18 | 19 | # Plot the point mutation spectrum, relative only to the samples. 20 | output_relative_sample <- plot_spectrum_region(type_occurrences, mode = "relative_sample") 21 | 22 | 23 | # Plot the absolute point mutation spectrum over all samples 24 | output_absolute <- plot_spectrum_region(type_occurrences, mode = "absolute") 25 | 26 | # Plot using different types of error bars. 
27 | output_stdev <- plot_spectrum_region(type_occurrences, error_bars = "stdev") 28 | output_sem <- plot_spectrum_region(type_occurrences, error_bars = "SEM") 29 | 30 | # Plot including the sample points themselves 31 | output_indv <- plot_spectrum_region(type_occurrences, indv_points = TRUE) 32 | 33 | # Plot per tissue 34 | tissue <- c( 35 | "colon", "colon", "colon", 36 | "intestine", "intestine", "intestine", 37 | "liver", "liver", "liver" 38 | ) 39 | output_tissue <- plot_spectrum_region(type_occurrences, by = tissue) 40 | 41 | # Plot each sample separately 42 | sample_names <- c( 43 | "colon1", "colon2", "colon3", 44 | "intestine1", "intestine2", "intestine3", 45 | "liver1", "liver2", "liver3" 46 | ) 47 | output_sample <- plot_spectrum_region(type_occurrences, by = sample_names, error_bars = "none") 48 | 49 | # Test different outputs 50 | test_that("Output has correct class", { 51 | expect_true(inherits(output, c("gg"))) 52 | expect_true(inherits(output_relative_sample, c("gg"))) 53 | expect_true(inherits(output_absolute, c("gg"))) 54 | expect_true(inherits(output_stdev, c("gg"))) 55 | expect_true(inherits(output_sem, c("gg"))) 56 | expect_true(inherits(output_indv, c("gg"))) 57 | expect_true(inherits(output_tissue, c("gg"))) 58 | expect_true(inherits(output_sample, c("gg"))) 59 | }) 60 | -------------------------------------------------------------------------------- /tests/testthat/test-strand_bias_test.R: -------------------------------------------------------------------------------- 1 | context("test-strand_bias_test") 2 | 3 | # Load stranded mutation matrix 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Set tissue names 9 | tissue <- c( 10 | "colon", "colon", "colon", 11 | "intestine", "intestine", "intestine", 12 | "liver", "liver", "liver" 13 | ) 14 | 15 | ## Perform the strand bias test. 
16 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue) 17 | output <- strand_bias_test(strand_counts) 18 | 19 | # Repeat for replication bias. 20 | mut_mat_repli <- readRDS(system.file("states/mut_mat_repli.rds", 21 | package = "MutationalPatterns" 22 | )) 23 | strand_counts_repli <- strand_occurrences(mut_mat_repli, by = tissue) 24 | output_repli <- strand_bias_test(strand_counts_repli) 25 | 26 | ## Use different cutoffs for p and fdr 27 | output_lenientcutoff <- strand_bias_test(strand_counts, p_cutoffs = 0.1, fdr_cutoffs = 0.4) 28 | 29 | # Use multiple cutoffs for p and fdr 30 | output_multistars <- strand_bias_test(strand_counts, 31 | p_cutoffs = c(0.5, 0.1, 0.05), 32 | fdr_cutoffs = c(0.5, 0.35, 0.1) 33 | ) 34 | 35 | # Tests 36 | test_that("Output has correct class", { 37 | expect_true(inherits(output, c("tbl_df"))) 38 | expect_true(inherits(output_repli, c("tbl_df"))) 39 | expect_true(inherits(output_lenientcutoff, c("tbl_df"))) 40 | expect_true(inherits(output_multistars, c("tbl_df"))) 41 | }) 42 | 43 | test_that("Output has correct size", { 44 | expect_equal(dim(output), c(18, 10)) 45 | expect_equal(dim(output_repli), c(18, 10)) 46 | expect_equal(dim(output_lenientcutoff), c(18, 10)) 47 | expect_equal(dim(output_multistars), c(18, 10)) 48 | }) 49 | 50 | test_that("Number significant is correct", { 51 | expect_equal(sum(output$significant == "*"), 1) 52 | expect_equal(sum(output$significant_fdr == "*"), 0) 53 | expect_equal(sum(output_repli$significant == "*"), 0) 54 | expect_equal(sum(output_repli$significant_fdr == "*"), 0) 55 | expect_equal(sum(output_lenientcutoff$significant == "*"), 3) 56 | expect_equal(sum(output_lenientcutoff$significant_fdr == "*"), 3) 57 | expect_equal(sum(output_multistars$significant == "***"), 1) 58 | expect_equal(sum(output_multistars$significant_fdr == "**"), 3) 59 | }) 60 | -------------------------------------------------------------------------------- /man/bin_mutation_density.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bin_mutation_density.R 3 | \name{bin_mutation_density} 4 | \alias{bin_mutation_density} 5 | \title{Bin the genome based on mutation density} 6 | \usage{ 7 | bin_mutation_density(vcf_list, ref_genome, nrbins = 3, man_dens_cutoffs = NA) 8 | } 9 | \arguments{ 10 | \item{vcf_list}{GRangesList or GRanges object.} 11 | 12 | \item{ref_genome}{BSgenome reference genome object} 13 | 14 | \item{nrbins}{The number of bins in which to separate the genome} 15 | 16 | \item{man_dens_cutoffs}{Manual density cutoffs to use.} 17 | } 18 | \value{ 19 | GRangesList 20 | } 21 | \description{ 22 | This function splits the genome based on the mutation density. 23 | The density is calculated per chromosome. The density is split 24 | into bins. The difference in density between subsequent bins is the same 25 | for all bins. In other words, the difference in density between bins 1 and 26 | 2 is the same as between bins 2 and 3. 27 | The function returns a GRangesList. Each GRanges in the list contains the 28 | regions associated with that bin. This can be used with the 29 | 'split_muts_region()' function. 30 | } 31 | \examples{ 32 | 33 | ### See the 'read_vcfs_as_granges()' example for how we obtained the 34 | ## following data: 35 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 36 | package = "MutationalPatterns" 37 | )) 38 | 39 | ## Load the corresponding reference genome. 40 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19" 41 | library(ref_genome, character.only = TRUE) 42 | 43 | ## Determine region density 44 | dens_grl <- bin_mutation_density(grl, ref_genome, nrbins = 3) 45 | names(dens_grl) <- c("Low", "Medium", "High") 46 | 47 | 48 | ## You can also use manual cutoffs. This feature is meant for more 49 | ## advanced users. 
It can be useful if you want to find highly mutated regions, with 50 | ## a consistent cutoff between analyses. 51 | dens_grl_man <- bin_mutation_density(grl, ref_genome, man_dens_cutoffs = c(0, 2e-08, 1)) 52 | } 53 | \seealso{ 54 | Other genomic_regions: 55 | \code{\link{lengthen_mut_matrix}()}, 56 | \code{\link{plot_profile_region}()}, 57 | \code{\link{plot_spectrum_region}()}, 58 | \code{\link{split_muts_region}()} 59 | } 60 | \concept{genomic_regions} 61 | -------------------------------------------------------------------------------- /tests/testthat/test-split_muts_region.R: -------------------------------------------------------------------------------- 1 | context("test-split_muts_region") 2 | 3 | # Read in genomic regions 4 | CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | promoter_g <- readRDS(system.file("states/promoter_g_data.rds", 8 | package = "MutationalPatterns" 9 | )) 10 | flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds", 11 | package = "MutationalPatterns" 12 | )) 13 | 14 | # Combine the regions into a single GRangesList 15 | regions <- GRangesList(promoter_g, flanking_g, CTCF_g) 16 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF") 17 | seqlevelsStyle(regions) <- "UCSC" 18 | 19 | # Read in some variants. 
20 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 21 | package = "MutationalPatterns" 22 | )) 23 | 24 | # Only use two samples to reduce runtime 25 | grl <- grl[1:2] 26 | 27 | 28 | # Run function 29 | output <- split_muts_region(grl, regions) 30 | output_single_gr <- split_muts_region(grl[[1]], regions) 31 | output_single_region <- split_muts_region(grl, regions[[1]]) 32 | output_noother <- split_muts_region(grl, regions, include_other = FALSE) 33 | 34 | 35 | test_that("Output has correct class", { 36 | expect_true(inherits(output, "CompressedGRangesList")) 37 | expect_true(inherits(output_single_gr, "CompressedGRangesList")) 38 | expect_true(inherits(output_single_region, "CompressedGRangesList")) 39 | expect_true(inherits(output_noother, "CompressedGRangesList")) 40 | }) 41 | 42 | expected_length <- function(grl, regions) { 43 | exp_length <- (length(regions) + 1) * length(grl) # nr. samples * nr. regions. +1 is for the variants in 'other' 44 | return(exp_length) 45 | } 46 | 47 | test_that("Output GRangesList has correct length", { 48 | expect_equal(length(output), expected_length(grl, regions)) 49 | expect_equal(length(output_single_gr), expected_length(grl[1], regions)) 50 | expect_equal(length(output_single_region), expected_length(grl, regions[1])) 51 | expect_equal(length(output_noother), length(regions) * length(grl)) 52 | }) 53 | 54 | expected <- readRDS(system.file("states/grl_split_region.rds", 55 | package = "MutationalPatterns" 56 | ))[1:8] 57 | test_that("Output transforms correctly", { 58 | expect_equal(output, expected) 59 | }) 60 | -------------------------------------------------------------------------------- /tests/testthat/test-genomic_distribution.R: -------------------------------------------------------------------------------- 1 | context("test-genomic_distribution") 2 | 3 | # Read vcfs 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 5 | package = "MutationalPatterns" 6 | )) 7 | 8 | # Load 
genomic regions 9 | CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds", 10 | package = "MutationalPatterns" 11 | )) 12 | 13 | promoter_g <- readRDS(system.file("states/promoter_g_data.rds", 14 | package = "MutationalPatterns" 15 | )) 16 | 17 | flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds", 18 | package = "MutationalPatterns" 19 | )) 20 | 21 | # Combine regions and set seqlevelstyle 22 | regions <- GRangesList(promoter_g, flanking_g, CTCF_g) 23 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF") 24 | seqlevelsStyle(regions) <- "UCSC" 25 | 26 | # Get the callable regions 27 | surveyed_file <- system.file("extdata/callableloci-sample.bed", 28 | package = "MutationalPatterns" 29 | ) 30 | 31 | library(rtracklayer) 32 | surveyed <- rtracklayer::import(surveyed_file) 33 | seqlevelsStyle(surveyed) <- "UCSC" 34 | 35 | # Use the same callable loci for all samples. 36 | surveyed_list <- rep(list(surveyed), 9) 37 | 38 | ## Calculate the number of observed and expected number of mutations in 39 | ## each genomic regions for each sample. 40 | output <- genomic_distribution(vcfs, surveyed_list, regions) 41 | 42 | 43 | test_that("Output has correct class", { 44 | expect_true(inherits(output, c("data.frame"))) 45 | }) 46 | 47 | test_that("Output has correct size", { 48 | expect_equal(dim(output), c(27, 8)) 49 | }) 50 | 51 | # Test that an error is given when the vcf_list and surveyed list are not the same size 52 | test_that("An error is given when input sizes don't match", { 53 | expect_error( 54 | { 55 | genomic_distribution(vcfs, surveyed_list[1:8], regions) 56 | }, 57 | "vcf_list and surveyed_list must have the same length" 58 | ) 59 | }) 60 | 61 | # Test that an error is given when regions_list names are not set. 
# Use biomaRt to obtain data.
#
# This script downloads regulatory features (CTCF binding sites, promoters
# and promoter flanking regions) for GRCh37 and stores them as example data
# in 'inst/states/'.

library(biomaRt)
# GRanges(), IRanges(), reduce() and seqlevels() are used below. Attach
# GenomicRanges explicitly, so the script also runs in a fresh R session.
library(GenomicRanges)

regulatory <- useEnsembl(
  biomart = "regulation",
  dataset = "hsapiens_regulatory_feature",
  GRCh = 37
)
saveRDS(regulatory, "inst/states/regulatory_data.rds")

# Download all regulatory features of a single type and convert them to a
# reduced, sorted GRanges object limited to chromosomes 1-22, X and Y.
get_regulatory_regions <- function(feature_type, mart) {
  features <- getBM(
    attributes = c(
      "chromosome_name",
      "chromosome_start",
      "chromosome_end",
      "feature_type_name"
    ),
    filters = "regulatory_feature_type_name",
    values = feature_type,
    mart = mart
  )

  regions <- reduce(GRanges(
    features$chromosome_name,
    IRanges(
      features$chromosome_start,
      features$chromosome_end
    )
  ))
  seqlevels(regions) <- c(1:22, "X", "Y")
  sort(regions)
}

# Keep a random subset of the regions to limit the size of the example data.
# NOTE: no seed is set, so every run of this script draws a different subset.
subsample_regions <- function(regions, n = 50000) {
  regions[sample.int(length(regions), n)]
}

# Download the regulatory CTCF binding sites and convert them to
# a GRanges object.
CTCF_g <- get_regulatory_regions("CTCF Binding Site", regulatory)
CTCF_g <- subsample_regions(CTCF_g)
saveRDS(CTCF_g, "inst/states/CTCF_g_data.rds")

# Download the promoter regions and convert them to a GRanges object.
promoter_g <- get_regulatory_regions("Promoter", regulatory)
saveRDS(promoter_g, "inst/states/promoter_g_data.rds")

# Download the promoter flanking regions and convert them to a GRanges object.
flanking_g <- get_regulatory_regions("Promoter Flanking Region", regulatory)
flanking_g <- subsample_regions(flanking_g)
saveRDS(flanking_g, "inst/states/promoter_flanking_g_data.rds")
22 | chromosomes <- paste0("chr", c(1:22)) 23 | 24 | # Do a rainfall plot for all chromosomes: 25 | output <- plot_rainfall(vcfs[[1]], title = names(vcfs[1]), chromosomes = chromosomes) 26 | 27 | # Plot a single chromosome (chromosome 1): 28 | output_singlechrom <- plot_rainfall(vcfs[[1]], title = names(vcfs[1]), chromosomes = chromosomes[1]) 29 | 30 | # Plot a subset of the variants 31 | output_subset <- plot_rainfall(vcfs[[1]][1:10], title = names(vcfs[1]), chromosomes = chromosomes) 32 | 33 | # plot indels 34 | output_indel <- plot_rainfall(grl_indel_context[[1]], title = names(grl_indel_context[1]), chromosomes = chromosomes, type = "indel") 35 | 36 | # plot dbs 37 | output_dbs <- plot_rainfall(grl_dbs_context[[1]], title = names(grl_dbs_context[1]), chromosomes = chromosomes, type = "dbs") 38 | 39 | # plot mbs 40 | output_mbs <- plot_rainfall(mbs_grl[[1]], title = names(mbs_grl[1]), chromosomes = chromosomes, type = "mbs") 41 | 42 | # Plot an empty gr 43 | output_empty <- plot_rainfall(vcfs[[1]][0], title = names(vcfs[1]), chromosomes = chromosomes) 44 | 45 | 46 | test_that("Output has correct class", { 47 | expect_true(inherits(output, c("gg"))) 48 | expect_true(inherits(output_singlechrom, c("gg"))) 49 | expect_true(inherits(output_subset, c("gg"))) 50 | expect_true(inherits(output_indel, c("gg"))) 51 | expect_true(inherits(output_dbs, c("gg"))) 52 | expect_true(inherits(output_mbs, c("gg"))) 53 | expect_true(inherits(output_empty, c("gg"))) 54 | }) 55 | 56 | test_that("Subsetted output contains the correct subset of colours", { 57 | colours_used <- unique(ggplot_build(output_subset)$data[[1]][["colour"]]) 58 | expect_equal(colours_used, c("#ADCC54", "#DE1C14", "#2EBAED", "#D4D2D2")) 59 | }) 60 | -------------------------------------------------------------------------------- /R/type_context.R: -------------------------------------------------------------------------------- 1 | #' Retrieve context of base substitution types 2 | #' 3 | #' A function to 
type_context <- function(vcf, ref_genome, extension = 1) {
  # Returns a named list with two elements:
  #   types:   the 6 base mutation type of every variant (from mut_type()).
  #   context: the sequence context of every variant (from mut_context()),
  #            reverse complemented wherever the mutation itself differs
  #            from its 6 base type.

  # Deal with empty GRanges objects.
  if (length(vcf) == 0) {
    # The message is assembled from separate pieces. A single string literal
    # spanning two source lines would embed the line break and the code
    # indentation in the warning text shown to the user.
    warning(
      "Detected empty GRanges object. ",
      "Returning an empty list for this sample.",
      call. = FALSE
    )
    return(list("types" = NULL, "context" = NULL))
  }

  # Get the mut context.
  # (Stored as 'context', so the local variable does not shadow the
  # mut_context() function.)
  context <- mut_context(vcf, ref_genome, extension)

  # Get the mutations
  muts <- mutations_from_vcf(vcf)

  # Get the 6 base mutation types
  types <- mut_type(vcf)

  # Find the mutations for which the context needs to be adjusted. These are
  # the variants whose mutation is not identical to its 6 base type.
  needs_revcomp <- which(muts != types)

  # Replace the context of these mutations with its reverse complement:
  # complement the bases with chartr() and then reverse the string.
  context[needs_revcomp] <- IRanges::reverse(
    chartr("ATGC", "TACG", context[needs_revcomp])
  )

  # Return as named list
  list("types" = types, "context" = context)
}
24 | Variant that are not in any of the provided region are put in a list of 'other'. 25 | } 26 | \examples{ 27 | 28 | ## Read in some existing genomic regions. 29 | ## See the 'genomic_distribution()' example for how we obtained the 30 | ## following data: 31 | CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds", 32 | package = "MutationalPatterns" 33 | )) 34 | promoter_g <- readRDS(system.file("states/promoter_g_data.rds", 35 | package = "MutationalPatterns" 36 | )) 37 | flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds", 38 | package = "MutationalPatterns" 39 | )) 40 | 41 | ## Combine the regions into a single GRangesList 42 | regions <- GRangesList(promoter_g, flanking_g, CTCF_g) 43 | 44 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF") 45 | 46 | ## Read in some variants. 47 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 48 | ## following data: 49 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 50 | package = "MutationalPatterns" 51 | )) 52 | 53 | ## Split muts based on the supplied regions 54 | split_muts_region(grl, regions) 55 | 56 | ## Don't include muts outside of the supplied regions 57 | split_muts_region(grl, regions, include_other = FALSE) 58 | } 59 | \seealso{ 60 | Other genomic_regions: 61 | \code{\link{bin_mutation_density}()}, 62 | \code{\link{lengthen_mut_matrix}()}, 63 | \code{\link{plot_profile_region}()}, 64 | \code{\link{plot_spectrum_region}()} 65 | } 66 | \concept{genomic_regions} 67 | -------------------------------------------------------------------------------- /man/plot_compare_mbs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_compare_mbs.R 3 | \name{plot_compare_mbs} 4 | \alias{plot_compare_mbs} 5 | \title{Compare two mbs mutation profiles} 6 | \usage{ 7 | plot_compare_mbs( 8 | profile1, 9 | profile2, 10 | profile_names = 
c("profile 1", "profile 2"), 11 | profile_ymax = 1, 12 | diff_ylim = c(-0.5, 0.5) 13 | ) 14 | } 15 | \arguments{ 16 | \item{profile1}{First mutation profile} 17 | 18 | \item{profile2}{Second mutation profile} 19 | 20 | \item{profile_names}{Character vector with names of the mutations profiles 21 | used for plotting, default = c("profile 1", "profile 2")} 22 | 23 | \item{profile_ymax}{Maximum value of y-axis (relative contribution) for 24 | profile plotting. This can only be used to increase the y axis. 25 | If bars fall outside this limit, the maximum value is 26 | automatically increased. default = 1.} 27 | 28 | \item{diff_ylim}{Y-axis limits for profile difference plot, 29 | default = c(-0.5, 0.5)} 30 | } 31 | \value{ 32 | A ggplot2 object 33 | } 34 | \description{ 35 | Plots two mbs mutation profiles and their difference, reports the residual 36 | sum of squares (RSS). 37 | } 38 | \examples{ 39 | 40 | ## Get the mbs counts 41 | ## See 'count_mbs_contexts()' for more info on how to do this. 42 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds", 43 | package = "MutationalPatterns" 44 | )) 45 | 46 | 47 | ## You could compare regular mutation profiles with eachother. 48 | plot_compare_mbs( 49 | mbs_counts[, 1], 50 | mbs_counts[, 2] 51 | ) 52 | 53 | ## Or change the names of the profiles 54 | plot_compare_mbs(mbs_counts[, 1], 55 | mbs_counts[, 2], 56 | profile_names = c("Original", "Reconstructed") 57 | ) 58 | 59 | ## You can also change the y limits. 60 | ## This can be done separately for the profiles and the different facets. 61 | plot_compare_mbs(mbs_counts[, 1], 62 | mbs_counts[, 2], 63 | profile_ymax = 0.9, 64 | diff_ylim = c(-0.8, 0.8) 65 | ) 66 | 67 | ## You could also compare a reconstructed profile. 68 | ## However, the example data does not contain enough MBS variants to use NMF. 69 | ## Existing signatures have also not yet been defined. 
70 | } 71 | \seealso{ 72 | \code{\link{plot_compare_profiles}}, 73 | \code{\link{plot_compare_dbs}}, 74 | \code{\link{plot_compare_indels}} 75 | 76 | Other MBS: 77 | \code{\link{count_mbs_contexts}()}, 78 | \code{\link{plot_mbs_contexts}()} 79 | } 80 | \concept{MBS} 81 | -------------------------------------------------------------------------------- /R/cos_sim_matrix.R: -------------------------------------------------------------------------------- 1 | #' Compute all pairwise cosine similarities between mutational profiles/signatures 2 | #' 3 | #' Computes all pairwise cosine similarities between the mutational profiles provided in the two mutation count matrices. 4 | #' The cosine similarity is a value between 0 (distinct) and 1 (identical) and indicates how much two vectors are alike. 5 | #' 6 | #' @param mut_matrix1 mutation count matrix (dimensions: a mutation features X n samples) 7 | #' @param mut_matrix2 96 mutation count matrix (dimensions: a mutation features X m samples) 8 | #' @return Matrix with pairwise cosine similarities (dimensions: n mutational profiles X m mutational profiles) 9 | #' 10 | #' @examples 11 | #' ## Get signatures 12 | #' signatures <- get_known_signatures() 13 | #' 14 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 15 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 16 | #' package = "MutationalPatterns" 17 | #' )) 18 | #' 19 | #' 20 | #' ## Calculate the cosine similarity between each COSMIC signature and each 96 mutational profile 21 | #' cos_sim_matrix(mut_mat, signatures) 22 | #' @seealso 23 | #' \code{\link{mut_matrix}}, 24 | #' \code{\link{fit_to_signatures}}, 25 | #' \code{\link{plot_cosine_heatmap}} 26 | #' 27 | #' @export 28 | 29 | cos_sim_matrix <- function(mut_matrix1, mut_matrix2) { 30 | 31 | # Check that both inputs are numeric. 
32 | if (!all(apply(mut_matrix1, 2, is.numeric))){ 33 | stop("The first input contains non-numeric columns, while all columns should be numeric.") 34 | } 35 | if (!all(apply(mut_matrix2, 2, is.numeric))){ 36 | stop("The second input contains non-numeric columns, while all columns should be numeric.") 37 | } 38 | 39 | # Determine number of samples 40 | n_samples1 <- ncol(mut_matrix1) 41 | n_samples2 <- ncol(mut_matrix2) 42 | res_matrix <- matrix(nrow = n_samples1, ncol = n_samples2) 43 | 44 | # Loop over the columns of both input matrices, 45 | # to determine the cosine similarities. 46 | for (s in seq_len(n_samples1)) 47 | { 48 | signal1 <- mut_matrix1[, s, drop = TRUE] 49 | cos_sim_vector <- c() 50 | for (i in seq_len(n_samples2)) 51 | { 52 | signal2 <- mut_matrix2[, i, drop = TRUE] 53 | cos_sim_vector[i] <- cos_sim(signal1, signal2) 54 | } 55 | res_matrix[s, ] <- cos_sim_vector 56 | } 57 | rownames(res_matrix) <- colnames(mut_matrix1) 58 | colnames(res_matrix) <- colnames(mut_matrix2) 59 | 60 | return(res_matrix) 61 | } 62 | -------------------------------------------------------------------------------- /R/convert_sigs_to_ref.R: -------------------------------------------------------------------------------- 1 | #' Convert tissue specific signature exposures to reference 2 | #' 3 | #' This function converts tissue specific signature contributions into 4 | #' reference signature contributions. This works on SNV signatures from SIGNAL. 5 | #' It uses a conversion matrix to do the conversion. 6 | #' The output can include possible artifact signatures. 7 | #' 8 | #' @param fit_res Named list with signature contributions and reconstructed 9 | #' mutation matrix 10 | #' 11 | #' @return The input fit_res, but with converted signature contributions. 
convert_sigs_to_ref <- function(fit_res) {
  # Replaces 'fit_res$contribution' (tissue specific signatures x samples)
  # by t(conversion matrix) %*% contribution. All other elements of
  # 'fit_res' are returned unchanged.

  # Get contribution
  contri <- fit_res$contribution
  tissue_sigs <- rownames(contri)

  # Determine conversion matrix filename
  fname_matrix <- system.file(
    file.path("extdata", "signatures", "SIGNAL_conversion_matrix.txt"),
    package = "MutationalPatterns"
  )

  # Read conversion matrix. The "Tissue_sig" column becomes the row names.
  conv_m <- read.table(fname_matrix,
    sep = "\t",
    header = TRUE,
    stringsAsFactors = FALSE,
    dec = ",",
    check.names = FALSE
  ) %>%
    tibble::column_to_rownames("Tissue_sig") %>%
    as.matrix()

  # Check that the tissue specific signature names are all in the
  # conversion matrix. A contribution matrix without row names can't be
  # matched at all, so it is rejected as well.
  if (is.null(tissue_sigs) || any(!tissue_sigs %in% rownames(conv_m))) {
    stop(paste0(
      "The signature names of the fit_res don't match that of ",
      "the conversion matrix.\n You have to use tissue specific SNV ",
      "signatures from SIGNAL."
    ), call. = FALSE)
  }

  # Keep only the signatures that were used and put them in the same order
  # as the rows of the contribution matrix. Selecting by name (instead of
  # with a logical '%in%' mask) guarantees that row i of the conversion
  # matrix and row i of the contribution describe the same signature, which
  # the matrix product below relies on.
  conv_m <- conv_m[tissue_sigs, , drop = FALSE]

  # Convert signatures to reference.
  fit_res$contribution <- t(conv_m) %*% contri

  return(fit_res)
}
41 | } 42 | \examples{ 43 | ## Get The indel counts 44 | ## See 'count_indel_contexts()' for more info on how to do this. 45 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", 46 | package = "MutationalPatterns" 47 | )) 48 | 49 | ## Plot contexts 50 | plot_indel_contexts(indel_counts) 51 | 52 | ## Use the same y axis for all samples. 53 | plot_indel_contexts(indel_counts, same_y = TRUE) 54 | 55 | ## Add extra labels to make plot clearer 56 | plot_indel_contexts(indel_counts, extra_labels = TRUE) 57 | 58 | ## Create a more condensed plot 59 | plot_indel_contexts(indel_counts, condensed = TRUE) 60 | } 61 | \seealso{ 62 | \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}} 63 | 64 | Other Indels: 65 | \code{\link{count_indel_contexts}()}, 66 | \code{\link{get_indel_context}()}, 67 | \code{\link{plot_compare_indels}()}, 68 | \code{\link{plot_main_indel_contexts}()} 69 | } 70 | \concept{Indels} 71 | -------------------------------------------------------------------------------- /R/mut_192_occurrences.R: -------------------------------------------------------------------------------- 1 | #' Count 192 trinucleotide mutation occurrences 2 | #' 3 | #' @details 4 | #' This function is called by mut_matrix_stranded. 5 | #' The 192 trinucleotide context is the 96 trinucleotide context combined with the strands. 6 | #' This function calculates the 192 trinucleotide context for all variants. 7 | #' and then splits these per GRanges (samples). It then calculates how often each 192 trinucleotide context occurs. 
mut_192_occurrences <- function(type_context, strand, gr_sizes) {
  # Counts the 96 mutation contexts separately for the first two levels of
  # 'strand' with mut_96_occurrences() and returns both count matrices
  # combined, with rows interleaved per context and named
  # "<context>-<strand level>".

  # The two strand values that are counted.
  strand_values <- levels(strand)[1:2]

  # Sample of origin for every variant, derived from the number of
  # variants per GRanges.
  sample_per_variant <- factor(
    rep(names(gr_sizes), gr_sizes),
    levels = names(gr_sizes)
  )

  # Make a 96-trinucleotide count matrix for each of the two strands.
  strand_matrices <- lapply(strand_values, function(strand_value) {
    on_strand <- which(strand == strand_value)

    # Restrict every element of the type context to this strand.
    strand_context <- lapply(type_context, function(element) {
      element[on_strand]
    })

    # Number of variants on this strand for each sample.
    strand_table <- table(sample_per_variant[on_strand])
    strand_sizes <- as.vector(strand_table)
    names(strand_sizes) <- names(strand_table)

    mut_96_occurrences(strand_context, strand_sizes)
  })

  # Row names carry the strand as a suffix.
  first_names <- paste(
    rownames(strand_matrices[[1]]), strand_values[1],
    sep = "-"
  )
  second_names <- paste(
    rownames(strand_matrices[[2]]), strand_values[2],
    sep = "-"
  )

  # Stack the matrix of the first strand on top of that of the second.
  stacked <- rbind(strand_matrices[[1]], strand_matrices[[2]])
  rownames(stacked) <- c(first_names, second_names)

  # Reorder for backwards compatibility: alternate between the rows of the
  # first and the second half (1, half + 1, 2, half + 2, ...).
  half <- nrow(stacked) / 2
  interleaved <- as.vector(rbind(
    seq(1, half),
    seq(half + 1, nrow(stacked))
  ))
  stacked <- stacked[interleaved, , drop = FALSE]

  return(stacked)
}
43 | Default = NULL.} 44 | 45 | \item{seed}{Random seed used for the regular NMF, default = 123456} 46 | } 47 | \value{ 48 | Named list of mutation matrix, signatures and signature contribution 49 | } 50 | \description{ 51 | Decomposes trinucleotide count matrix into signatures and contribution of 52 | those signatures to the spectra of the samples/vcf files. 53 | } 54 | \examples{ 55 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix: 56 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds", 57 | package = "MutationalPatterns" 58 | )) 59 | 60 | ## This function is computationally intensive. 61 | # nmf_res <- extract_signatures(mut_mat, rank = 2) 62 | 63 | ## It's also possible to use a variational Bayes method. 64 | ## It requires the ccfindR package to work. 65 | # nmf_res <- extract_signatures(mut_mat, rank = 2, nmf_type = "variational_bayes") 66 | } 67 | \seealso{ 68 | \code{\link{mut_matrix}} 69 | } 70 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bin_mutation_density) 4 | export(binomial_test) 5 | export(calculate_lesion_segregation) 6 | export(cluster_signatures) 7 | export(context_potential_damage_analysis) 8 | export(convert_sigs_to_ref) 9 | export(cos_sim) 10 | export(cos_sim_matrix) 11 | export(count_dbs_contexts) 12 | export(count_indel_contexts) 13 | export(count_mbs_contexts) 14 | export(determine_regional_similarity) 15 | export(enrichment_depletion_test) 16 | export(extract_signatures) 17 | export(fit_to_signatures) 18 | export(fit_to_signatures_bootstrapped) 19 | export(fit_to_signatures_strict) 20 | export(genomic_distribution) 21 | export(get_dbs_context) 22 | export(get_indel_context) 23 | export(get_known_signatures) 24 | export(get_mut_type) 25 | export(get_sim_tb) 26 | export(lengthen_mut_matrix) 27 | 
export(merge_signatures) 28 | export(mut_context) 29 | export(mut_matrix) 30 | export(mut_matrix_stranded) 31 | export(mut_strand) 32 | export(mut_type) 33 | export(mut_type_occurrences) 34 | export(mutations_from_vcf) 35 | export(plot_192_profile) 36 | export(plot_96_profile) 37 | export(plot_bootstrapped_contribution) 38 | export(plot_compare_dbs) 39 | export(plot_compare_indels) 40 | export(plot_compare_mbs) 41 | export(plot_compare_profiles) 42 | export(plot_contribution) 43 | export(plot_contribution_heatmap) 44 | export(plot_correlation_bootstrap) 45 | export(plot_cosine_heatmap) 46 | export(plot_dbs_contexts) 47 | export(plot_enrichment_depletion) 48 | export(plot_indel_contexts) 49 | export(plot_lesion_segregation) 50 | export(plot_main_dbs_contexts) 51 | export(plot_main_indel_contexts) 52 | export(plot_mbs_contexts) 53 | export(plot_original_vs_reconstructed) 54 | export(plot_profile_heatmap) 55 | export(plot_profile_region) 56 | export(plot_rainfall) 57 | export(plot_regional_similarity) 58 | export(plot_river) 59 | export(plot_signature_strand_bias) 60 | export(plot_spectrum) 61 | export(plot_spectrum_region) 62 | export(plot_strand) 63 | export(plot_strand_bias) 64 | export(pool_mut_mat) 65 | export(read_vcfs_as_granges) 66 | export(rename_nmf_signatures) 67 | export(signature_potential_damage_analysis) 68 | export(split_muts_region) 69 | export(strand_bias_test) 70 | export(strand_occurrences) 71 | export(type_context) 72 | exportClasses(region_cossim) 73 | exportMethods(get_sim_tb) 74 | exportMethods(show) 75 | import(GenomicRanges) 76 | import(NMF) 77 | import(ggalluvial) 78 | import(ggplot2) 79 | importFrom(magrittr,"%>%") 80 | importFrom(methods,setClass) 81 | importFrom(methods,setGeneric) 82 | importFrom(methods,setMethod) 83 | importFrom(pracma,lsqnonneg) 84 | -------------------------------------------------------------------------------- /inst/extdata/empty.vcf: -------------------------------------------------------------------------------- 
1 | ##fileformat=VCFv4.1 2 | ##SnpEffCmd="SnpEff GRCh37.74 AC_vcf.filtered_variants.vcf -hgvs -lof -no-downstream -no-upstream -no-intergenic " 3 | ##SnpEffVersion="4.1h (build 2015-08-03), by Pablo Cingolani" 4 | ##SnpSiftCmd="SnpSift annotate -tabix -name GoNLv5_ -info AF,AN,AC /hpc/cog_bioinf/common_dbs/GoNL/gonl_release5/site_freqs/gonl.snps_indels.r5.sorted.vcf.gz AC_snpEff_snpSift_Cosmicv76.vcf.filtered_variants.vcf" 5 | ##SnpSiftVersion="SnpSift 4.1h (build 2015-08-03), by Pablo Cingolani" 6 | ##fileDate=20200528 7 | ##reference=file:///hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta 8 | ##source=SelectVariants 9 | ##FILTER=All filters passed 10 | ##FORMAT= 11 | ##FORMAT= 12 | ##FORMAT= 13 | ##FORMAT= 14 | ##FORMAT= 15 | ##contig= 16 | ##contig= 17 | ##contig= 18 | ##contig= 19 | ##contig= 20 | ##contig= 21 | ##contig= 22 | ##contig= 23 | ##contig= 24 | ##contig= 25 | ##contig= 26 | ##contig= 27 | ##contig= 28 | ##contig= 29 | ##contig= 30 | ##contig= 31 | ##contig= 32 | ##contig= 33 | ##contig= 34 | ##contig= 35 | ##contig= 36 | ##contig= 37 | ##contig= 38 | ##contig= 39 | ##contig= 40 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ACGRABULK 41 | -------------------------------------------------------------------------------- /R/plot_mbs_contexts.R: -------------------------------------------------------------------------------- 1 | #' Plot the MBS contexts 2 | #' 3 | #' @details 4 | #' Plots the number of MBS per MBS length per sample. 5 | #' It takes a tibble with counts as its input. This tibble can be generated by count_mbs_contexts 6 | #' Each sample is plotted in a separate facet. 7 | #' The same y axis can be used for all samples or a separate y axis can be used. 8 | #' 9 | #' @param counts A tibble containing the number of MBS per MBS length. 10 | #' @param same_y A boolean describing whether the same y axis should be used for all samples. 11 | #' 12 | #' @return A ggplot figure. 
#'
#' @examples
#' ## Get the MBS counts
#' ## See 'count_mbs_contexts()' for more info on how to do this.
#' mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot contexts
#' plot_mbs_contexts(mbs_counts)
#'
#' ## Use a separate y axis for each sample.
#' plot_mbs_contexts(mbs_counts, same_y = FALSE)
#' @import ggplot2
#' @importFrom magrittr %>%
#' @family MBS
#'
#' @seealso \code{\link{count_mbs_contexts}}
#'
#' @export
plot_mbs_contexts <- function(counts, same_y = TRUE) {

  # `count` and `size` are only referenced as column names through
  # non-standard evaluation. Binding them to NULL avoids R CMD check
  # complaints about undefined variables.
  count <- size <- NULL

  # Make the data long: the row names become the `size` column and every
  # other column becomes one `sample`. Both are converted to factors whose
  # levels follow the order of first appearance, so the input order is kept
  # on the x axis and in the facets.
  counts <- counts %>%
    as.data.frame() %>%
    tibble::rownames_to_column("size") %>%
    tidyr::pivot_longer(-size, names_to = "sample", values_to = "count") %>%
    dplyr::mutate(
      size = factor(size, levels = unique(size)),
      sample = factor(sample, levels = unique(sample))
    )

  # Total (rounded) number of mutations per sample, shown in the facet labels.
  nr_muts <- counts %>%
    dplyr::group_by(sample) %>%
    dplyr::summarise(nr_muts = round(sum(count)))

  # Either share one y axis between all facets or give each facet its own.
  if (same_y) {
    facet_scale <- "fixed"
  } else {
    facet_scale <- "free_y"
  }

  # Facet labels of the form "<sample> (n = <total>)", named by sample so
  # that labeller() can look them up.
  facet_labs_y <- stringr::str_c(nr_muts$sample, " (n = ", nr_muts$nr_muts, ")")
  names(facet_labs_y) <- nr_muts$sample

  # Create plot: one bar per MBS size, one facet row per sample.
  fig <- ggplot(counts, aes(x = size, y = count, fill = size)) +
    geom_bar(stat = "identity") +
    facet_grid(sample ~ .,
      scales = facet_scale,
      labeller = labeller(sample = facet_labs_y)
    ) +
    scale_fill_manual(values = MBS_COLORS) +
    labs(x = "MBS size", y = "Nr. of MBSs") +
    guides(fill = "none") +
    theme_bw() +
    # No trailing comma after the last element: it would pass an empty
    # argument to theme(), which not every ggplot2 version tolerates.
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor.y = element_blank()
    )
  return(fig)
}
81 | -------------------------------------------------------------------------------- /man/plot_compare_dbs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_compare_dbs.R 3 | \name{plot_compare_dbs} 4 | \alias{plot_compare_dbs} 5 | \title{Compare two DBS mutation profiles} 6 | \usage{ 7 | plot_compare_dbs( 8 | profile1, 9 | profile2, 10 | profile_names = c("profile 1", "profile 2"), 11 | profile_ymax = 0.2, 12 | diff_ylim = c(-0.1, 0.1) 13 | ) 14 | } 15 | \arguments{ 16 | \item{profile1}{First mutation profile} 17 | 18 | \item{profile2}{Second mutation profile} 19 | 20 | \item{profile_names}{Character vector with names of the mutations profiles 21 | used for plotting, default = c("profile 1", "profile 2")} 22 | 23 | \item{profile_ymax}{Maximum value of y-axis (relative contribution) for 24 | profile plotting. This can only be used to increase the y axis. 25 | If bars fall outside this limit, the maximum value is 26 | automatically increased. default = 0.2.} 27 | 28 | \item{diff_ylim}{Y-axis limits for profile difference plot, 29 | default = c(-0.1, 0.1)} 30 | } 31 | \value{ 32 | A ggplot2 object 33 | } 34 | \description{ 35 | Plots two DBS mutation profiles and their difference, reports the residual 36 | sum of squares (RSS). 37 | } 38 | \examples{ 39 | 40 | ## Get the DBS counts 41 | ## See 'count_dbs_contexts()' for more info on how to do this. 42 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", 43 | package = "MutationalPatterns" 44 | )) 45 | 46 | ## Get DBS refit info. 47 | ## See 'fit_to_signatures()' for more info on how to do this. 
48 | fit_res <- readRDS(system.file("states/dbs_refit.rds", 49 | package = "MutationalPatterns" 50 | )) 51 | 52 | ## Compare the reconstructed profile of sample 1 with the original profile 53 | ## The same thing could be done with a reconstructed profile from NMF. 54 | plot_compare_dbs(dbs_counts[, 1], fit_res$reconstructed[, 1]) 55 | 56 | ## You could also compare regular mutation profiles with each other. 57 | plot_compare_dbs( 58 | dbs_counts[, 1], 59 | dbs_counts[, 2] 60 | ) 61 | 62 | ## Or change the names of the profiles 63 | plot_compare_dbs(dbs_counts[, 1], 64 | dbs_counts[, 2], 65 | profile_names = c("Original", "Reconstructed") 66 | ) 67 | 68 | ## You can also change the y limits. 69 | ## This can be done separately for the profiles and the difference facet. 70 | plot_compare_dbs(dbs_counts[, 1], 71 | dbs_counts[, 2], 72 | profile_ymax = 0.3, 73 | diff_ylim = c(-0.03, 0.03) 74 | ) 75 | } 76 | \seealso{ 77 | \code{\link{plot_compare_profiles}}, 78 | \code{\link{plot_compare_indels}}, 79 | \code{\link{plot_compare_mbs}} 80 | 81 | Other DBS: 82 | \code{\link{count_dbs_contexts}()}, 83 | \code{\link{get_dbs_context}()}, 84 | \code{\link{plot_dbs_contexts}()}, 85 | \code{\link{plot_main_dbs_contexts}()} 86 | } 87 | \concept{DBS} 88 | -------------------------------------------------------------------------------- /man/plot_rainfall.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_rainfall.R 3 | \name{plot_rainfall} 4 | \alias{plot_rainfall} 5 | \title{Plot genomic rainfall} 6 | \usage{ 7 | plot_rainfall( 8 | vcf, 9 | chromosomes, 10 | title = "", 11 | colors = NA, 12 | cex = 2.5, 13 | cex_text = 3, 14 | ylim = 1e+08, 15 | type = c("snv", "indel", "dbs", "mbs") 16 | ) 17 | } 18 | \arguments{ 19 | \item{vcf}{GRanges object} 20 | 21 | \item{chromosomes}{Vector of chromosome/contig names of the reference 22 | genome to be plotted} 23 | 24 | 
\item{title}{Optional plot title} 25 | 26 | \item{colors}{Vector of 6 colors used for plotting} 27 | 28 | \item{cex}{Point size} 29 | 30 | \item{cex_text}{Text size} 31 | 32 | \item{ylim}{Maximum y value (genomic distance)} 33 | 34 | \item{type}{The mutation type of the GRanges object that will be used. 35 | Possible values: 36 | * 'snv' (default) 37 | * 'indel' 38 | * 'dbs' 39 | * 'mbs'} 40 | } 41 | \value{ 42 | Rainfall plot 43 | } 44 | \description{ 45 | Rainfall plot visualizes the types of mutations and intermutation distance 46 | } 47 | \details{ 48 | Rainfall plots can be used to visualize the distribution of mutations 49 | along the genome or a subset of chromosomes. The distance of a mutation 50 | with the mutation prior to it (the intermutation distance) is plotted on 51 | the y-axis on a log scale. The input GRanges are sorted before plotting. 52 | 53 | The colour of the points indicates the base substitution type. 54 | Clusters of mutations with lower intermutation distance represent mutation 55 | hotspots. 56 | } 57 | \examples{ 58 | ## See the 'read_vcfs_as_granges()' example for how we obtained the 59 | ## following data: 60 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds", 61 | package = "MutationalPatterns" 62 | )) 63 | 64 | # Specify chromosomes of interest. 65 | chromosomes <- names(genome(vcfs[[1]])[1:22]) 66 | 67 | ## Do a rainfall plot for all chromosomes: 68 | plot_rainfall(vcfs[[1]], 69 | title = names(vcfs[1]), 70 | chromosomes = chromosomes, 71 | cex = 1 72 | ) 73 | 74 | ## Or for a single chromosome (chromosome 1): 75 | plot_rainfall(vcfs[[1]], 76 | title = names(vcfs[1]), 77 | chromosomes = chromosomes[1], 78 | cex = 2 79 | ) 80 | 81 | ## You can also use other variant types 82 | 83 | ## Get a GRangesList or GRanges object with indel contexts. 84 | ## See 'get_indel_context()' for more info on how to do this. 
85 | grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds", 86 | package = "MutationalPatterns" 87 | )) 88 | 89 | plot_rainfall(grl_indel_context[[1]], 90 | title = "Indel rainfall", 91 | chromosomes, 92 | type = "indel" 93 | ) 94 | 95 | } 96 | \seealso{ 97 | \code{\link{read_vcfs_as_granges}} 98 | } 99 | -------------------------------------------------------------------------------- /man/rename_nmf_signatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rename_nmf_signatures.R 3 | \name{rename_nmf_signatures} 4 | \alias{rename_nmf_signatures} 5 | \title{Rename NMF signatures based on previously defined signatures} 6 | \usage{ 7 | rename_nmf_signatures( 8 | nmf_res, 9 | signatures, 10 | cutoff = 0.85, 11 | base_name = "SBS", 12 | suffix = "-like" 13 | ) 14 | } 15 | \arguments{ 16 | \item{nmf_res}{Named list of mutation matrix, signatures and signature contribution} 17 | 18 | \item{signatures}{A signature matrix} 19 | 20 | \item{cutoff}{Cutoff at which signatures are considered similar. Default: 0.85} 21 | 22 | \item{base_name}{The base part of a letter based signature name. Default: "SBS"} 23 | 24 | \item{suffix}{String. The suffix added to the name of a renamed signature. Default: "-like"} 25 | } 26 | \value{ 27 | A nmf_res with changed signature names 28 | } 29 | \description{ 30 | This function renames signatures identified with NMF based on previously defined signatures. 31 | If a NMF signature has a cosine similarity with a previously defined signature, 32 | that is higher than the cutoff, then this NMF signature will get the name 33 | of the previously defined signature. If not the NMF signature will receive a letter based name. 34 | For example: SBSA. 35 | This only changes the names of signatures, not their actual values. 
36 | This function can help with identifying whether signatures found with NMF are already known, 37 | which can be useful for interpretation. 38 | An extracted signature that is not similar to any previously defined signatures, 39 | is not proof of a "novel" signature. The extracted signature might be a 40 | combination of known signatures, that could not be split by NMF. This can happen 41 | when, for example, too few samples were used for the NMF. 42 | } 43 | \examples{ 44 | 45 | ## Extracting signatures can be computationally intensive, so 46 | ## we use pre-computed data generated with the following command: 47 | # nmf_res <- extract_signatures(mut_mat, rank = 2) 48 | 49 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds", 50 | package = "MutationalPatterns" 51 | )) 52 | 53 | ## Get signatures 54 | signatures <- get_known_signatures() 55 | 56 | rename_nmf_signatures(nmf_res, signatures) 57 | 58 | ## You can change or remove the suffix of the renamed signatures. 59 | rename_nmf_signatures(nmf_res, signatures, suffix = "") 60 | 61 | ## You can change how similar the signatures have to be, before they are considered similar. 62 | rename_nmf_signatures(nmf_res, signatures, cutoff = 0.95) 63 | 64 | ## You can also change the base_name of the signatures that end up with a letter name. 65 | rename_nmf_signatures(nmf_res, signatures, cutoff = 0.95, base_name = "Signature_") 66 | } 67 | -------------------------------------------------------------------------------- /tests/testthat/test-mut_matrix_stranded.R: -------------------------------------------------------------------------------- 1 | context("test-mut_matrix_stranded") 2 | 3 | 4 | # To test mut_matrix, we need to load the reference genome and the genes first. 
ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
library(ref_genome, character.only = TRUE)
library("TxDb.Hsapiens.UCSC.hg19.knownGene")

# Shared fixtures: gene ranges, the example GRangesList and the stored
# stranded mutation matrix it is expected to produce.
genes_hg19 <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
input <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
  package = "MutationalPatterns"
))
expected <- readRDS(system.file("states/mut_mat_s_data.rds",
  package = "MutationalPatterns"
))

# Test that the function works with default arguments
test_that("transforms correctly", {
  output <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19)
  expect_equal(output, expected)
})

# Test that a list is an acceptable input
test_that("a list is also acceptable input", {
  output <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19)
  output_list <- mut_matrix_stranded(as.list(input), ref_genome, ranges = genes_hg19)

  expect_equal(output_list, output)
  expect_equal(output_list, expected)
})

# A single sample can be used as input.
test_that("A single GR can also be used as input", {
  output_singlesample <- mut_matrix_stranded(input[[1]], ref_genome, ranges = genes_hg19)
  expect_true(inherits(output_singlesample, "matrix"))
  expect_equal(dim(output_singlesample), c(192, 1))
})

# seqlevels of the genes need to match the input.
# This test previously reused the description of the single-sample test
# above, so a failure here would have been reported under the wrong name.
genes_badseqlevel <- genes_hg19
seqlevels(genes_badseqlevel)[1] <- "chrtest"
test_that("An error is raised when the seqlevels of the genes do not match the input", {
  expect_error(
    {
      mut_matrix_stranded(input[[1]], ref_genome, ranges = genes_badseqlevel)
    },
    "Chromosome names \\(seqlevels\\) of vcf and genes Granges object do not match"
  )
})


# Test replication mode
repli_strand_granges <- readRDS(system.file("states/repli_strand.rds",
  package = "MutationalPatterns"
))
expected_repli <- readRDS(system.file("states/mut_mat_repli.rds",
  package = "MutationalPatterns"
))

test_that("replication mode transforms correctly", {
  mut_mat_repli <- mut_matrix_stranded(input, ref_genome, repli_strand_granges, mode = "replication")
  expect_equal(mut_mat_repli, expected_repli)
})


# Test longer context. The matrix is computed once and shared by the two
# tests below.
output_longer <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19, extension = 2)

test_that("Output has correct class", {
  expect_true(inherits(output_longer, "matrix"))
})

test_that("Output has correct dimensions", {
  expect_equal(dim(output_longer), c(3072, 9))
})
76 | --------------------------------------------------------------------------------